mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-04-29 03:00:45 -04:00
Compare commits
22 Commits
fix-agent-
...
fix-github
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c18fe4282f | ||
|
|
42e9e441d9 | ||
|
|
a2bab24e22 | ||
|
|
6c56195785 | ||
|
|
d8444ef626 | ||
|
|
64e96b7c3c | ||
|
|
7967662898 | ||
|
|
096d74acae | ||
|
|
dcef5ae1f1 | ||
|
|
cfbf29f6e8 | ||
|
|
59b369047f | ||
|
|
07468e39f7 | ||
|
|
0b0bfdff05 | ||
|
|
42b0a89366 | ||
|
|
e78d7de0c0 | ||
|
|
6751bba939 | ||
|
|
039e966dad | ||
|
|
a1f73bb4c6 | ||
|
|
bf769d1744 | ||
|
|
15e9435b35 | ||
|
|
3e15b849a3 | ||
|
|
c32934ed2f |
@@ -53,7 +53,7 @@ repos:
|
||||
# Use -p (package) to avoid dual module name conflict when using MYPYPATH
|
||||
# MYPYPATH=enterprise allows resolving bare imports like "from integrations.xxx"
|
||||
# Note: tests package excluded to avoid conflict with core openhands tests
|
||||
entry: bash -c 'MYPYPATH=enterprise mypy --config-file enterprise/dev_config/python/mypy.ini -p integrations -p server -p storage -p sync -p experiments'
|
||||
entry: bash -c 'MYPYPATH=enterprise mypy --config-file enterprise/dev_config/python/mypy.ini -p integrations -p server -p storage -p sync'
|
||||
always_run: true
|
||||
pass_filenames: false
|
||||
files: ^enterprise/
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
import os
|
||||
|
||||
import posthog
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
# Configure the PostHog client from the environment, falling back to a
# placeholder key and the US PostHog host when the variables are absent.
posthog.api_key = os.getenv('POSTHOG_CLIENT_KEY', 'phc_placeholder')
posthog.host = os.getenv('POSTHOG_HOST', 'https://us.i.posthog.com')

# Never log the raw key: expose only its first and last four characters, and
# only when the key is longer than eight characters.
api_key = posthog.api_key
masked_key = (
    f'{api_key[:4]}...{api_key[-4:]}'
    if api_key and len(api_key) > 8
    else 'not_set_or_too_short'
)
logger.info('posthog_configuration', extra={'posthog_api_key_masked': masked_key})

# Master switch for the experiment manager; on only when the variable is
# the string "true" (compared case-insensitively).
ENABLE_EXPERIMENT_MANAGER = (
    os.getenv('ENABLE_EXPERIMENT_MANAGER', 'false').lower() == 'true'
)

# Per-experiment settings read from the environment. Each defaults to the
# empty string, which is falsy and therefore means "experiment not running".
EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT = os.getenv(
    'EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT', ''
)
EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT = os.getenv(
    'EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', ''
)
EXPERIMENT_CLAUDE4_VS_GPT5 = os.getenv('EXPERIMENT_CLAUDE4_VS_GPT5', '')
EXPERIMENT_CONDENSER_MAX_STEP = os.getenv('EXPERIMENT_CONDENSER_MAX_STEP', '')

# Record the effective experiment configuration once, at import time.
logger.info(
    'experiment_manager:run_conversation_variant_test:experiment_config',
    extra={
        'enable_experiment_manager': ENABLE_EXPERIMENT_MANAGER,
        'experiment_litellm_default_model_experiment': EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT,
        'experiment_system_prompt_experiment': EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
        'experiment_claude4_vs_gpt5_experiment': EXPERIMENT_CLAUDE4_VS_GPT5,
        'experiment_condenser_max_step': EXPERIMENT_CONDENSER_MAX_STEP,
    },
)
|
||||
@@ -1,99 +0,0 @@
|
||||
from uuid import UUID
|
||||
|
||||
from experiments.constants import (
|
||||
ENABLE_EXPERIMENT_MANAGER,
|
||||
EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT,
|
||||
)
|
||||
from experiments.experiment_versions import (
|
||||
handle_system_prompt_experiment,
|
||||
)
|
||||
|
||||
from openhands.core.config.openhands_config import OpenHandsConfig
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.experiments.experiment_manager import ExperimentManager
|
||||
from openhands.sdk import Agent
|
||||
from openhands.server.session.conversation_init_data import ConversationInitData
|
||||
|
||||
|
||||
class SaaSExperimentManager(ExperimentManager):
    """Experiment hooks for the SaaS deployment, gated by environment toggles."""

    @staticmethod
    def run_agent_variant_tests__v1(
        user_id: str | None, conversation_id: UUID, agent: Agent
    ) -> Agent:
        """Apply the system prompt experiment to a V1 agent.

        Args:
            user_id: The user ID (not used by this method).
            conversation_id: The conversation ID (not used by this method).
            agent: The agent to potentially modify.

        Returns:
            The agent unchanged when the experiment manager is disabled, the
            system prompt experiment is not configured, or the agent uses the
            planning prompt; otherwise a copy of the agent that uses the
            long-horizon system prompt.
        """
        if not ENABLE_EXPERIMENT_MANAGER:
            logger.info(
                'experiment_manager:run_conversation_variant_test:skipped',
                extra={'reason': 'experiment_manager_disabled'},
            )
            return agent

        if not EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT:
            return agent

        # Planning agents require their specialized prompt; leave them alone.
        if agent.system_prompt_filename == 'system_prompt_planning.j2':
            return agent

        return agent.model_copy(
            update={'system_prompt_filename': 'system_prompt_long_horizon.j2'}
        )

    @staticmethod
    def run_conversation_variant_test(
        user_id, conversation_id, conversation_settings
    ) -> ConversationInitData:
        """Conversation-level experiment hook; currently a pass-through.

        Args:
            user_id: The user ID (only logged).
            conversation_id: The conversation ID (only logged).
            conversation_settings: The conversation settings.

        Returns:
            The same ``conversation_settings`` object, unmodified.
        """
        logger.debug(
            'experiment_manager:run_conversation_variant_test:started',
            extra={'user_id': user_id, 'conversation_id': conversation_id},
        )
        return conversation_settings

    @staticmethod
    def run_config_variant_test(
        user_id: str | None, conversation_id: str, config: OpenHandsConfig
    ) -> OpenHandsConfig:
        """Run the config-level experiments against an OpenHands config.

        Only the system prompt experiment is applied at this level.

        Args:
            user_id: The user ID.
            conversation_id: The conversation ID.
            config: The OpenHands configuration.

        Returns:
            ``config`` itself when the experiment manager is disabled,
            otherwise whatever ``handle_system_prompt_experiment`` returns.
        """
        logger.info(
            'experiment_manager:run_config_variant_test:started',
            extra={'user_id': user_id},
        )

        if not ENABLE_EXPERIMENT_MANAGER:
            # Experiments are globally switched off; skip all processing.
            logger.info(
                'experiment_manager:run_config_variant_test:skipped',
                extra={'reason': 'experiment_manager_disabled'},
            )
            return config

        # The handler receives the entire config and decides what to change.
        return handle_system_prompt_experiment(user_id, conversation_id, config)
|
||||
@@ -1,107 +0,0 @@
|
||||
"""
|
||||
LiteLLM model experiment handler.
|
||||
|
||||
This module contains the handler for the LiteLLM model experiment.
|
||||
"""
|
||||
|
||||
import posthog
|
||||
from experiments.constants import EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT
|
||||
from server.constants import (
|
||||
IS_FEATURE_ENV,
|
||||
build_litellm_proxy_model_path,
|
||||
get_default_litellm_model,
|
||||
)
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
|
||||
def handle_litellm_default_model_experiment(
    user_id, conversation_id, conversation_settings
):
    """Choose the conversation's LLM model from the LiteLLM model experiment.

    Args:
        user_id: The user ID; used as the PostHog distinct id for the event
            (prefixed with ``FEATURE_`` in feature environments).
        conversation_id: The conversation ID; used as the id the feature
            flag is evaluated for.
        conversation_settings: The conversation settings; ``llm_model`` is
            overwritten in place once a variant has been resolved.

    Returns:
        The same ``conversation_settings`` object. It is left untouched when
        the experiment is not configured or the flag lookup raises.
    """
    experiment = EXPERIMENT_LITELLM_DEFAULT_MODEL_EXPERIMENT

    # Guard: without a configured flag key there is no experiment to run.
    if not experiment:
        logger.info(
            'experiment_manager:ab_testing:skipped',
            extra={
                'convo_id': conversation_id,
                'reason': 'experiment_not_enabled',
                'experiment': experiment,
            },
        )
        return conversation_settings

    # The configured experiment name doubles as the PostHog flag key.
    try:
        enabled_variant = posthog.get_feature_flag(experiment, conversation_id)
    except Exception as flag_error:
        logger.error(
            'experiment_manager:get_feature_flag:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': experiment,
                'error': str(flag_error),
            },
        )
        return conversation_settings

    # Feature environments report under a prefixed id.
    posthog_user_id = user_id
    if IS_FEATURE_ENV:
        posthog_user_id = f'FEATURE_{user_id}'

    try:
        posthog.capture(
            distinct_id=posthog_user_id,
            event='model_set',
            properties={
                'conversation_id': conversation_id,
                'variant': enabled_variant,
                'original_user_id': user_id,
                'is_feature_env': IS_FEATURE_ENV,
            },
        )
    except Exception as capture_error:
        # Analytics are best-effort; a failed capture must not block the flow.
        logger.error(
            'experiment_manager:posthog_capture:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': experiment,
                'error': str(capture_error),
            },
        )

    logger.info(
        'posthog_capture',
        extra={
            'event': 'model_set',
            'posthog_user_id': posthog_user_id,
            'is_feature_env': IS_FEATURE_ENV,
            'conversation_id': conversation_id,
            'variant': enabled_variant,
        },
    )

    # Only the "claude37" variant pins a model; anything else gets the default.
    if enabled_variant == 'claude37':
        selected_model = build_litellm_proxy_model_path('claude-3-7-sonnet-20250219')
    else:
        selected_model = get_default_litellm_model()
    conversation_settings.llm_model = selected_model

    return conversation_settings
|
||||
@@ -1,181 +0,0 @@
|
||||
"""
|
||||
System prompt experiment handler.
|
||||
|
||||
This module contains the handler for the system prompt experiment that uses
|
||||
the PostHog variant as the system prompt filename.
|
||||
"""
|
||||
|
||||
import copy
|
||||
|
||||
import posthog
|
||||
from experiments.constants import EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT
|
||||
from server.constants import IS_FEATURE_ENV
|
||||
from storage.experiment_assignment_store import ExperimentAssignmentStore
|
||||
|
||||
from openhands.core.config.openhands_config import OpenHandsConfig
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
|
||||
def _get_system_prompt_variant(user_id, conversation_id):
    """Resolve, persist and report the system prompt experiment variant.

    Args:
        user_id: The user ID; used as the PostHog distinct id for the event
            (prefixed with ``FEATURE_`` in feature environments).
        conversation_id: The conversation ID; the feature flag is evaluated
            for this id and the assignment is stored under it.

    Returns:
        The value returned by ``posthog.get_feature_flag``, or ``None`` when
        the experiment is not configured, the flag lookup raises, or the
        assignment cannot be stored.
    """
    experiment = EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT

    # Guard: without a configured flag key there is no experiment to run.
    if not experiment:
        logger.info(
            'experiment_manager_002:ab_testing:skipped',
            extra={
                'convo_id': conversation_id,
                'reason': 'experiment_not_enabled',
                'experiment': experiment,
            },
        )
        return None

    # The configured experiment name doubles as the PostHog flag key.
    try:
        enabled_variant = posthog.get_feature_flag(experiment, conversation_id)
    except Exception as flag_error:
        logger.error(
            'experiment_manager:get_feature_flag:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': experiment,
                'error': str(flag_error),
            },
        )
        return None

    # Persist the assignment. If the split cannot be tracked the results
    # would not be explainable, so the experiment is abandoned for this call.
    try:
        ExperimentAssignmentStore().update_experiment_variant(
            conversation_id=conversation_id,
            experiment_name='system_prompt_experiment',
            variant=enabled_variant,
        )
    except Exception as store_error:
        logger.error(
            'experiment_manager:store_assignment:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': experiment,
                'variant': enabled_variant,
                'error': str(store_error),
            },
        )
        return None

    # Feature environments report under a prefixed id.
    posthog_user_id = user_id
    if IS_FEATURE_ENV:
        posthog_user_id = f'FEATURE_{user_id}'

    try:
        posthog.capture(
            distinct_id=posthog_user_id,
            event='system_prompt_set',
            properties={
                'conversation_id': conversation_id,
                'variant': enabled_variant,
                'original_user_id': user_id,
                'is_feature_env': IS_FEATURE_ENV,
            },
        )
    except Exception as capture_error:
        # Analytics are best-effort; a failed capture must not block the flow.
        logger.error(
            'experiment_manager:posthog_capture:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': experiment,
                'error': str(capture_error),
            },
        )

    logger.info(
        'posthog_capture',
        extra={
            'event': 'system_prompt_set',
            'posthog_user_id': posthog_user_id,
            'is_feature_env': IS_FEATURE_ENV,
            'conversation_id': conversation_id,
            'variant': enabled_variant,
        },
    )

    return enabled_variant
|
||||
|
||||
|
||||
def handle_system_prompt_experiment(
    user_id, conversation_id, config: OpenHandsConfig
) -> OpenHandsConfig:
    """Apply the system prompt experiment to an OpenHands config.

    Args:
        user_id: The user ID.
        conversation_id: The conversation ID.
        config: The OpenHands configuration; never mutated by this function.

    Returns:
        ``config`` itself when no variant was resolved or the variant is not
        recognised; otherwise a deep copy whose default agent config uses the
        prompt file mapped to the variant. The ``control`` variant also turns
        on ``enable_plan_mode``.
    """
    enabled_variant = _get_system_prompt_variant(user_id, conversation_id)

    # None means the experiment is off or the variant could not be resolved.
    if enabled_variant is None:
        return config

    # Work on a deep copy so the caller's config stays untouched.
    modified_config = copy.deepcopy(config)

    prompt_by_variant = {
        'control': 'system_prompt_long_horizon.j2',
        'interactive': 'system_prompt_interactive.j2',
        'no_tools': 'system_prompt.j2',
    }
    prompt_filename = prompt_by_variant.get(enabled_variant)

    if prompt_filename is None:
        logger.error(
            'system_prompt_experiment:unknown_variant',
            extra={
                'user_id': user_id,
                'convo_id': conversation_id,
                'variant': enabled_variant,
                'reason': 'no explicit mapping; returning original config',
            },
        )
        return config

    agent_config = modified_config.get_agent_config(modified_config.default_agent)
    agent_config.system_prompt_filename = prompt_filename
    if enabled_variant == 'control':
        # Only the control variant additionally switches plan mode on.
        agent_config.enable_plan_mode = True

    # Report the prompt actually stored on the modified config.
    logger.info(
        'system_prompt_experiment:prompt_selected',
        extra={
            'user_id': user_id,
            'convo_id': conversation_id,
            'system_prompt_filename': modified_config.get_agent_config(
                modified_config.default_agent
            ).system_prompt_filename,
            'variant': enabled_variant,
        },
    )

    return modified_config
|
||||
@@ -1,137 +0,0 @@
|
||||
"""
|
||||
LiteLLM model experiment handler.
|
||||
|
||||
This module contains the handler for the LiteLLM model experiment.
|
||||
"""
|
||||
|
||||
import posthog
|
||||
from experiments.constants import EXPERIMENT_CLAUDE4_VS_GPT5
|
||||
from server.constants import (
|
||||
IS_FEATURE_ENV,
|
||||
build_litellm_proxy_model_path,
|
||||
get_default_litellm_model,
|
||||
)
|
||||
from storage.experiment_assignment_store import ExperimentAssignmentStore
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.server.session.conversation_init_data import ConversationInitData
|
||||
|
||||
|
||||
def _get_model_variant(user_id: str | None, conversation_id: str) -> str | None:
    """Resolve, persist and report the Claude 4 vs GPT-5 experiment variant.

    Args:
        user_id: The user ID; used as the PostHog distinct id for the event
            (prefixed with ``FEATURE_`` in feature environments).
        conversation_id: The conversation ID; the feature flag is evaluated
            for this id and the assignment is stored under it.

    Returns:
        The value returned by ``posthog.get_feature_flag``, or ``None`` when
        the experiment is not configured, the flag lookup raises, or the
        assignment cannot be stored.
    """
    experiment = EXPERIMENT_CLAUDE4_VS_GPT5

    # Guard: without a configured flag key there is no experiment to run.
    if not experiment:
        logger.info(
            'experiment_manager:ab_testing:skipped',
            extra={
                'convo_id': conversation_id,
                'reason': 'experiment_not_enabled',
                'experiment': experiment,
            },
        )
        return None

    # The configured experiment name doubles as the PostHog flag key.
    try:
        enabled_variant = posthog.get_feature_flag(experiment, conversation_id)
    except Exception as flag_error:
        logger.error(
            'experiment_manager:get_feature_flag:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': experiment,
                'error': str(flag_error),
            },
        )
        return None

    # Persist the assignment. If the split cannot be tracked the results
    # would not be explainable, so the experiment is abandoned for this call.
    try:
        ExperimentAssignmentStore().update_experiment_variant(
            conversation_id=conversation_id,
            experiment_name='claude4_vs_gpt5_experiment',
            variant=enabled_variant,
        )
    except Exception as store_error:
        logger.error(
            'experiment_manager:store_assignment:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': experiment,
                'variant': enabled_variant,
                'error': str(store_error),
            },
        )
        return None

    # Feature environments report under a prefixed id.
    posthog_user_id = user_id
    if IS_FEATURE_ENV:
        posthog_user_id = f'FEATURE_{user_id}'

    try:
        posthog.capture(
            distinct_id=posthog_user_id,
            event='claude4_or_gpt5_set',
            properties={
                'conversation_id': conversation_id,
                'variant': enabled_variant,
                'original_user_id': user_id,
                'is_feature_env': IS_FEATURE_ENV,
            },
        )
    except Exception as capture_error:
        # Analytics are best-effort; a failed capture must not block the flow.
        logger.error(
            'experiment_manager:posthog_capture:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': experiment,
                'error': str(capture_error),
            },
        )

    logger.info(
        'posthog_capture',
        extra={
            'event': 'claude4_or_gpt5_set',
            'posthog_user_id': posthog_user_id,
            'is_feature_env': IS_FEATURE_ENV,
            'conversation_id': conversation_id,
            'variant': enabled_variant,
        },
    )

    return enabled_variant
|
||||
|
||||
|
||||
def handle_claude4_vs_gpt5_experiment(
    user_id: str | None,
    conversation_id: str,
    conversation_settings: ConversationInitData,
) -> ConversationInitData:
    """Choose the conversation's LLM model from the Claude 4 vs GPT-5 experiment.

    Args:
        user_id: The user ID.
        conversation_id: The conversation ID.
        conversation_settings: The conversation settings; ``llm_model`` is
            overwritten in place when a variant is resolved.

    Returns:
        The same ``conversation_settings`` object. It is left untouched when
        no (truthy) variant is resolved; otherwise ``llm_model`` is the GPT-5
        proxy path for the ``gpt5`` variant and the default model for any
        other variant.
    """
    enabled_variant = _get_model_variant(user_id, conversation_id)

    # A falsy variant means the experiment does not apply to this conversation.
    if not enabled_variant:
        return conversation_settings

    conversation_settings.llm_model = (
        build_litellm_proxy_model_path('gpt-5-2025-08-07')
        if enabled_variant == 'gpt5'
        else get_default_litellm_model()
    )

    return conversation_settings
|
||||
@@ -1,232 +0,0 @@
|
||||
"""
|
||||
Condenser max step experiment handler.
|
||||
|
||||
This module contains the handler for the condenser max step experiment that tests
|
||||
different max_size values for the condenser configuration.
|
||||
"""
|
||||
|
||||
from uuid import UUID
|
||||
|
||||
import posthog
|
||||
from experiments.constants import EXPERIMENT_CONDENSER_MAX_STEP
|
||||
from server.constants import IS_FEATURE_ENV
|
||||
from storage.experiment_assignment_store import ExperimentAssignmentStore
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.sdk import Agent
|
||||
from openhands.sdk.context.condenser import (
|
||||
LLMSummarizingCondenser,
|
||||
)
|
||||
from openhands.server.session.conversation_init_data import ConversationInitData
|
||||
|
||||
|
||||
def _get_condenser_max_step_variant(user_id, conversation_id):
    """Resolve, persist and report the condenser max step experiment variant.

    Args:
        user_id: The user ID; used as the PostHog distinct id for the event
            (prefixed with ``FEATURE_`` in feature environments).
        conversation_id: The conversation ID; the feature flag is evaluated
            for this id and the assignment is stored under it.

    Returns:
        The value returned by ``posthog.get_feature_flag``, or ``None`` when
        the experiment is not configured, the flag lookup raises, or the
        assignment cannot be stored.
    """
    experiment = EXPERIMENT_CONDENSER_MAX_STEP

    # Guard: without a configured flag key there is no experiment to run.
    if not experiment:
        logger.info(
            'experiment_manager_004:ab_testing:skipped',
            extra={
                'convo_id': conversation_id,
                'reason': 'experiment_not_enabled',
                'experiment': experiment,
            },
        )
        return None

    # The configured experiment name doubles as the PostHog flag key.
    try:
        enabled_variant = posthog.get_feature_flag(experiment, conversation_id)
    except Exception as flag_error:
        logger.error(
            'experiment_manager:get_feature_flag:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': experiment,
                'error': str(flag_error),
            },
        )
        return None

    # Persist the assignment. If the split cannot be tracked the results
    # would not be explainable, so the experiment is abandoned for this call.
    try:
        ExperimentAssignmentStore().update_experiment_variant(
            conversation_id=conversation_id,
            experiment_name='condenser_max_step_experiment',
            variant=enabled_variant,
        )
    except Exception as store_error:
        logger.error(
            'experiment_manager:store_assignment:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': experiment,
                'variant': enabled_variant,
                'error': str(store_error),
            },
        )
        return None

    # Feature environments report under a prefixed id.
    posthog_user_id = user_id
    if IS_FEATURE_ENV:
        posthog_user_id = f'FEATURE_{user_id}'

    try:
        posthog.capture(
            distinct_id=posthog_user_id,
            event='condenser_max_step_set',
            properties={
                'conversation_id': conversation_id,
                'variant': enabled_variant,
                'original_user_id': user_id,
                'is_feature_env': IS_FEATURE_ENV,
            },
        )
    except Exception as capture_error:
        # Analytics are best-effort; a failed capture must not block the flow.
        logger.error(
            'experiment_manager:posthog_capture:failed',
            extra={
                'convo_id': conversation_id,
                'experiment': experiment,
                'error': str(capture_error),
            },
        )

    logger.info(
        'posthog_capture',
        extra={
            'event': 'condenser_max_step_set',
            'posthog_user_id': posthog_user_id,
            'is_feature_env': IS_FEATURE_ENV,
            'conversation_id': conversation_id,
            'variant': enabled_variant,
        },
    )

    return enabled_variant
|
||||
|
||||
|
||||
def handle_condenser_max_step_experiment(
    user_id: str | None,
    conversation_id: str,
    conversation_settings: ConversationInitData,
) -> ConversationInitData:
    """Apply the condenser max step experiment to conversation settings.

    The variant is written to the in-memory settings object only; nothing in
    this function persists it.

    Variants:
        - ``control``   -> ``condenser_max_size = 120``
        - ``treatment`` -> ``condenser_max_size = 80``

    Args:
        user_id: The user ID.
        conversation_id: The conversation ID.
        conversation_settings: The settings object to adjust in place.

    Returns:
        The same ``conversation_settings`` object, modified only when a known
        variant was resolved and the object has a ``condenser_max_size``
        attribute.
    """
    enabled_variant = _get_condenser_max_step_variant(user_id, conversation_id)
    if enabled_variant is None:
        return conversation_settings

    log_context = {
        'user_id': user_id,
        'convo_id': conversation_id,
        'variant': enabled_variant,
    }

    size_by_variant = {'control': 120, 'treatment': 80}
    if enabled_variant not in size_by_variant:
        logger.error(
            'condenser_max_step_experiment:unknown_variant',
            extra={
                **log_context,
                'reason': 'unknown variant; returning original conversation settings',
            },
        )
        return conversation_settings
    condenser_max_size = size_by_variant[enabled_variant]

    try:
        # Not every settings object exposes `condenser_max_size`; only set it
        # when the attribute is already there.
        if hasattr(conversation_settings, 'condenser_max_size'):
            conversation_settings.condenser_max_size = condenser_max_size
            logger.info(
                'condenser_max_step_experiment:conversation_settings_applied',
                extra={**log_context, 'condenser_max_size': condenser_max_size},
            )
        else:
            logger.warning(
                'condenser_max_step_experiment:field_missing_on_settings',
                extra={
                    **log_context,
                    'reason': 'condenser_max_size not present on ConversationInitData',
                },
            )
    except Exception as apply_error:
        # Applying the variant is best-effort; fall through with the settings
        # in whatever state they are in.
        logger.error(
            'condenser_max_step_experiment:apply_failed',
            extra={**log_context, 'error': str(apply_error)},
        )

    return conversation_settings
|
||||
|
||||
|
||||
def handle_condenser_max_step_experiment__v1(
    user_id: str | None,
    conversation_id: UUID,
    agent: Agent,
) -> Agent:
    """Apply the condenser max step experiment to a V1 agent.

    Variants:
        - ``control``   -> condenser ``max_size = 120``
        - ``treatment`` -> condenser ``max_size = 80``

    Args:
        user_id: The user ID.
        conversation_id: The conversation ID; converted to ``str`` for the
            variant lookup and for logging.
        agent: The agent whose condenser may be replaced.

    Returns:
        ``agent`` itself when no variant is resolved or the variant is not
        recognised; otherwise a copy of the agent carrying an
        ``LLMSummarizingCondenser`` built from a copy of the agent's LLM.
    """
    # Stringify once so the lookup and the log record use the same id form.
    convo_id = str(conversation_id)
    enabled_variant = _get_condenser_max_step_variant(user_id, convo_id)

    if enabled_variant is None:
        return agent

    if enabled_variant == 'control':
        condenser_max_size = 120
    elif enabled_variant == 'treatment':
        condenser_max_size = 80
    else:
        logger.error(
            'condenser_max_step_experiment:unknown_variant',
            extra={
                'user_id': user_id,
                'convo_id': convo_id,
                'variant': enabled_variant,
                # This function hands back the agent, not conversation settings.
                'reason': 'unknown variant; returning original agent',
            },
        )
        return agent

    # The condenser gets its own LLM copy tagged with a separate usage id.
    condenser_llm = agent.llm.model_copy(update={'usage_id': 'condenser'})
    condenser = LLMSummarizingCondenser(
        llm=condenser_llm, max_size=condenser_max_size, keep_first=4
    )

    return agent.model_copy(update={'condenser': condenser})
|
||||
@@ -1,25 +0,0 @@
|
||||
"""
|
||||
Experiment versions package.
|
||||
|
||||
This package contains handlers for different experiment versions.
|
||||
"""
|
||||
|
||||
from experiments.experiment_versions._001_litellm_default_model_experiment import (
|
||||
handle_litellm_default_model_experiment,
|
||||
)
|
||||
from experiments.experiment_versions._002_system_prompt_experiment import (
|
||||
handle_system_prompt_experiment,
|
||||
)
|
||||
from experiments.experiment_versions._003_llm_claude4_vs_gpt5_experiment import (
|
||||
handle_claude4_vs_gpt5_experiment,
|
||||
)
|
||||
from experiments.experiment_versions._004_condenser_max_step_experiment import (
|
||||
handle_condenser_max_step_experiment,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'handle_litellm_default_model_experiment',
|
||||
'handle_system_prompt_experiment',
|
||||
'handle_claude4_vs_gpt5_experiment',
|
||||
'handle_condenser_max_step_experiment',
|
||||
]
|
||||
@@ -20,6 +20,7 @@ from integrations.models import (
|
||||
from integrations.types import ResolverViewInterface
|
||||
from integrations.utils import (
|
||||
CONVERSATION_URL,
|
||||
ENABLE_SOLVABILITY_ANALYSIS,
|
||||
HOST_URL,
|
||||
OPENHANDS_RESOLVER_TEMPLATES_DIR,
|
||||
get_session_expired_message,
|
||||
@@ -370,19 +371,19 @@ class GithubManager(Manager[GithubViewType]):
|
||||
# 3. Once the conversation is started, its base cost will include the report's spend as well which allows us to control max budget per resolver task
|
||||
convo_metadata = await github_view.initialize_new_conversation()
|
||||
solvability_summary = None
|
||||
try:
|
||||
if user_token:
|
||||
if not ENABLE_SOLVABILITY_ANALYSIS:
|
||||
logger.info(
|
||||
'[Github]: Solvability report feature is disabled, skipping'
|
||||
)
|
||||
else:
|
||||
try:
|
||||
solvability_summary = await summarize_issue_solvability(
|
||||
github_view, user_token
|
||||
)
|
||||
else:
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
'[Github]: No user token available for solvability analysis'
|
||||
f'[Github]: Error summarizing issue solvability: {str(e)}'
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f'[Github]: Error summarizing issue solvability: {str(e)}'
|
||||
)
|
||||
|
||||
saas_user_auth = await get_saas_user_auth(
|
||||
github_view.user_info.keycloak_user_id, self.token_manager
|
||||
|
||||
@@ -3,8 +3,9 @@ from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
import httpx
|
||||
from github import Auth, Github, GithubIntegration
|
||||
from integrations.utils import CONVERSATION_URL, get_summary_instruction
|
||||
from github import Auth, Github, GithubException, GithubIntegration
|
||||
from integrations.utils import get_summary_instruction
|
||||
from integrations.v1_utils import handle_callback_error
|
||||
from pydantic import Field
|
||||
from server.auth.constants import GITHUB_APP_CLIENT_ID, GITHUB_APP_PRIVATE_KEY
|
||||
|
||||
@@ -42,7 +43,6 @@ class GithubV1CallbackProcessor(EventCallbackProcessor):
|
||||
event: Event,
|
||||
) -> EventCallbackResult | None:
|
||||
"""Process events for GitHub V1 integration."""
|
||||
|
||||
# Only handle ConversationStateUpdateEvent
|
||||
if not isinstance(event, ConversationStateUpdateEvent):
|
||||
return None
|
||||
@@ -78,25 +78,20 @@ class GithubV1CallbackProcessor(EventCallbackProcessor):
|
||||
detail=summary,
|
||||
)
|
||||
except Exception as e:
|
||||
_logger.exception('[GitHub V1] Error processing callback: %s', e)
|
||||
|
||||
# Only try to post error to GitHub if we have basic requirements
|
||||
try:
|
||||
# Check if we have installation ID and credentials before posting
|
||||
if (
|
||||
self.github_view_data.get('installation_id')
|
||||
and GITHUB_APP_CLIENT_ID
|
||||
and GITHUB_APP_PRIVATE_KEY
|
||||
):
|
||||
await self._post_summary_to_github(
|
||||
f'OpenHands encountered an error: **{str(e)}**.\n\n'
|
||||
f'[See the conversation]({CONVERSATION_URL.format(conversation_id)})'
|
||||
'for more information.'
|
||||
)
|
||||
except Exception as post_error:
|
||||
_logger.warning(
|
||||
'[GitHub V1] Failed to post error message to GitHub: %s', post_error
|
||||
)
|
||||
# Check if we have installation ID and credentials before posting
|
||||
can_post_error = bool(
|
||||
self.github_view_data.get('installation_id')
|
||||
and GITHUB_APP_CLIENT_ID
|
||||
and GITHUB_APP_PRIVATE_KEY
|
||||
)
|
||||
await handle_callback_error(
|
||||
error=e,
|
||||
conversation_id=conversation_id,
|
||||
service_name='GitHub',
|
||||
service_logger=_logger,
|
||||
can_post_error=can_post_error,
|
||||
post_error_func=self._post_summary_to_github,
|
||||
)
|
||||
|
||||
return EventCallbackResult(
|
||||
status=EventCallbackResultStatus.ERROR,
|
||||
@@ -137,19 +132,30 @@ class GithubV1CallbackProcessor(EventCallbackProcessor):
|
||||
full_repo_name = self.github_view_data['full_repo_name']
|
||||
issue_number = self.github_view_data['issue_number']
|
||||
|
||||
if self.inline_pr_comment:
|
||||
try:
|
||||
if self.inline_pr_comment:
|
||||
with Github(auth=Auth.Token(installation_token)) as github_client:
|
||||
repo = github_client.get_repo(full_repo_name)
|
||||
pr = repo.get_pull(issue_number)
|
||||
pr.create_review_comment_reply(
|
||||
comment_id=self.github_view_data.get('comment_id', ''),
|
||||
body=summary,
|
||||
)
|
||||
return
|
||||
|
||||
with Github(auth=Auth.Token(installation_token)) as github_client:
|
||||
repo = github_client.get_repo(full_repo_name)
|
||||
pr = repo.get_pull(issue_number)
|
||||
pr.create_review_comment_reply(
|
||||
comment_id=self.github_view_data.get('comment_id', ''), body=summary
|
||||
issue = repo.get_issue(number=issue_number)
|
||||
issue.create_comment(summary)
|
||||
except GithubException as e:
|
||||
if e.status == 410:
|
||||
_logger.info(
|
||||
'[GitHub V1] Issue/PR %s#%s was deleted, skipping summary post',
|
||||
full_repo_name,
|
||||
issue_number,
|
||||
)
|
||||
return
|
||||
|
||||
with Github(auth=Auth.Token(installation_token)) as github_client:
|
||||
repo = github_client.get_repo(full_repo_name)
|
||||
issue = repo.get_issue(number=issue_number)
|
||||
issue.create_comment(summary)
|
||||
else:
|
||||
raise
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Agent / sandbox helpers
|
||||
@@ -167,8 +173,8 @@ class GithubV1CallbackProcessor(EventCallbackProcessor):
|
||||
send_message_request = AskAgentRequest(question=message_content)
|
||||
|
||||
url = (
|
||||
f'{agent_server_url.rstrip("/")}'
|
||||
f'/api/conversations/{conversation_id}/ask_agent'
|
||||
f"{agent_server_url.rstrip('/')}"
|
||||
f"/api/conversations/{conversation_id}/ask_agent"
|
||||
)
|
||||
headers = {'X-Session-API-Key': session_api_key}
|
||||
payload = send_message_request.model_dump()
|
||||
@@ -230,8 +236,7 @@ class GithubV1CallbackProcessor(EventCallbackProcessor):
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
async def _request_summary(self, conversation_id: UUID) -> str:
|
||||
"""
|
||||
Ask the agent to produce a summary of its work and return the agent response.
|
||||
"""Ask the agent to produce a summary of its work and return the agent response.
|
||||
|
||||
NOTE: This method now returns a string (the agent server's response text)
|
||||
and raises exceptions on errors. The wrapping into EventCallbackResult
|
||||
|
||||
@@ -3,7 +3,8 @@ from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
import httpx
|
||||
from integrations.utils import CONVERSATION_URL, get_summary_instruction
|
||||
from integrations.utils import get_summary_instruction
|
||||
from integrations.v1_utils import handle_callback_error
|
||||
from pydantic import Field
|
||||
|
||||
from openhands.agent_server.models import AskAgentRequest, AskAgentResponse
|
||||
@@ -75,20 +76,15 @@ class GitlabV1CallbackProcessor(EventCallbackProcessor):
|
||||
detail=summary,
|
||||
)
|
||||
except Exception as e:
|
||||
_logger.exception('[GitLab V1] Error processing callback: %s', e)
|
||||
|
||||
# Only try to post error to GitLab if we have basic requirements
|
||||
try:
|
||||
if self.gitlab_view_data.get('keycloak_user_id'):
|
||||
await self._post_summary_to_gitlab(
|
||||
f'OpenHands encountered an error: **{str(e)}**.\n\n'
|
||||
f'[See the conversation]({CONVERSATION_URL.format(conversation_id)}) '
|
||||
'for more information.'
|
||||
)
|
||||
except Exception as post_error:
|
||||
_logger.warning(
|
||||
'[GitLab V1] Failed to post error message to GitLab: %s', post_error
|
||||
)
|
||||
can_post_error = bool(self.gitlab_view_data.get('keycloak_user_id'))
|
||||
await handle_callback_error(
|
||||
error=e,
|
||||
conversation_id=conversation_id,
|
||||
service_name='GitLab',
|
||||
service_logger=_logger,
|
||||
can_post_error=can_post_error,
|
||||
post_error_func=self._post_summary_to_gitlab,
|
||||
)
|
||||
|
||||
return EventCallbackResult(
|
||||
status=EventCallbackResultStatus.ERROR,
|
||||
@@ -149,8 +145,8 @@ class GitlabV1CallbackProcessor(EventCallbackProcessor):
|
||||
send_message_request = AskAgentRequest(question=message_content)
|
||||
|
||||
url = (
|
||||
f'{agent_server_url.rstrip("/")}'
|
||||
f'/api/conversations/{conversation_id}/ask_agent'
|
||||
f"{agent_server_url.rstrip('/')}"
|
||||
f"/api/conversations/{conversation_id}/ask_agent"
|
||||
)
|
||||
headers = {'X-Session-API-Key': session_api_key}
|
||||
payload = send_message_request.model_dump()
|
||||
|
||||
@@ -2,7 +2,8 @@ import logging
|
||||
from uuid import UUID
|
||||
|
||||
import httpx
|
||||
from integrations.utils import CONVERSATION_URL, get_summary_instruction
|
||||
from integrations.utils import get_summary_instruction
|
||||
from integrations.v1_utils import handle_callback_error
|
||||
from pydantic import Field
|
||||
from slack_sdk import WebClient
|
||||
from storage.slack_team_store import SlackTeamStore
|
||||
@@ -39,7 +40,6 @@ class SlackV1CallbackProcessor(EventCallbackProcessor):
|
||||
event: Event,
|
||||
) -> EventCallbackResult | None:
|
||||
"""Process events for Slack V1 integration."""
|
||||
|
||||
# Only handle ConversationStateUpdateEvent
|
||||
if not isinstance(event, ConversationStateUpdateEvent):
|
||||
return None
|
||||
@@ -62,19 +62,14 @@ class SlackV1CallbackProcessor(EventCallbackProcessor):
|
||||
detail=summary,
|
||||
)
|
||||
except Exception as e:
|
||||
_logger.exception('[Slack V1] Error processing callback: %s', e)
|
||||
|
||||
# Only try to post error to Slack if we have basic requirements
|
||||
try:
|
||||
await self._post_summary_to_slack(
|
||||
f'OpenHands encountered an error: **{str(e)}**.\n\n'
|
||||
f'[See the conversation]({CONVERSATION_URL.format(conversation_id)})'
|
||||
'for more information.'
|
||||
)
|
||||
except Exception as post_error:
|
||||
_logger.warning(
|
||||
'[Slack V1] Failed to post error message to Slack: %s', post_error
|
||||
)
|
||||
await handle_callback_error(
|
||||
error=e,
|
||||
conversation_id=conversation_id,
|
||||
service_name='Slack',
|
||||
service_logger=_logger,
|
||||
can_post_error=True, # Slack always attempts to post errors
|
||||
post_error_func=self._post_summary_to_slack,
|
||||
)
|
||||
|
||||
return EventCallbackResult(
|
||||
status=EventCallbackResultStatus.ERROR,
|
||||
@@ -149,8 +144,8 @@ class SlackV1CallbackProcessor(EventCallbackProcessor):
|
||||
send_message_request = AskAgentRequest(question=message_content)
|
||||
|
||||
url = (
|
||||
f'{agent_server_url.rstrip("/")}'
|
||||
f'/api/conversations/{conversation_id}/ask_agent'
|
||||
f"{agent_server_url.rstrip('/')}"
|
||||
f"/api/conversations/{conversation_id}/ask_agent"
|
||||
)
|
||||
headers = {'X-Session-API-Key': session_api_key}
|
||||
payload = send_message_request.model_dump()
|
||||
@@ -212,8 +207,7 @@ class SlackV1CallbackProcessor(EventCallbackProcessor):
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
async def _request_summary(self, conversation_id: UUID) -> str:
|
||||
"""
|
||||
Ask the agent to produce a summary of its work and return the agent response.
|
||||
"""Ask the agent to produce a summary of its work and return the agent response.
|
||||
|
||||
NOTE: This method now returns a string (the agent server's response text)
|
||||
and raises exceptions on errors. The wrapping into EventCallbackResult
|
||||
|
||||
@@ -1,3 +1,8 @@
|
||||
import logging
|
||||
from typing import Callable, Coroutine
|
||||
from uuid import UUID
|
||||
|
||||
from integrations.utils import CONVERSATION_URL
|
||||
from pydantic import SecretStr
|
||||
from server.auth.saas_user_auth import SaasUserAuth
|
||||
from server.auth.token_manager import TokenManager
|
||||
@@ -6,6 +11,78 @@ from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.server.user_auth.user_auth import UserAuth
|
||||
|
||||
|
||||
def is_budget_exceeded_error(error_message: str) -> bool:
    """Report whether an error message describes an exhausted budget.

    The check is a case-insensitive substring match: the message must contain
    both "budget" and "exceeded" (in any order, not necessarily adjacent).
    Callers use the result to log such failures at INFO instead of as errors,
    because hitting the budget is expected cost-control behavior.

    Args:
        error_message: The stringified error to inspect.

    Returns:
        True when both keywords occur in the message, False otherwise.
    """
    normalized = error_message.lower()
    return all(keyword in normalized for keyword in ('budget', 'exceeded'))
|
||||
|
||||
|
||||
BUDGET_EXCEEDED_USER_MESSAGE = 'LLM budget has been exceeded, please re-fill.'
|
||||
|
||||
|
||||
async def handle_callback_error(
    error: Exception,
    conversation_id: UUID,
    service_name: str,
    service_logger: logging.Logger,
    can_post_error: bool,
    post_error_func: Callable[[str], Coroutine],
) -> None:
    """Log a V1 callback failure and, when possible, report it to the platform.

    Shared by the V1 callback processors so they all behave the same way:

    - Budget-exceeded errors are logged at INFO level (expected cost control).
    - Every other error is logged with its traceback.
    - A user-facing message linking to the conversation is posted through
      ``post_error_func``; a failure to post is logged as a warning and never
      propagated.

    Args:
        error: The exception that occurred.
        conversation_id: The conversation ID, used in logs and in the link.
        service_name: Service name for log messages (e.g. "GitHub", "GitLab", "Slack").
        service_logger: The logger instance to use.
        can_post_error: Whether the prerequisites to post an error message are met.
        post_error_func: Async function that posts a message to the platform.
    """
    error_str = str(error)
    budget_exceeded = is_budget_exceeded_error(error_str)

    if budget_exceeded:
        service_logger.info(
            '[%s V1] Budget exceeded for conversation %s: %s',
            service_name,
            conversation_id,
            error,
        )
    else:
        # Pass the exception explicitly: Logger.exception otherwise reads the
        # ambient exception context, which is empty (or unrelated) if this
        # helper is ever invoked outside the originating ``except`` block.
        service_logger.exception(
            '[%s V1] Error processing callback: %s',
            service_name,
            error,
            exc_info=error,
        )

    if not can_post_error:
        return

    # Best effort: everything needed to build and send the message stays
    # inside the try so a formatting or transport failure cannot escape.
    try:
        error_detail = BUDGET_EXCEEDED_USER_MESSAGE if budget_exceeded else error_str
        await post_error_func(
            f'OpenHands encountered an error: **{error_detail}**\n\n'
            f'[See the conversation]({CONVERSATION_URL.format(conversation_id)}) '
            'for more information.'
        )
    except Exception as post_error:
        service_logger.warning(
            '[%s V1] Failed to post error message to %s: %s',
            service_name,
            service_name,
            post_error,
        )
|
||||
|
||||
|
||||
async def get_saas_user_auth(
|
||||
keycloak_user_id: str, token_manager: TokenManager
|
||||
) -> UserAuth:
|
||||
|
||||
7
enterprise/poetry.lock
generated
7
enterprise/poetry.lock
generated
@@ -569,14 +569,14 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "authlib"
|
||||
version = "1.6.6"
|
||||
version = "1.6.7"
|
||||
description = "The ultimate Python library in building OAuth and OpenID Connect servers and clients."
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "authlib-1.6.6-py2.py3-none-any.whl", hash = "sha256:7d9e9bc535c13974313a87f53e8430eb6ea3d1cf6ae4f6efcd793f2e949143fd"},
|
||||
{file = "authlib-1.6.6.tar.gz", hash = "sha256:45770e8e056d0f283451d9996fbb59b70d45722b45d854d58f32878d0a40c38e"},
|
||||
{file = "authlib-1.6.7-py2.py3-none-any.whl", hash = "sha256:c637340d9a02789d2efa1d003a7437d10d3e565237bcb5fcbc6c134c7b95bab0"},
|
||||
{file = "authlib-1.6.7.tar.gz", hash = "sha256:dbf10100011d1e1b34048c9d120e83f13b35d69a826ae762b93d2fb5aafc337b"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -6149,6 +6149,7 @@ aiohttp = ">=3.13.3"
|
||||
anthropic = {version = "*", extras = ["vertex"]}
|
||||
anyio = "4.9"
|
||||
asyncpg = ">=0.30"
|
||||
authlib = ">=1.6.7"
|
||||
bashlex = ">=0.18"
|
||||
boto3 = "*"
|
||||
browsergym-core = "0.13.3"
|
||||
|
||||
@@ -17,7 +17,6 @@ packages = [
|
||||
{ include = "storage" },
|
||||
{ include = "sync" },
|
||||
{ include = "integrations" },
|
||||
{ include = "experiments" },
|
||||
]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
|
||||
@@ -129,10 +129,6 @@ async def _process_batch_operations_background(
|
||||
# No action required
|
||||
continue
|
||||
|
||||
if subpath == 'exp_config.json':
|
||||
# No action required
|
||||
continue
|
||||
|
||||
# Log unhandled paths for future implementation
|
||||
logger.warning(
|
||||
'unknown_path_in_batch_webhook',
|
||||
|
||||
@@ -1,27 +1,52 @@
|
||||
import asyncio
|
||||
import hashlib
|
||||
import hmac
|
||||
import os
|
||||
|
||||
from fastapi import APIRouter, Header, HTTPException, Request
|
||||
from fastapi.responses import JSONResponse
|
||||
from integrations.github.data_collector import GitHubDataCollector
|
||||
from integrations.github.github_manager import GithubManager
|
||||
from integrations.models import Message, SourceType
|
||||
from server.auth.constants import GITHUB_APP_WEBHOOK_SECRET
|
||||
from server.auth.token_manager import TokenManager
|
||||
from starlette.requests import ClientDisconnect
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
# Environment variable to disable GitHub webhooks
|
||||
GITHUB_WEBHOOKS_ENABLED = os.environ.get('GITHUB_WEBHOOKS_ENABLED', '1') in (
|
||||
'1',
|
||||
'true',
|
||||
)
|
||||
github_integration_router = APIRouter(prefix='/integration')
|
||||
token_manager = TokenManager()
|
||||
data_collector = GitHubDataCollector()
|
||||
github_manager = GithubManager(token_manager, data_collector)
|
||||
|
||||
# Lazy-initialized singleton for GitHub manager
|
||||
_github_manager = None
|
||||
|
||||
|
||||
def _get_github_manager():
    """Return the module-wide GitHub manager, building it on first use.

    The manager and its collaborators are imported and constructed only when
    first requested, so importing this module does not construct them.
    """
    global _github_manager
    if _github_manager is not None:
        return _github_manager

    # Deferred imports: resolved on the first call only.
    from integrations.github.data_collector import GitHubDataCollector
    from integrations.github.github_manager import GithubManager
    from server.auth.token_manager import TokenManager

    _github_manager = GithubManager(TokenManager(), GitHubDataCollector())
    return _github_manager
|
||||
|
||||
|
||||
def _get_webhook_secret() -> str:
    """Read the GitHub webhook secret from the environment on every call.

    Looking the value up at call time (instead of import time) lets the module
    be imported before ``GITHUB_APP_WEBHOOK_SECRET`` is set.

    Returns:
        The configured secret, or an empty string when the variable is unset.
        NOTE(review): an empty secret still produces a valid HMAC key, so
        deployments should make sure the variable is always set.
    """
    return os.getenv('GITHUB_APP_WEBHOOK_SECRET', '')
|
||||
|
||||
|
||||
def _is_webhooks_enabled() -> bool:
    """Check whether GitHub webhooks are enabled.

    Reads ``GITHUB_WEBHOOKS_ENABLED`` at call time (for testability). The
    value is compared case-insensitively and with surrounding whitespace
    ignored, so ``1``, ``true``, ``True`` and ``TRUE`` all enable webhooks.

    Returns:
        True when the variable is unset or holds an enabling value, False for
        anything else.
    """
    raw_value = os.environ.get('GITHUB_WEBHOOKS_ENABLED', '1')
    return raw_value.strip().lower() in ('1', 'true')
|
||||
|
||||
|
||||
def verify_github_signature(payload: bytes, signature: str):
|
||||
@@ -30,10 +55,11 @@ def verify_github_signature(payload: bytes, signature: str):
|
||||
status_code=403, detail='x-hub-signature-256 header is missing!'
|
||||
)
|
||||
|
||||
webhook_secret = _get_webhook_secret()
|
||||
expected_signature = (
|
||||
'sha256='
|
||||
+ hmac.new(
|
||||
GITHUB_APP_WEBHOOK_SECRET.encode('utf-8'),
|
||||
webhook_secret.encode('utf-8'),
|
||||
msg=payload,
|
||||
digestmod=hashlib.sha256,
|
||||
).hexdigest()
|
||||
@@ -49,7 +75,7 @@ async def github_events(
|
||||
x_hub_signature_256: str = Header(None),
|
||||
):
|
||||
# Check if GitHub webhooks are enabled
|
||||
if not GITHUB_WEBHOOKS_ENABLED:
|
||||
if not _is_webhooks_enabled():
|
||||
logger.info(
|
||||
'GitHub webhooks are disabled by GITHUB_WEBHOOKS_ENABLED environment variable'
|
||||
)
|
||||
@@ -59,8 +85,7 @@ async def github_events(
|
||||
)
|
||||
|
||||
try:
|
||||
# Add timeout to prevent hanging on slow/stalled clients
|
||||
payload = await asyncio.wait_for(request.body(), timeout=15.0)
|
||||
payload = await request.body()
|
||||
verify_github_signature(payload, x_hub_signature_256)
|
||||
|
||||
payload_data = await request.json()
|
||||
@@ -72,19 +97,22 @@ async def github_events(
|
||||
content={'error': 'Installation ID is missing in the payload.'},
|
||||
)
|
||||
|
||||
# Import Message and SourceType lazily to avoid import-time dependencies
|
||||
from integrations.models import Message, SourceType
|
||||
|
||||
message_payload = {'payload': payload_data, 'installation': installation_id}
|
||||
message = Message(source=SourceType.GITHUB, message=message_payload)
|
||||
await github_manager.receive_message(message)
|
||||
await _get_github_manager().receive_message(message)
|
||||
|
||||
return JSONResponse(
|
||||
status_code=200,
|
||||
content={'message': 'GitHub events endpoint reached successfully.'},
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning('GitHub webhook request timed out waiting for request body')
|
||||
except ClientDisconnect:
|
||||
logger.debug('GitHub webhook client disconnected before completing request')
|
||||
return JSONResponse(
|
||||
status_code=408,
|
||||
content={'error': 'Request timeout - client took too long to send data.'},
|
||||
status_code=499,
|
||||
content={'error': 'Client disconnected.'},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.exception(f'Error processing GitHub event: {e}')
|
||||
|
||||
@@ -391,39 +391,11 @@ class SaasNestedConversationManager(ConversationManager):
|
||||
await self._setup_nested_settings(client, api_url, settings)
|
||||
await self._setup_provider_tokens(client, api_url, settings)
|
||||
await self._setup_custom_secrets(client, api_url, settings.custom_secrets) # type: ignore
|
||||
await self._setup_experiment_config(client, api_url, sid, user_id)
|
||||
await self._create_nested_conversation(
|
||||
client, api_url, sid, user_id, settings, initial_user_msg, replay_json
|
||||
)
|
||||
await self._wait_for_conversation_ready(client, api_url, sid)
|
||||
|
||||
async def _setup_experiment_config(
    self, client: httpx.AsyncClient, api_url: str, sid: str, user_id: str
):
    """Send the experiment-selected system prompt filename to the nested server.

    Runs the config variant test for this user and conversation, then POSTs
    the resulting default agent's ``system_prompt_filename`` to the nested
    conversation's ``exp-config`` endpoint. Raises if the response status
    indicates an error.
    """
    # Imported here to prevent a circular import
    from openhands.experiments.experiment_manager import (
        ExperimentConfig,
        ExperimentManagerImpl,
    )

    variant_config: OpenHandsConfig = ExperimentManagerImpl.run_config_variant_test(
        user_id, sid, self.config
    )
    agent_config = variant_config.get_agent_config(variant_config.default_agent)
    payload = ExperimentConfig(
        config={'system_prompt_filename': agent_config.system_prompt_filename}
    )

    exp_response = await client.post(
        f'{api_url}/api/conversations/{sid}/exp-config',
        json=payload.model_dump(),
    )
    exp_response.raise_for_status()
|
||||
|
||||
async def _setup_nested_settings(
|
||||
self, client: httpx.AsyncClient, api_url: str, settings: Settings
|
||||
) -> None:
|
||||
|
||||
@@ -4,7 +4,6 @@ from storage.billing_session import BillingSession
|
||||
from storage.billing_session_type import BillingSessionType
|
||||
from storage.conversation_callback import CallbackStatus, ConversationCallback
|
||||
from storage.conversation_work import ConversationWork
|
||||
from storage.experiment_assignment import ExperimentAssignment
|
||||
from storage.feedback import ConversationFeedback, Feedback
|
||||
from storage.github_app_installation import GithubAppInstallation
|
||||
from storage.gitlab_webhook import GitlabWebhook, WebhookStatus
|
||||
@@ -50,7 +49,6 @@ __all__ = [
|
||||
'ConversationFeedback',
|
||||
'StoredConversationMetadataSaas',
|
||||
'ConversationWork',
|
||||
'ExperimentAssignment',
|
||||
'Feedback',
|
||||
'GithubAppInstallation',
|
||||
'GitlabWebhook',
|
||||
|
||||
@@ -1,41 +0,0 @@
|
||||
"""
|
||||
Database model for experiment assignments.
|
||||
|
||||
This model tracks which experiments a conversation is assigned to and what variant
|
||||
they received from PostHog feature flags.
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from sqlalchemy import Column, DateTime, String, UniqueConstraint
|
||||
from storage.base import Base
|
||||
|
||||
|
||||
def _new_assignment_id() -> str:
    """Return a fresh random UUID string for use as a primary key."""
    return str(uuid.uuid4())


def _utc_now() -> datetime:
    """Return the current timezone-aware UTC timestamp."""
    return datetime.now(UTC)  # type: ignore[attr-defined]


class ExperimentAssignment(Base):  # type: ignore
    """Row recording which variant of an experiment a conversation received."""

    __tablename__ = 'experiment_assignments'

    # Generated UUID string; callers do not need to supply it.
    id = Column(String, primary_key=True, default=_new_assignment_id)
    conversation_id = Column(String, nullable=True, index=True)
    experiment_name = Column(String, nullable=False)
    variant = Column(String, nullable=False)

    # Timestamps are filled in on insert; updated_at is refreshed on update.
    created_at = Column(DateTime(timezone=True), default=_utc_now, nullable=False)
    updated_at = Column(
        DateTime(timezone=True),
        default=_utc_now,
        onupdate=_utc_now,
        nullable=False,
    )

    # At most one assignment per (conversation, experiment) pair.
    __table_args__ = (
        UniqueConstraint(
            'conversation_id',
            'experiment_name',
            name='uq_experiment_assignments_conversation_experiment',
        ),
    )
|
||||
@@ -1,52 +0,0 @@
|
||||
"""
|
||||
Store for managing experiment assignments.
|
||||
|
||||
This store handles creating and updating experiment assignments for conversations.
|
||||
"""
|
||||
|
||||
from sqlalchemy.dialects.postgresql import insert
|
||||
from storage.database import session_maker
|
||||
from storage.experiment_assignment import ExperimentAssignment
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
|
||||
|
||||
class ExperimentAssignmentStore:
    """Persistence helper for experiment assignments."""

    def update_experiment_variant(
        self,
        conversation_id: str,
        experiment_name: str,
        variant: str,
    ) -> None:
        """Record the variant assigned to a conversation for an experiment.

        The row is inserted with ``ON CONFLICT DO NOTHING`` on the
        (conversation, experiment) unique constraint, so an assignment that
        already exists is left untouched rather than overwritten.

        Args:
            conversation_id: The conversation ID
            experiment_name: The name of the experiment
            variant: The variant assigned
        """
        assignment = {
            'conversation_id': conversation_id,
            'experiment_name': experiment_name,
            'variant': variant,
        }
        # PostgreSQL INSERT ... ON CONFLICT DO NOTHING keeps the first assignment
        insert_stmt = (
            insert(ExperimentAssignment)
            .values(**assignment)
            .on_conflict_do_nothing(
                constraint='uq_experiment_assignments_conversation_experiment'
            )
        )

        with session_maker() as session:
            session.execute(insert_stmt)
            session.commit()

            logger.info(
                'experiment_assignment_store:upserted_variant',
                extra=dict(assignment),
            )
|
||||
@@ -1 +0,0 @@
|
||||
"""Unit tests for experiments module."""
|
||||
@@ -1,149 +0,0 @@
|
||||
# tests/test_condenser_max_step_experiment_v1.py
|
||||
|
||||
from unittest.mock import patch
|
||||
from uuid import uuid4
|
||||
|
||||
from experiments.experiment_manager import SaaSExperimentManager
|
||||
|
||||
# SUT imports (update the module path if needed)
|
||||
from experiments.experiment_versions._004_condenser_max_step_experiment import (
|
||||
handle_condenser_max_step_experiment__v1,
|
||||
)
|
||||
from pydantic import SecretStr
|
||||
|
||||
from openhands.sdk import LLM, Agent
|
||||
from openhands.sdk.context.condenser import LLMSummarizingCondenser
|
||||
|
||||
|
||||
def make_agent() -> Agent:
    """Construct a minimal Agent backed by a stub LLM for use in tests."""
    return Agent(
        llm=LLM(
            usage_id='primary-llm',
            model='provider/model',
            api_key=SecretStr('sk-test'),
        )
    )
|
||||
|
||||
|
||||
def _patch_variant(monkeypatch, return_value):
    """Force the condenser max-step variant lookup to yield ``return_value``."""

    def _fixed_variant(user_id, conv_id):
        # Ignore the inputs: every lookup resolves to the requested variant.
        return return_value

    monkeypatch.setattr(
        'experiments.experiment_versions._004_condenser_max_step_experiment._get_condenser_max_step_variant',
        _fixed_variant,
        raising=True,
    )
|
||||
|
||||
|
||||
def test_control_variant_sets_condenser_with_max_size_120(monkeypatch):
    """The 'control' variant installs a summarizing condenser with max_size 120."""
    _patch_variant(monkeypatch, 'control')
    original = make_agent()

    updated = handle_condenser_max_step_experiment__v1('user-1', uuid4(), original)

    # A new Agent instance is returned, carrying a condenser
    assert updated is not original
    condenser = updated.condenser
    assert isinstance(condenser, LLMSummarizingCondenser)

    # The condenser gets its own LLM; the source agent's LLM is untouched
    assert condenser.llm.usage_id == 'condenser'
    assert original.llm.usage_id == 'primary-llm'

    # Control: max_size = 120, keep_first = 4
    assert condenser.max_size == 120
    assert condenser.keep_first == 4
|
||||
|
||||
|
||||
def test_treatment_variant_sets_condenser_with_max_size_80(monkeypatch):
    """The 'treatment' variant installs a summarizing condenser with max_size 80."""
    _patch_variant(monkeypatch, 'treatment')
    original = make_agent()

    updated = handle_condenser_max_step_experiment__v1('user-2', uuid4(), original)

    assert updated is not original
    condenser = updated.condenser
    assert isinstance(condenser, LLMSummarizingCondenser)
    assert condenser.llm.usage_id == 'condenser'
    assert condenser.max_size == 80
    assert condenser.keep_first == 4
|
||||
|
||||
|
||||
def test_none_variant_returns_original_agent_without_changes(monkeypatch):
    """With no variant assigned, the handler hands back the agent untouched."""
    _patch_variant(monkeypatch, None)
    original = make_agent()

    returned = handle_condenser_max_step_experiment__v1('user-3', uuid4(), original)

    # Same instance, and no condenser was attached
    assert returned is original
    assert getattr(returned, 'condenser', None) is None
|
||||
|
||||
|
||||
def test_unknown_variant_returns_original_agent_without_changes(monkeypatch):
    """An unrecognized variant name leaves the agent untouched."""
    _patch_variant(monkeypatch, 'weird-variant')
    original = make_agent()

    returned = handle_condenser_max_step_experiment__v1('user-4', uuid4(), original)

    assert returned is original
    assert getattr(returned, 'condenser', None) is None
|
||||
|
||||
|
||||
@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', False)
def test_run_agent_variant_tests_v1_noop_when_manager_disabled():
    """With ENABLE_EXPERIMENT_MANAGER off, the very same agent object comes back."""
    original = make_agent()

    returned = SaaSExperimentManager.run_agent_variant_tests__v1(
        user_id='user-123',
        conversation_id=uuid4(),
        agent=original,
    )

    # Identity check: no copy was made
    assert returned is original
|
||||
|
||||
|
||||
@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', True)
@patch('experiments.experiment_manager.EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', True)
def test_run_agent_variant_tests_v1_calls_handler_and_sets_system_prompt(monkeypatch):
    """When enabled, the manager returns a new agent using the long-horizon prompt."""
    original = make_agent()
    conversation = uuid4()
    _patch_variant(monkeypatch, 'treatment')

    updated: Agent = SaaSExperimentManager.run_agent_variant_tests__v1(
        user_id='user-abc',
        conversation_id=conversation,
        agent=original,
    )

    # A different instance than the original is returned
    assert updated is not original
    assert updated.system_prompt_filename == 'system_prompt_long_horizon.j2'
|
||||
|
||||
|
||||
@patch('experiments.experiment_manager.ENABLE_EXPERIMENT_MANAGER', True)
@patch('experiments.experiment_manager.EXPERIMENT_SYSTEM_PROMPT_EXPERIMENT', True)
def test_run_agent_variant_tests_v1_preserves_planning_agent_system_prompt():
    """A planning agent keeps its own system prompt; the experiment must not replace it."""
    planning_prompt = 'system_prompt_planning.j2'
    planner = make_agent().model_copy(
        update={'system_prompt_filename': planning_prompt}
    )

    outcome: Agent = SaaSExperimentManager.run_agent_variant_tests__v1(
        user_id='user-planning',
        conversation_id=uuid4(),
        agent=planner,
    )

    assert outcome.system_prompt_filename == 'system_prompt_planning.j2'
|
||||
@@ -15,6 +15,7 @@ from uuid import uuid4
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from github import GithubException
|
||||
from integrations.github.github_v1_callback_processor import (
|
||||
GithubV1CallbackProcessor,
|
||||
)
|
||||
@@ -734,6 +735,33 @@ class TestGithubV1CallbackProcessor:
|
||||
with pytest.raises(RuntimeError, match='Missing GitHub credentials'):
|
||||
await github_callback_processor._post_summary_to_github('Test summary')
|
||||
|
||||
@patch('integrations.github.github_v1_callback_processor.Auth')
@patch('integrations.github.github_v1_callback_processor.Github')
async def test_post_summary_to_github_deleted_issue_does_not_raise(
    self, mock_github, mock_auth, github_callback_processor
):
    """A 410 (deleted issue) from GitHub is swallowed instead of propagating."""
    issue_gone = GithubException(
        status=410,
        data={'message': 'This issue was deleted'},
        headers={},
    )

    # Client whose repo raises 410 as soon as the issue is looked up
    repo_stub = MagicMock()
    repo_stub.get_issue.side_effect = issue_gone
    client_stub = MagicMock()
    client_stub.get_repo.return_value = repo_stub
    mock_github.return_value.__enter__.return_value = client_stub

    mock_auth.Token.return_value = MagicMock()

    with patch.object(
        github_callback_processor,
        '_get_installation_access_token',
        return_value='test_token',
    ):
        # Completing without an exception is the assertion
        await github_callback_processor._post_summary_to_github('Test summary')
|
||||
|
||||
@patch(
|
||||
'integrations.github.github_v1_callback_processor.GITHUB_APP_CLIENT_ID',
|
||||
'test_client_id',
|
||||
@@ -809,3 +837,94 @@ class TestGithubV1CallbackProcessor:
|
||||
)
|
||||
assert f'conversations/{conversation_id}' in error_comment
|
||||
assert 'for more information.' in error_comment
|
||||
|
||||
@patch(
|
||||
'integrations.github.github_v1_callback_processor.GITHUB_APP_CLIENT_ID',
|
||||
'test_client_id',
|
||||
)
|
||||
@patch(
|
||||
'integrations.github.github_v1_callback_processor.GITHUB_APP_PRIVATE_KEY',
|
||||
'test_private_key',
|
||||
)
|
||||
@patch('integrations.github.github_v1_callback_processor.get_summary_instruction')
|
||||
@patch('openhands.app_server.config.get_httpx_client')
|
||||
@patch('openhands.app_server.config.get_sandbox_service')
|
||||
@patch('openhands.app_server.config.get_app_conversation_info_service')
|
||||
@patch('integrations.github.github_v1_callback_processor._logger')
|
||||
async def test_budget_exceeded_error_logs_info_and_sends_friendly_message(
|
||||
self,
|
||||
mock_logger,
|
||||
mock_get_app_conversation_info_service,
|
||||
mock_get_sandbox_service,
|
||||
mock_get_httpx_client,
|
||||
mock_get_summary_instruction,
|
||||
github_callback_processor,
|
||||
conversation_state_update_event,
|
||||
event_callback,
|
||||
mock_app_conversation_info,
|
||||
mock_sandbox_info,
|
||||
):
|
||||
"""Test that budget exceeded errors are logged at INFO level and user gets friendly message."""
|
||||
conversation_id = uuid4()
|
||||
|
||||
mock_httpx_client = await _setup_happy_path_services(
|
||||
mock_get_app_conversation_info_service,
|
||||
mock_get_sandbox_service,
|
||||
mock_get_httpx_client,
|
||||
mock_app_conversation_info,
|
||||
mock_sandbox_info,
|
||||
)
|
||||
# Simulate a budget exceeded error from the agent server
|
||||
budget_error_msg = (
|
||||
'HTTP 500 error: {"detail":"Internal Server Error",'
|
||||
'"exception":"litellm.BadRequestError: Litellm_proxyException - '
|
||||
'Budget has been exceeded! Current cost: 12.65, Max budget: 12.62"}'
|
||||
)
|
||||
mock_httpx_client.post.side_effect = Exception(budget_error_msg)
|
||||
mock_get_summary_instruction.return_value = 'Please provide a summary'
|
||||
|
||||
with (
|
||||
patch(
|
||||
'integrations.github.github_v1_callback_processor.GithubIntegration'
|
||||
) as mock_github_integration,
|
||||
patch(
|
||||
'integrations.github.github_v1_callback_processor.Github'
|
||||
) as mock_github,
|
||||
):
|
||||
mock_integration = MagicMock()
|
||||
mock_github_integration.return_value = mock_integration
|
||||
mock_integration.get_access_token.return_value.token = 'test_token'
|
||||
|
||||
mock_gh = MagicMock()
|
||||
mock_github.return_value.__enter__.return_value = mock_gh
|
||||
mock_repo = MagicMock()
|
||||
mock_issue = MagicMock()
|
||||
mock_repo.get_issue.return_value = mock_issue
|
||||
mock_gh.get_repo.return_value = mock_repo
|
||||
|
||||
result = await github_callback_processor(
|
||||
conversation_id=conversation_id,
|
||||
callback=event_callback,
|
||||
event=conversation_state_update_event,
|
||||
)
|
||||
|
||||
assert result is not None
|
||||
assert result.status == EventCallbackResultStatus.ERROR
|
||||
|
||||
# Verify exception was NOT called (budget exceeded uses info instead)
|
||||
mock_logger.exception.assert_not_called()
|
||||
|
||||
# Verify budget exceeded info log was called
|
||||
info_calls = [str(call) for call in mock_logger.info.call_args_list]
|
||||
budget_log_found = any('Budget exceeded' in call for call in info_calls)
|
||||
assert budget_log_found, f'Expected budget exceeded log, got: {info_calls}'
|
||||
|
||||
# Verify user-friendly message was posted to GitHub
|
||||
mock_issue.create_comment.assert_called_once()
|
||||
call_args = mock_issue.create_comment.call_args
|
||||
posted_comment = call_args[1].get('body') or call_args[0][0]
|
||||
assert 'OpenHands encountered an error' in posted_comment
|
||||
assert 'LLM budget has been exceeded' in posted_comment
|
||||
assert 'please re-fill' in posted_comment
|
||||
# Should NOT contain the raw error message
|
||||
assert 'litellm.BadRequestError' not in posted_comment
|
||||
|
||||
@@ -374,3 +374,72 @@ class TestGitlabV1CallbackProcessor:
|
||||
assert 'OpenHands encountered an error' in error_comment
|
||||
assert 'Simulated agent server error' in error_comment
|
||||
assert f'conversations/{conversation_id}' in error_comment
|
||||
|
||||
@patch('openhands.app_server.config.get_app_conversation_info_service')
|
||||
@patch('openhands.app_server.config.get_sandbox_service')
|
||||
@patch('openhands.app_server.config.get_httpx_client')
|
||||
@patch('integrations.gitlab.gitlab_v1_callback_processor.get_summary_instruction')
|
||||
@patch('integrations.gitlab.gitlab_service.SaaSGitLabService')
|
||||
@patch('integrations.gitlab.gitlab_v1_callback_processor._logger')
|
||||
async def test_budget_exceeded_error_logs_info_and_sends_friendly_message(
|
||||
self,
|
||||
mock_logger,
|
||||
mock_saas_gitlab_service_cls,
|
||||
mock_get_summary_instruction,
|
||||
mock_get_httpx_client,
|
||||
mock_get_sandbox_service,
|
||||
mock_get_app_conversation_info_service,
|
||||
gitlab_callback_processor,
|
||||
conversation_state_update_event,
|
||||
event_callback,
|
||||
mock_app_conversation_info,
|
||||
mock_sandbox_info,
|
||||
):
|
||||
"""Test that budget exceeded errors are logged at INFO level and user gets friendly message."""
|
||||
conversation_id = uuid4()
|
||||
|
||||
mock_httpx_client = await _setup_happy_path_services(
|
||||
mock_get_app_conversation_info_service,
|
||||
mock_get_sandbox_service,
|
||||
mock_get_httpx_client,
|
||||
mock_app_conversation_info,
|
||||
mock_sandbox_info,
|
||||
)
|
||||
# Simulate a budget exceeded error from the agent server
|
||||
budget_error_msg = (
|
||||
'HTTP 500 error: {"detail":"Internal Server Error",'
|
||||
'"exception":"litellm.BadRequestError: Litellm_proxyException - '
|
||||
'Budget has been exceeded! Current cost: 12.65, Max budget: 12.62"}'
|
||||
)
|
||||
mock_httpx_client.post.side_effect = Exception(budget_error_msg)
|
||||
mock_get_summary_instruction.return_value = 'Please provide a summary'
|
||||
|
||||
mock_gitlab_service = AsyncMock()
|
||||
mock_saas_gitlab_service_cls.return_value = mock_gitlab_service
|
||||
|
||||
result = await gitlab_callback_processor(
|
||||
conversation_id=conversation_id,
|
||||
callback=event_callback,
|
||||
event=conversation_state_update_event,
|
||||
)
|
||||
|
||||
assert result is not None
|
||||
assert result.status == EventCallbackResultStatus.ERROR
|
||||
|
||||
# Verify exception was NOT called (budget exceeded uses info instead)
|
||||
mock_logger.exception.assert_not_called()
|
||||
|
||||
# Verify budget exceeded info log was called
|
||||
info_calls = [str(call) for call in mock_logger.info.call_args_list]
|
||||
budget_log_found = any('Budget exceeded' in call for call in info_calls)
|
||||
assert budget_log_found, f'Expected budget exceeded log, got: {info_calls}'
|
||||
|
||||
# Verify user-friendly message was posted to GitLab
|
||||
mock_gitlab_service.reply_to_issue.assert_called_once()
|
||||
call_args = mock_gitlab_service.reply_to_issue.call_args
|
||||
posted_comment = call_args[0][3] # 4th positional arg is the body
|
||||
assert 'OpenHands encountered an error' in posted_comment
|
||||
assert 'LLM budget has been exceeded' in posted_comment
|
||||
assert 'please re-fill' in posted_comment
|
||||
# Should NOT contain the raw error message
|
||||
assert 'litellm.BadRequestError' not in posted_comment
|
||||
|
||||
@@ -429,3 +429,89 @@ class TestSlackV1CallbackProcessor:
|
||||
assert result is not None
|
||||
assert result.status == EventCallbackResultStatus.ERROR
|
||||
assert expected_error_fragment in result.detail
|
||||
|
||||
@patch('storage.slack_team_store.SlackTeamStore.get_instance')
|
||||
@patch('openhands.app_server.config.get_httpx_client')
|
||||
@patch('openhands.app_server.config.get_sandbox_service')
|
||||
@patch('openhands.app_server.config.get_app_conversation_info_service')
|
||||
@patch('integrations.slack.slack_v1_callback_processor.get_summary_instruction')
|
||||
@patch('integrations.slack.slack_v1_callback_processor._logger')
|
||||
@patch('integrations.slack.slack_v1_callback_processor.WebClient')
|
||||
async def test_budget_exceeded_error_logs_info_and_sends_friendly_message(
|
||||
self,
|
||||
mock_web_client_cls,
|
||||
mock_logger,
|
||||
mock_get_summary_instruction,
|
||||
mock_get_app_conversation_info_service,
|
||||
mock_get_sandbox_service,
|
||||
mock_get_httpx_client,
|
||||
mock_slack_team_store,
|
||||
slack_callback_processor,
|
||||
finish_event,
|
||||
event_callback,
|
||||
mock_app_conversation_info,
|
||||
mock_sandbox_info,
|
||||
):
|
||||
"""Test that budget exceeded errors are logged at INFO level and user gets friendly message."""
|
||||
conversation_id = uuid4()
|
||||
|
||||
# Mock SlackTeamStore
|
||||
mock_store = MagicMock()
|
||||
mock_store.get_team_bot_token = AsyncMock(return_value='xoxb-test-token')
|
||||
mock_slack_team_store.return_value = mock_store
|
||||
|
||||
mock_get_summary_instruction.return_value = 'Please provide a summary'
|
||||
|
||||
# Mock services
|
||||
mock_app_conversation_info_service = AsyncMock()
|
||||
mock_app_conversation_info_service.get_app_conversation_info.return_value = (
|
||||
mock_app_conversation_info
|
||||
)
|
||||
mock_get_app_conversation_info_service.return_value.__aenter__.return_value = (
|
||||
mock_app_conversation_info_service
|
||||
)
|
||||
|
||||
mock_sandbox_service = AsyncMock()
|
||||
mock_sandbox_service.get_sandbox.return_value = mock_sandbox_info
|
||||
mock_get_sandbox_service.return_value.__aenter__.return_value = (
|
||||
mock_sandbox_service
|
||||
)
|
||||
|
||||
# Simulate a budget exceeded error from the agent server
|
||||
budget_error_msg = (
|
||||
'HTTP 500 error: {"detail":"Internal Server Error",'
|
||||
'"exception":"litellm.BadRequestError: Litellm_proxyException - '
|
||||
'Budget has been exceeded! Current cost: 12.65, Max budget: 12.62"}'
|
||||
)
|
||||
mock_httpx_client = AsyncMock()
|
||||
mock_httpx_client.post.side_effect = Exception(budget_error_msg)
|
||||
mock_get_httpx_client.return_value.__aenter__.return_value = mock_httpx_client
|
||||
|
||||
# Mock Slack WebClient
|
||||
mock_slack_client = MagicMock()
|
||||
mock_web_client_cls.return_value = mock_slack_client
|
||||
|
||||
result = await slack_callback_processor(
|
||||
conversation_id, event_callback, finish_event
|
||||
)
|
||||
|
||||
assert result is not None
|
||||
assert result.status == EventCallbackResultStatus.ERROR
|
||||
|
||||
# Verify exception was NOT called (budget exceeded uses info instead)
|
||||
mock_logger.exception.assert_not_called()
|
||||
|
||||
# Verify budget exceeded info log was called
|
||||
info_calls = [str(call) for call in mock_logger.info.call_args_list]
|
||||
budget_log_found = any('Budget exceeded' in call for call in info_calls)
|
||||
assert budget_log_found, f'Expected budget exceeded log, got: {info_calls}'
|
||||
|
||||
# Verify user-friendly message was posted to Slack
|
||||
mock_slack_client.chat_postMessage.assert_called_once()
|
||||
call_kwargs = mock_slack_client.chat_postMessage.call_args[1]
|
||||
posted_message = call_kwargs.get('text', '')
|
||||
assert 'OpenHands encountered an error' in posted_message
|
||||
assert 'LLM budget has been exceeded' in posted_message
|
||||
assert 'please re-fill' in posted_message
|
||||
# Should NOT contain the raw error message
|
||||
assert 'litellm.BadRequestError' not in posted_message
|
||||
|
||||
8
enterprise/tests/unit/server/routes/conftest.py
Normal file
8
enterprise/tests/unit/server/routes/conftest.py
Normal file
@@ -0,0 +1,8 @@
|
||||
"""Pytest configuration for server.routes tests.
|
||||
|
||||
This module sets up the test environment for server routes.
|
||||
|
||||
Note: The server.routes.integration.github module uses lazy initialization
|
||||
for external dependencies (TokenManager, GithubManager, etc.), so it can be
|
||||
imported directly without requiring environment variables to be set.
|
||||
"""
|
||||
@@ -0,0 +1,210 @@
|
||||
"""Unit tests for GitHub integration routes - ClientDisconnect handling.
|
||||
|
||||
These tests verify that ClientDisconnect exceptions are properly handled
|
||||
when the FastAPI endpoint times out before the request body can be fully
|
||||
received from the client.
|
||||
|
||||
These tests import and test the actual github_events endpoint from
|
||||
server.routes.integration.github, mocking only external dependencies.
|
||||
|
||||
Note: The github module uses lazy initialization for external dependencies,
|
||||
so it can be imported directly without requiring environment variables.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from fastapi import Request
|
||||
from server.routes.integration.github import github_events
|
||||
from starlette.requests import ClientDisconnect
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_request():
|
||||
"""Create a mock FastAPI Request object."""
|
||||
req = MagicMock(spec=Request)
|
||||
req.headers = {}
|
||||
return req
|
||||
|
||||
|
||||
def create_valid_signature(payload: bytes, secret: str = 'test-secret') -> str:
|
||||
"""Create a valid HMAC signature for the given payload."""
|
||||
signature = hmac.new(
|
||||
secret.encode('utf-8'),
|
||||
msg=payload,
|
||||
digestmod=hashlib.sha256,
|
||||
).hexdigest()
|
||||
return f'sha256={signature}'
|
||||
|
||||
|
||||
class TestClientDisconnect:
|
||||
"""Test cases for ClientDisconnect handling in github_events endpoint."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('server.routes.integration.github.logger')
|
||||
@patch('server.routes.integration.github._is_webhooks_enabled', return_value=True)
|
||||
async def test_client_disconnect_returns_499(
|
||||
self, mock_webhooks_enabled, mock_logger, mock_request
|
||||
):
|
||||
"""Test that ClientDisconnect is caught and returns 499 status code.
|
||||
|
||||
This tests the scenario where the FastAPI endpoint times out before
|
||||
the request body can be fully received, causing starlette to raise
|
||||
ClientDisconnect.
|
||||
"""
|
||||
# Create a mock request that raises ClientDisconnect when body() is called
|
||||
# This simulates what happens when the client disconnects or times out
|
||||
mock_request.body = AsyncMock(side_effect=ClientDisconnect())
|
||||
|
||||
# Call the endpoint
|
||||
response = await github_events(
|
||||
request=mock_request,
|
||||
x_hub_signature_256='sha256=test',
|
||||
)
|
||||
|
||||
assert response.status_code == 499
|
||||
assert response.body == b'{"error":"Client disconnected."}'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('server.routes.integration.github.logger')
|
||||
@patch('server.routes.integration.github.verify_github_signature')
|
||||
@patch('server.routes.integration.github._is_webhooks_enabled', return_value=True)
|
||||
async def test_client_disconnect_during_json_parsing(
|
||||
self, mock_webhooks_enabled, mock_verify_sig, mock_logger, mock_request
|
||||
):
|
||||
"""Test ClientDisconnect during request.json() call returns 499."""
|
||||
payload = b'{"test": "data"}'
|
||||
mock_request.body = AsyncMock(return_value=payload)
|
||||
# ClientDisconnect can also happen during json parsing
|
||||
mock_request.json = AsyncMock(side_effect=ClientDisconnect())
|
||||
mock_verify_sig.return_value = None # Skip signature verification
|
||||
|
||||
response = await github_events(
|
||||
request=mock_request,
|
||||
x_hub_signature_256='sha256=test',
|
||||
)
|
||||
|
||||
assert response.status_code == 499
|
||||
assert response.body == b'{"error":"Client disconnected."}'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('server.routes.integration.github.logger')
|
||||
@patch('server.routes.integration.github._is_webhooks_enabled', return_value=True)
|
||||
async def test_client_disconnect_does_not_propagate_as_unhandled_exception(
|
||||
self, mock_webhooks_enabled, mock_logger, mock_request
|
||||
):
|
||||
"""Test that ClientDisconnect doesn't cause unhandled exception logging."""
|
||||
mock_request.body = AsyncMock(side_effect=ClientDisconnect())
|
||||
|
||||
# The function should return normally without raising
|
||||
response = await github_events(
|
||||
request=mock_request,
|
||||
x_hub_signature_256='sha256=test',
|
||||
)
|
||||
|
||||
# The generic exception handler should NOT be triggered
|
||||
# (it uses logger.exception which includes 'Error processing GitHub event')
|
||||
mock_logger.exception.assert_not_called()
|
||||
|
||||
assert response.status_code == 499
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('server.routes.integration.github.logger')
|
||||
@patch('server.routes.integration.github._is_webhooks_enabled', return_value=True)
|
||||
async def test_client_disconnect_is_not_caught_by_generic_exception_handler(
|
||||
self, mock_webhooks_enabled, mock_logger, mock_request
|
||||
):
|
||||
"""Test that ClientDisconnect is caught by its specific handler, not the generic one.
|
||||
|
||||
The generic exception handler returns 400 and logs with exception().
|
||||
ClientDisconnect should return 499 and log with debug().
|
||||
"""
|
||||
mock_request.body = AsyncMock(side_effect=ClientDisconnect())
|
||||
|
||||
response = await github_events(
|
||||
request=mock_request,
|
||||
x_hub_signature_256='sha256=test',
|
||||
)
|
||||
|
||||
# Should be 499 (ClientDisconnect), not 400 (generic exception)
|
||||
assert response.status_code == 499
|
||||
|
||||
# Should use debug(), not exception()
|
||||
mock_logger.debug.assert_called_once()
|
||||
mock_logger.exception.assert_not_called()
|
||||
|
||||
|
||||
class TestWebhooksDisabled:
|
||||
"""Test cases for when webhooks are disabled."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('server.routes.integration.github.logger')
|
||||
@patch('server.routes.integration.github._is_webhooks_enabled', return_value=False)
|
||||
async def test_webhooks_disabled_returns_200(
|
||||
self, mock_webhooks_enabled, mock_logger, mock_request
|
||||
):
|
||||
"""Test that disabled webhooks return 200 with appropriate message."""
|
||||
response = await github_events(
|
||||
request=mock_request,
|
||||
x_hub_signature_256='sha256=test',
|
||||
)
|
||||
|
||||
assert response.status_code == 200
|
||||
assert b'GitHub webhooks are currently disabled' in response.body
|
||||
|
||||
|
||||
class TestSuccessfulRequest:
|
||||
"""Test cases for successful webhook processing."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('server.routes.integration.github._get_github_manager')
|
||||
@patch('server.routes.integration.github.verify_github_signature')
|
||||
@patch('server.routes.integration.github.logger')
|
||||
@patch('server.routes.integration.github._is_webhooks_enabled', return_value=True)
|
||||
async def test_successful_request_returns_200(
|
||||
self,
|
||||
mock_webhooks_enabled,
|
||||
mock_logger,
|
||||
mock_verify_sig,
|
||||
mock_get_github_manager,
|
||||
mock_request,
|
||||
):
|
||||
"""Test that a successful request returns 200."""
|
||||
payload = b'{"installation": {"id": 123}}'
|
||||
mock_request.body = AsyncMock(return_value=payload)
|
||||
mock_request.json = AsyncMock(return_value={'installation': {'id': 123}})
|
||||
mock_verify_sig.return_value = None
|
||||
mock_github_manager = MagicMock()
|
||||
mock_github_manager.receive_message = AsyncMock()
|
||||
mock_get_github_manager.return_value = mock_github_manager
|
||||
|
||||
response = await github_events(
|
||||
request=mock_request,
|
||||
x_hub_signature_256='sha256=test',
|
||||
)
|
||||
|
||||
assert response.status_code == 200
|
||||
assert b'GitHub events endpoint reached successfully' in response.body
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch('server.routes.integration.github.verify_github_signature')
|
||||
@patch('server.routes.integration.github.logger')
|
||||
@patch('server.routes.integration.github._is_webhooks_enabled', return_value=True)
|
||||
async def test_missing_installation_id_returns_400(
|
||||
self, mock_webhooks_enabled, mock_logger, mock_verify_sig, mock_request
|
||||
):
|
||||
"""Test that missing installation ID returns 400."""
|
||||
payload = b'{"action": "opened"}'
|
||||
mock_request.body = AsyncMock(return_value=payload)
|
||||
mock_request.json = AsyncMock(return_value={'action': 'opened'})
|
||||
mock_verify_sig.return_value = None
|
||||
|
||||
response = await github_events(
|
||||
request=mock_request,
|
||||
x_hub_signature_256='sha256=test',
|
||||
)
|
||||
|
||||
assert response.status_code == 400
|
||||
assert b'Installation ID is missing' in response.body
|
||||
@@ -5,11 +5,43 @@ import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
|
||||
import { ConversationTabTitle } from "#/components/features/conversation/conversation-tabs/conversation-tab-title";
|
||||
import GitService from "#/api/git-service/git-service.api";
|
||||
import V1GitService from "#/api/git-service/v1-git-service.api";
|
||||
import { useConversationStore } from "#/stores/conversation-store";
|
||||
import { useAgentStore } from "#/stores/agent-store";
|
||||
import { useOptimisticUserMessageStore } from "#/stores/optimistic-user-message-store";
|
||||
import { AgentState } from "#/types/agent-state";
|
||||
import { createChatMessage } from "#/services/chat-service";
|
||||
|
||||
// Mock the services that the hook depends on
|
||||
vi.mock("#/api/git-service/git-service.api");
|
||||
vi.mock("#/api/git-service/v1-git-service.api");
|
||||
|
||||
// Mock i18n
|
||||
vi.mock("react-i18next", async (importOriginal) => {
|
||||
const actual = await importOriginal<typeof import("react-i18next")>();
|
||||
return {
|
||||
...actual,
|
||||
useTranslation: () => ({
|
||||
t: (key: string) => key,
|
||||
}),
|
||||
};
|
||||
});
|
||||
|
||||
// Mock services for Build button
|
||||
const mockSend = vi.fn();
|
||||
|
||||
vi.mock("#/hooks/use-send-message", () => ({
|
||||
useSendMessage: vi.fn(() => ({
|
||||
send: mockSend,
|
||||
})),
|
||||
}));
|
||||
|
||||
vi.mock("#/services/chat-service", () => ({
|
||||
createChatMessage: vi.fn((content, imageUrls, fileUrls, timestamp) => ({
|
||||
action: "message",
|
||||
args: { content, image_urls: imageUrls, file_urls: fileUrls, timestamp },
|
||||
})),
|
||||
}));
|
||||
|
||||
// Mock the hooks that useUnifiedGetGitChanges depends on
|
||||
vi.mock("#/hooks/use-conversation-id", () => ({
|
||||
useConversationId: () => ({
|
||||
@@ -51,11 +83,24 @@ describe("ConversationTabTitle", () => {
|
||||
// Mock GitService methods
|
||||
vi.mocked(GitService.getGitChanges).mockResolvedValue([]);
|
||||
vi.mocked(V1GitService.getGitChanges).mockResolvedValue([]);
|
||||
|
||||
// Reset stores for Build button tests
|
||||
useConversationStore.setState({
|
||||
planContent: null,
|
||||
conversationMode: "plan",
|
||||
});
|
||||
useAgentStore.setState({
|
||||
curAgentState: AgentState.AWAITING_USER_INPUT,
|
||||
});
|
||||
useOptimisticUserMessageStore.setState({
|
||||
optimisticUserMessage: null,
|
||||
});
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.clearAllMocks();
|
||||
queryClient.clear();
|
||||
localStorage.clear();
|
||||
});
|
||||
|
||||
const renderWithProviders = (ui: React.ReactElement) => {
|
||||
@@ -146,4 +191,94 @@ describe("ConversationTabTitle", () => {
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("Build Button", () => {
|
||||
it("should show Build button when conversationKey is 'planner' and planContent exists", () => {
|
||||
// Arrange
|
||||
useConversationStore.setState({ planContent: "# Plan content" });
|
||||
|
||||
// Act
|
||||
renderWithProviders(
|
||||
<ConversationTabTitle title="Planner" conversationKey="planner" />,
|
||||
);
|
||||
|
||||
// Assert
|
||||
const buildButton = screen.getByTestId("planner-tab-build-button");
|
||||
expect(buildButton).toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should not show Build button when conversationKey is not 'planner'", () => {
|
||||
// Arrange
|
||||
useConversationStore.setState({ planContent: "# Plan content" });
|
||||
|
||||
// Act
|
||||
renderWithProviders(
|
||||
<ConversationTabTitle title="Browser" conversationKey="browser" />,
|
||||
);
|
||||
|
||||
// Assert
|
||||
expect(
|
||||
screen.queryByTestId("planner-tab-build-button"),
|
||||
).not.toBeInTheDocument();
|
||||
});
|
||||
|
||||
it("should disable Build button when no planContent exists", () => {
|
||||
// Arrange
|
||||
useConversationStore.setState({ planContent: null });
|
||||
useAgentStore.setState({ curAgentState: AgentState.AWAITING_USER_INPUT });
|
||||
|
||||
// Act
|
||||
renderWithProviders(
|
||||
<ConversationTabTitle title="Planner" conversationKey="planner" />,
|
||||
);
|
||||
|
||||
// Assert
|
||||
const buildButton = screen.getByTestId("planner-tab-build-button");
|
||||
expect(buildButton).toBeDisabled();
|
||||
});
|
||||
|
||||
it("should disable Build button when agent is running", () => {
|
||||
// Arrange
|
||||
useConversationStore.setState({ planContent: "# Plan content" });
|
||||
useAgentStore.setState({ curAgentState: AgentState.RUNNING });
|
||||
|
||||
// Act
|
||||
renderWithProviders(
|
||||
<ConversationTabTitle title="Planner" conversationKey="planner" />,
|
||||
);
|
||||
|
||||
// Assert
|
||||
const buildButton = screen.getByTestId("planner-tab-build-button");
|
||||
expect(buildButton).toBeDisabled();
|
||||
});
|
||||
|
||||
it("should switch to code mode and send message when Build button is clicked", async () => {
|
||||
// Arrange
|
||||
const user = userEvent.setup();
|
||||
useConversationStore.setState({
|
||||
planContent: "# Plan content",
|
||||
conversationMode: "plan",
|
||||
});
|
||||
useAgentStore.setState({ curAgentState: AgentState.AWAITING_USER_INPUT });
|
||||
|
||||
renderWithProviders(
|
||||
<ConversationTabTitle title="Planner" conversationKey="planner" />,
|
||||
);
|
||||
|
||||
const buildButton = screen.getByTestId("planner-tab-build-button");
|
||||
|
||||
// Act
|
||||
await user.click(buildButton);
|
||||
|
||||
// Assert
|
||||
expect(useConversationStore.getState().conversationMode).toBe("code");
|
||||
expect(createChatMessage).toHaveBeenCalledWith(
|
||||
"Execute the plan based on the .agents_tmp/PLAN.md file.",
|
||||
[],
|
||||
[],
|
||||
expect.any(String),
|
||||
);
|
||||
expect(mockSend).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
55
frontend/__tests__/routes/planner-tab.test.tsx
Normal file
55
frontend/__tests__/routes/planner-tab.test.tsx
Normal file
@@ -0,0 +1,55 @@
|
||||
import { screen } from "@testing-library/react";
|
||||
import { describe, expect, it, vi, beforeEach } from "vitest";
|
||||
import PlannerTab from "#/routes/planner-tab";
|
||||
import { renderWithProviders } from "../../test-utils";
|
||||
import { useConversationStore } from "#/stores/conversation-store";
|
||||
|
||||
// Mock the handle plan click hook
|
||||
vi.mock("#/hooks/use-handle-plan-click", () => ({
|
||||
useHandlePlanClick: () => ({
|
||||
handlePlanClick: vi.fn(),
|
||||
}),
|
||||
}));
|
||||
|
||||
describe("PlannerTab", () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
// Reset store state to defaults
|
||||
useConversationStore.setState({
|
||||
planContent: null,
|
||||
conversationMode: "code",
|
||||
});
|
||||
});
|
||||
|
||||
describe("Create a plan button", () => {
|
||||
it("should be enabled when conversation mode is 'code'", () => {
|
||||
// Arrange
|
||||
useConversationStore.setState({
|
||||
planContent: null,
|
||||
conversationMode: "code",
|
||||
});
|
||||
|
||||
// Act
|
||||
renderWithProviders(<PlannerTab />);
|
||||
|
||||
// Assert
|
||||
const button = screen.getByRole("button");
|
||||
expect(button).not.toBeDisabled();
|
||||
});
|
||||
|
||||
it("should be disabled when conversation mode is 'plan'", () => {
|
||||
// Arrange
|
||||
useConversationStore.setState({
|
||||
planContent: null,
|
||||
conversationMode: "plan",
|
||||
});
|
||||
|
||||
// Act
|
||||
renderWithProviders(<PlannerTab />);
|
||||
|
||||
// Assert
|
||||
const button = screen.getByRole("button");
|
||||
expect(button).toBeDisabled();
|
||||
});
|
||||
});
|
||||
});
|
||||
18
frontend/package-lock.json
generated
18
frontend/package-lock.json
generated
@@ -8526,10 +8526,13 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/dompurify": {
|
||||
"version": "3.2.7",
|
||||
"resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.2.7.tgz",
|
||||
"integrity": "sha512-WhL/YuveyGXJaerVlMYGWhvQswa7myDG17P7Vu65EWC05o8vfeNbvNf4d/BOvH99+ZW+LlQsc1GDKMa1vNK6dw==",
|
||||
"version": "3.3.2",
|
||||
"resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.3.2.tgz",
|
||||
"integrity": "sha512-6obghkliLdmKa56xdbLOpUZ43pAR6xFy1uOrxBaIDjT+yaRuuybLjGS9eVBoSR/UPU5fq3OXClEHLJNGvbxKpQ==",
|
||||
"license": "(MPL-2.0 OR Apache-2.0)",
|
||||
"engines": {
|
||||
"node": ">=20"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@types/trusted-types": "^2.0.7"
|
||||
}
|
||||
@@ -13997,15 +14000,6 @@
|
||||
"web-vitals": "^5.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/posthog-js/node_modules/dompurify": {
|
||||
"version": "3.3.1",
|
||||
"resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.3.1.tgz",
|
||||
"integrity": "sha512-qkdCKzLNtrgPFP1Vo+98FRzJnBRGe4ffyCea9IwHB1fyxPOeNTHpLKYGd4Uk9xvNoH0ZoOjwZxNptyMwqrId1Q==",
|
||||
"license": "(MPL-2.0 OR Apache-2.0)",
|
||||
"optionalDependencies": {
|
||||
"@types/trusted-types": "^2.0.7"
|
||||
}
|
||||
},
|
||||
"node_modules/preact": {
|
||||
"version": "10.28.2",
|
||||
"resolved": "https://registry.npmjs.org/preact/-/preact-10.28.2.tgz",
|
||||
|
||||
@@ -127,5 +127,8 @@
|
||||
"workerDirectory": [
|
||||
"public"
|
||||
]
|
||||
},
|
||||
"overrides": {
|
||||
"dompurify": "3.3.2"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,21 +1,40 @@
|
||||
import { useTranslation } from "react-i18next";
|
||||
import RefreshIcon from "#/icons/u-refresh.svg?react";
|
||||
import { useUnifiedGetGitChanges } from "#/hooks/query/use-unified-get-git-changes";
|
||||
import { useHandleBuildPlanClick } from "#/hooks/use-handle-build-plan-click";
|
||||
import { useAgentState } from "#/hooks/use-agent-state";
|
||||
import { useConversationStore } from "#/stores/conversation-store";
|
||||
import { AgentState } from "#/types/agent-state";
|
||||
import { I18nKey } from "#/i18n/declaration";
|
||||
import { cn } from "#/utils/utils";
|
||||
import { Typography } from "#/ui/typography";
|
||||
|
||||
type ConversationTabTitleProps = {
|
||||
title: string;
|
||||
conversationKey: string;
|
||||
};
|
||||
|
||||
/* eslint-disable i18next/no-literal-string */
|
||||
export function ConversationTabTitle({
|
||||
title,
|
||||
conversationKey,
|
||||
}: ConversationTabTitleProps) {
|
||||
const { t } = useTranslation();
|
||||
const { refetch } = useUnifiedGetGitChanges();
|
||||
const { handleBuildPlanClick } = useHandleBuildPlanClick();
|
||||
const { curAgentState } = useAgentState();
|
||||
const { planContent } = useConversationStore();
|
||||
|
||||
const handleRefresh = () => {
|
||||
refetch();
|
||||
};
|
||||
|
||||
// Determine if Build button should be disabled
|
||||
const isAgentRunning =
|
||||
curAgentState === AgentState.RUNNING ||
|
||||
curAgentState === AgentState.LOADING;
|
||||
const isBuildDisabled = isAgentRunning || !planContent;
|
||||
|
||||
return (
|
||||
<div className="flex flex-row items-center justify-between border-b border-[#474A54] py-2 px-3">
|
||||
<span className="text-xs font-medium text-white">{title}</span>
|
||||
@@ -28,6 +47,24 @@ export function ConversationTabTitle({
|
||||
<RefreshIcon width={12.75} height={15} color="#ffffff" />
|
||||
</button>
|
||||
)}
|
||||
{conversationKey === "planner" && (
|
||||
<button
|
||||
type="button"
|
||||
onClick={handleBuildPlanClick}
|
||||
disabled={isBuildDisabled}
|
||||
className={cn(
|
||||
"flex items-center justify-center h-5 min-w-17 px-2 rounded bg-white transition-opacity",
|
||||
isBuildDisabled
|
||||
? "opacity-50 cursor-not-allowed"
|
||||
: "hover:opacity-90 cursor-pointer",
|
||||
)}
|
||||
data-testid="planner-tab-build-button"
|
||||
>
|
||||
<Typography.Text className="text-black text-[11px] font-medium leading-5">
|
||||
{t(I18nKey.COMMON$BUILD)} ⌘↩
|
||||
</Typography.Text>
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import { useScrollToBottom } from "#/hooks/use-scroll-to-bottom";
|
||||
import { MarkdownRenderer } from "#/components/features/markdown/markdown-renderer";
|
||||
import { planComponents } from "#/components/features/markdown/plan-components";
|
||||
import { useHandlePlanClick } from "#/hooks/use-handle-plan-click";
|
||||
import { cn } from "#/utils/utils";
|
||||
|
||||
function PlannerTab() {
|
||||
const { t } = useTranslation();
|
||||
@@ -14,7 +15,8 @@ function PlannerTab() {
|
||||
React.useRef<HTMLDivElement>(null),
|
||||
);
|
||||
|
||||
const { planContent } = useConversationStore();
|
||||
const { planContent, conversationMode } = useConversationStore();
|
||||
const isPlanMode = conversationMode === "plan";
|
||||
const { handlePlanClick } = useHandlePlanClick();
|
||||
|
||||
if (planContent !== null && planContent !== undefined) {
|
||||
@@ -40,7 +42,13 @@ function PlannerTab() {
|
||||
<button
|
||||
type="button"
|
||||
onClick={handlePlanClick}
|
||||
className="flex w-[164px] h-[40px] p-2 justify-center items-center shrink-0 rounded-lg bg-white overflow-hidden text-black text-ellipsis font-sans text-[16px] not-italic font-normal leading-[20px] hover:cursor-pointer hover:opacity-80"
|
||||
disabled={isPlanMode}
|
||||
className={cn(
|
||||
"flex w-[164px] h-[40px] p-2 justify-center items-center shrink-0 rounded-lg bg-white overflow-hidden text-black text-ellipsis font-sans text-[16px] not-italic font-normal leading-[20px]",
|
||||
isPlanMode
|
||||
? "opacity-50 cursor-not-allowed"
|
||||
: "hover:cursor-pointer hover:opacity-80",
|
||||
)}
|
||||
>
|
||||
{t(I18nKey.COMMON$CREATE_A_PLAN)}
|
||||
</button>
|
||||
|
||||
@@ -10,6 +10,7 @@ export const VERIFIED_PROVIDERS = [
|
||||
export const VERIFIED_MODELS = [
|
||||
"claude-opus-4-6",
|
||||
"claude-opus-4-5-20251101",
|
||||
"claude-sonnet-4-6",
|
||||
"claude-sonnet-4-5-20250929",
|
||||
"gpt-5.2-codex",
|
||||
"gpt-5.2",
|
||||
@@ -19,8 +20,11 @@ export const VERIFIED_MODELS = [
|
||||
"deepseek-chat",
|
||||
"devstral-medium-2512",
|
||||
"kimi-k2-0711-preview",
|
||||
"kimi-k2.5",
|
||||
"qwen3-coder-480b",
|
||||
"qwen3-coder-next",
|
||||
"glm-4.7",
|
||||
"glm-5",
|
||||
];
|
||||
|
||||
// LiteLLM does not return OpenAI models with the provider, so we list them here to set them ourselves for consistency
|
||||
@@ -55,6 +59,7 @@ export const VERIFIED_MISTRAL_MODELS = ["devstral-medium-2512"];
|
||||
export const VERIFIED_OPENHANDS_MODELS = [
|
||||
"claude-opus-4-6",
|
||||
"claude-opus-4-5-20251101",
|
||||
"claude-sonnet-4-6",
|
||||
"claude-sonnet-4-5-20250929",
|
||||
"gpt-5.2-codex",
|
||||
"gpt-5.2",
|
||||
@@ -63,8 +68,11 @@ export const VERIFIED_OPENHANDS_MODELS = [
|
||||
"gemini-3-flash-preview",
|
||||
"devstral-medium-2512",
|
||||
"kimi-k2-0711-preview",
|
||||
"kimi-k2.5",
|
||||
"qwen3-coder-480b",
|
||||
"qwen3-coder-next",
|
||||
"glm-4.7",
|
||||
"glm-5",
|
||||
];
|
||||
|
||||
// Default model for OpenHands provider
|
||||
|
||||
@@ -77,7 +77,6 @@ from openhands.app_server.utils.llm_metadata import (
|
||||
get_llm_metadata,
|
||||
should_set_litellm_extra_body,
|
||||
)
|
||||
from openhands.experiments.experiment_manager import ExperimentManagerImpl
|
||||
from openhands.integrations.provider import ProviderType
|
||||
from openhands.integrations.service_types import SuggestedTask
|
||||
from openhands.sdk import Agent, AgentContext, LocalWorkspace
|
||||
@@ -1140,7 +1139,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
|
||||
working_dir: str,
|
||||
plugins: list[PluginSpec] | None = None,
|
||||
) -> StartConversationRequest:
|
||||
"""Finalize the conversation request with experiment variants and skills.
|
||||
"""Finalize the conversation request with skills and metadata.
|
||||
|
||||
Args:
|
||||
agent: The configured agent
|
||||
@@ -1161,13 +1160,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
|
||||
# Generate conversation ID if not provided
|
||||
conversation_id = conversation_id or uuid4()
|
||||
|
||||
# Apply experiment variants
|
||||
agent = ExperimentManagerImpl.run_agent_variant_tests__v1(
|
||||
user.id, conversation_id, agent
|
||||
)
|
||||
|
||||
# Update agent's LLM with litellm_extra_body metadata for tracing
|
||||
# This is done after experiment variants to ensure the final LLM config is used
|
||||
agent = self._update_agent_with_llm_metadata(agent, conversation_id, user.id)
|
||||
|
||||
# Load and merge skills if remote workspace is available
|
||||
@@ -1230,7 +1223,7 @@ class LiveStatusAppConversationService(AppConversationServiceBase):
|
||||
1. Setting up git provider secrets
|
||||
2. Configuring LLM and MCP settings
|
||||
3. Creating an agent with appropriate context
|
||||
4. Finalizing the request with skills and experiment variants
|
||||
4. Finalizing the request with skills and metadata
|
||||
5. Passing plugins to the agent server for remote plugin loading
|
||||
"""
|
||||
user = await self.user_context.get_user_info()
|
||||
|
||||
@@ -13,7 +13,7 @@ from openhands.sdk.utils.models import DiscriminatedUnionMixin
|
||||
|
||||
# The version of the agent server to use for deployments.
|
||||
# Typically this will be the same as the values from the pyproject.toml
|
||||
AGENT_SERVER_IMAGE = 'ghcr.io/openhands/agent-server:50d8f1b-python'
|
||||
AGENT_SERVER_IMAGE = 'ghcr.io/openhands/agent-server:2c1e72a-python'
|
||||
|
||||
|
||||
class SandboxSpecService(ABC):
|
||||
|
||||
@@ -1,72 +0,0 @@
|
||||
import os
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from openhands.core.config.openhands_config import OpenHandsConfig
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.sdk import Agent
|
||||
from openhands.server.session.conversation_init_data import ConversationInitData
|
||||
from openhands.server.shared import file_store
|
||||
from openhands.storage.locations import get_experiment_config_filename
|
||||
from openhands.utils.import_utils import get_impl
|
||||
|
||||
|
||||
class ExperimentConfig(BaseModel):
|
||||
config: dict[str, str] | None = None
|
||||
|
||||
|
||||
def load_experiment_config(conversation_id: str) -> ExperimentConfig | None:
|
||||
try:
|
||||
file_path = get_experiment_config_filename(conversation_id)
|
||||
exp_config = file_store.read(file_path)
|
||||
return ExperimentConfig.model_validate_json(exp_config)
|
||||
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.warning(f'Failed to load experiment config: {e}')
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class ExperimentManager:
|
||||
@staticmethod
|
||||
def run_agent_variant_tests__v1(
|
||||
user_id: str | None, conversation_id: UUID, agent: Agent
|
||||
) -> Agent:
|
||||
return agent
|
||||
|
||||
@staticmethod
|
||||
def run_conversation_variant_test(
|
||||
user_id: str | None,
|
||||
conversation_id: str,
|
||||
conversation_settings: ConversationInitData,
|
||||
) -> ConversationInitData:
|
||||
return conversation_settings
|
||||
|
||||
@staticmethod
|
||||
def run_config_variant_test(
|
||||
user_id: str | None, conversation_id: str, config: OpenHandsConfig
|
||||
) -> OpenHandsConfig:
|
||||
exp_config = load_experiment_config(conversation_id)
|
||||
if exp_config and exp_config.config:
|
||||
agent_cfg = config.get_agent_config(config.default_agent)
|
||||
try:
|
||||
for attr, value in exp_config.config.items():
|
||||
if hasattr(agent_cfg, attr):
|
||||
logger.info(
|
||||
f'Set attrib {attr} to {value} for {conversation_id}'
|
||||
)
|
||||
setattr(agent_cfg, attr, value)
|
||||
except Exception as e:
|
||||
logger.warning(f'Error processing exp config: {e}')
|
||||
|
||||
return config
|
||||
|
||||
|
||||
experiment_manager_cls = os.environ.get(
|
||||
'OPENHANDS_EXPERIMENT_MANAGER_CLS',
|
||||
'openhands.experiments.experiment_manager.ExperimentManager',
|
||||
)
|
||||
ExperimentManagerImpl = get_impl(ExperimentManager, experiment_manager_cls)
|
||||
@@ -93,12 +93,14 @@ FUNCTION_CALLING_PATTERNS: list[str] = [
|
||||
# Others
|
||||
'kimi-k2-0711-preview',
|
||||
'kimi-k2-instruct',
|
||||
'kimi-k2.5',
|
||||
'qwen3-coder*',
|
||||
'qwen3-coder-480b-a35b-instruct',
|
||||
'deepseek-chat',
|
||||
'grok-code-fast-1',
|
||||
# GLM series - verified via official docs and litellm config
|
||||
'glm-4*',
|
||||
'glm-5*',
|
||||
]
|
||||
|
||||
REASONING_EFFORT_PATTERNS: list[str] = [
|
||||
@@ -117,9 +119,13 @@ REASONING_EFFORT_PATTERNS: list[str] = [
|
||||
# DeepSeek reasoning family
|
||||
'deepseek-r1-0528*',
|
||||
'claude-sonnet-4-5*',
|
||||
'claude-sonnet-4-6*',
|
||||
'claude-haiku-4-5*',
|
||||
# Kimi series - verified via litellm config
|
||||
'kimi-k2.5',
|
||||
# GLM series - verified via litellm config
|
||||
'glm-4*',
|
||||
'glm-5*',
|
||||
]
|
||||
|
||||
PROMPT_CACHE_PATTERNS: list[str] = [
|
||||
@@ -133,8 +139,11 @@ PROMPT_CACHE_PATTERNS: list[str] = [
|
||||
'claude-3-opus-20240229',
|
||||
'claude-sonnet-4*',
|
||||
'claude-opus-4*',
|
||||
# Kimi series - verified via litellm config
|
||||
'kimi-k2.5',
|
||||
# GLM series - verified via litellm config
|
||||
'glm-4*',
|
||||
'glm-5*',
|
||||
]
|
||||
|
||||
SUPPORTS_STOP_WORDS_FALSE_PATTERNS: list[str] = [
|
||||
|
||||
@@ -28,7 +28,6 @@ from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.action import MessageAction
|
||||
from openhands.events.nested_event_store import NestedEventStore
|
||||
from openhands.events.stream import EventStream
|
||||
from openhands.experiments.experiment_manager import ExperimentManagerImpl
|
||||
from openhands.integrations.provider import PROVIDER_TOKEN_TYPE, ProviderHandler
|
||||
from openhands.runtime import get_runtime_cls
|
||||
from openhands.runtime.impl.docker.docker_runtime import DockerRuntime
|
||||
@@ -551,12 +550,8 @@ class DockerNestedConversationManager(ConversationManager):
|
||||
# This session is created here only because it is the easiest way to get a runtime, which
|
||||
# is the easiest way to create the needed docker container
|
||||
|
||||
config: OpenHandsConfig = ExperimentManagerImpl.run_config_variant_test(
|
||||
user_id, sid, self.config
|
||||
)
|
||||
|
||||
llm_registry, conversation_stats, config = (
|
||||
create_registry_and_conversation_stats(config, sid, user_id, settings)
|
||||
create_registry_and_conversation_stats(self.config, sid, user_id, settings)
|
||||
)
|
||||
|
||||
session = Session(
|
||||
|
||||
@@ -60,7 +60,6 @@ from openhands.events.observation import (
|
||||
AgentStateChangedObservation,
|
||||
NullObservation,
|
||||
)
|
||||
from openhands.experiments.experiment_manager import ExperimentConfig
|
||||
from openhands.integrations.provider import (
|
||||
PROVIDER_TOKEN_TYPE,
|
||||
ProviderHandler,
|
||||
@@ -109,7 +108,6 @@ from openhands.storage.data_models.conversation_metadata import (
|
||||
from openhands.storage.data_models.conversation_status import ConversationStatus
|
||||
from openhands.storage.data_models.secrets import Secrets
|
||||
from openhands.storage.data_models.settings import Settings
|
||||
from openhands.storage.locations import get_experiment_config_filename
|
||||
from openhands.storage.settings.settings_store import SettingsStore
|
||||
from openhands.utils.async_utils import wait_all
|
||||
from openhands.utils.conversation_summary import get_default_conversation_title
|
||||
@@ -1240,32 +1238,6 @@ async def update_conversation(
|
||||
)
|
||||
|
||||
|
||||
@app.post('/conversations/{conversation_id}/exp-config')
|
||||
def add_experiment_config_for_conversation(
|
||||
exp_config: ExperimentConfig,
|
||||
conversation_id: str = Depends(validate_conversation_id),
|
||||
) -> bool:
|
||||
exp_config_filepath = get_experiment_config_filename(conversation_id)
|
||||
exists = False
|
||||
try:
|
||||
file_store.read(exp_config_filepath)
|
||||
exists = True
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
|
||||
# Don't modify again if it already exists
|
||||
if exists:
|
||||
return False
|
||||
|
||||
try:
|
||||
file_store.write(exp_config_filepath, exp_config.model_dump_json())
|
||||
except Exception as e:
|
||||
logger.info(f'Failed to write experiment config for {conversation_id}: {e}')
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _parse_combined_page_id(page_id: str | None) -> tuple[str | None, str | None]:
|
||||
"""Parse combined page_id to extract separate V0 and V1 page_ids.
|
||||
|
||||
|
||||
@@ -13,7 +13,6 @@ from typing import Any
|
||||
from openhands.core.config.mcp_config import MCPConfig
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.action.message import MessageAction
|
||||
from openhands.experiments.experiment_manager import ExperimentManagerImpl
|
||||
from openhands.integrations.provider import (
|
||||
CUSTOM_SECRETS_TYPE,
|
||||
PROVIDER_TOKEN_TYPE,
|
||||
@@ -142,10 +141,6 @@ async def start_conversation(
|
||||
|
||||
conversation_init_data = ConversationInitData(**session_init_args)
|
||||
|
||||
conversation_init_data = ExperimentManagerImpl.run_conversation_variant_test(
|
||||
user_id, conversation_id, conversation_init_data
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f'Starting agent loop for conversation {conversation_id}',
|
||||
extra={'user_id': user_id, 'session_id': conversation_id},
|
||||
@@ -281,8 +276,4 @@ async def setup_init_conversation_settings(
|
||||
if user_secrets:
|
||||
session_init_args['custom_secrets'] = user_secrets.custom_secrets
|
||||
|
||||
conversation_init_data = ConversationInitData(**session_init_args)
|
||||
# We should recreate the same experiment conditions when restarting a conversation
|
||||
return ExperimentManagerImpl.run_conversation_variant_test(
|
||||
user_id, conversation_id, conversation_init_data
|
||||
)
|
||||
return ConversationInitData(**session_init_args)
|
||||
|
||||
@@ -108,13 +108,6 @@ class WebSession:
|
||||
EventStreamSubscriber.SERVER, self.on_event, self.sid
|
||||
)
|
||||
self.config = config
|
||||
|
||||
# Lazy import to avoid circular dependency
|
||||
from openhands.experiments.experiment_manager import ExperimentManagerImpl
|
||||
|
||||
self.config = ExperimentManagerImpl.run_config_variant_test(
|
||||
user_id, sid, self.config
|
||||
)
|
||||
self.loop = asyncio.get_event_loop()
|
||||
self.user_id = user_id
|
||||
|
||||
|
||||
@@ -36,7 +36,3 @@ def get_conversation_llm_registry_filename(sid: str, user_id: str | None = None)
|
||||
|
||||
def get_conversation_stats_filename(sid: str, user_id: str | None = None) -> str:
|
||||
return f'{get_conversation_dir(sid, user_id)}conversation_stats.pkl'
|
||||
|
||||
|
||||
def get_experiment_config_filename(sid: str, user_id: str | None = None) -> str:
|
||||
return f'{get_conversation_dir(sid, user_id)}exp_config.json'
|
||||
|
||||
@@ -16,6 +16,7 @@ from openhands.llm import bedrock
|
||||
OPENHANDS_MODELS = [
|
||||
'openhands/claude-opus-4-6',
|
||||
'openhands/claude-opus-4-5-20251101',
|
||||
'openhands/claude-sonnet-4-6',
|
||||
'openhands/claude-sonnet-4-5-20250929',
|
||||
'openhands/gpt-5.2-codex',
|
||||
'openhands/gpt-5.2',
|
||||
@@ -25,8 +26,11 @@ OPENHANDS_MODELS = [
|
||||
'openhands/deepseek-chat',
|
||||
'openhands/devstral-medium-2512',
|
||||
'openhands/kimi-k2-0711-preview',
|
||||
'openhands/kimi-k2.5',
|
||||
'openhands/qwen3-coder-480b',
|
||||
'openhands/qwen3-coder-next',
|
||||
'openhands/glm-4.7',
|
||||
'openhands/glm-5',
|
||||
]
|
||||
|
||||
CLARIFAI_MODELS = [
|
||||
|
||||
8
poetry.lock
generated
8
poetry.lock
generated
@@ -573,14 +573,14 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "authlib"
|
||||
version = "1.6.6"
|
||||
version = "1.6.7"
|
||||
description = "The ultimate Python library in building OAuth and OpenID Connect servers and clients."
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "authlib-1.6.6-py2.py3-none-any.whl", hash = "sha256:7d9e9bc535c13974313a87f53e8430eb6ea3d1cf6ae4f6efcd793f2e949143fd"},
|
||||
{file = "authlib-1.6.6.tar.gz", hash = "sha256:45770e8e056d0f283451d9996fbb59b70d45722b45d854d58f32878d0a40c38e"},
|
||||
{file = "authlib-1.6.7-py2.py3-none-any.whl", hash = "sha256:c637340d9a02789d2efa1d003a7437d10d3e565237bcb5fcbc6c134c7b95bab0"},
|
||||
{file = "authlib-1.6.7.tar.gz", hash = "sha256:dbf10100011d1e1b34048c9d120e83f13b35d69a826ae762b93d2fb5aafc337b"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -14691,4 +14691,4 @@ third-party-runtimes = ["daytona", "e2b-code-interpreter", "modal", "runloop-api
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = "^3.12,<3.14"
|
||||
content-hash = "8238ef4e4687e246f55f9d524b0b1d81df7187abdec0fc9f1b121ae0a9e0caa0"
|
||||
content-hash = "f51ce6271ad5a8141386895148e95b9e28a24ceadd0acd402220485a761f9e62"
|
||||
|
||||
@@ -25,6 +25,7 @@ dependencies = [
|
||||
"anthropic[vertex]",
|
||||
"anyio==4.9",
|
||||
"asyncpg>=0.30",
|
||||
"authlib>=1.6.7",
|
||||
"bashlex>=0.18",
|
||||
"boto3",
|
||||
"browsergym-core==0.13.3",
|
||||
@@ -33,7 +34,7 @@ dependencies = [
|
||||
"dirhash",
|
||||
"docker",
|
||||
"fastapi",
|
||||
"fastmcp>=2.12.4",
|
||||
"fastmcp>=2.12.4,<2.12.5",
|
||||
"google-api-python-client>=2.164",
|
||||
"google-auth-httplib2",
|
||||
"google-auth-oauthlib",
|
||||
@@ -160,6 +161,7 @@ include = [
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.12,<3.14"
|
||||
authlib = ">=1.6.7" # Pinned to fix CVE-2026-28802
|
||||
litellm = ">=1.74.3, !=1.64.4, !=1.67.*" # avoid 1.64.4 (known bug) & 1.67.* (known bug #10272)
|
||||
openai = "2.8.0" # Pin due to litellm incompatibility with >=1.100.0 (BerriAI/litellm#13711)
|
||||
aiohttp = ">=3.13.3" # Pin to avoid CVE-2025-69223 (vulnerable versions < 3.13.3)
|
||||
|
||||
@@ -129,9 +129,8 @@ def generate_openapi_spec():
|
||||
"""Generate the OpenAPI specification from the FastAPI app."""
|
||||
spec = app.openapi()
|
||||
|
||||
# Explicitly exclude certain endpoints that are operational, experimental, or UI-only convenience
|
||||
# Explicitly exclude certain endpoints that are operational or UI-only convenience
|
||||
excluded_endpoints = [
|
||||
'/api/conversations/{conversation_id}/exp-config', # Internal experimentation endpoint
|
||||
'/server_info', # Operational/system diagnostics
|
||||
'/api/conversations/{conversation_id}/vscode-url', # UI/runtime convenience
|
||||
'/api/conversations/{conversation_id}/web-hosts', # UI/runtime convenience
|
||||
|
||||
@@ -1032,27 +1032,17 @@ class TestLiveStatusAppConversationService:
|
||||
assert agent_context.system_message_suffix is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
|
||||
)
|
||||
async def test_finalize_conversation_request_with_skills(
|
||||
self, mock_experiment_manager
|
||||
):
|
||||
async def test_finalize_conversation_request_with_skills(self):
|
||||
"""Test _finalize_conversation_request with skills loading."""
|
||||
# Arrange
|
||||
mock_agent = Mock(spec=Agent)
|
||||
|
||||
# Create mock LLM with required attributes for _update_agent_with_llm_metadata
|
||||
mock_llm = Mock(spec=LLM)
|
||||
mock_llm.model = 'gpt-4' # Non-openhands model, so no metadata update
|
||||
mock_llm.usage_id = 'agent'
|
||||
|
||||
mock_updated_agent = Mock(spec=Agent)
|
||||
mock_updated_agent.llm = mock_llm
|
||||
mock_updated_agent.condenser = None # No condenser
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
|
||||
mock_updated_agent
|
||||
)
|
||||
# Arrange
|
||||
mock_agent = Mock(spec=Agent)
|
||||
mock_agent.llm = mock_llm
|
||||
mock_agent.condenser = None # No condenser
|
||||
|
||||
conversation_id = uuid4()
|
||||
workspace = LocalWorkspace(working_dir='/test')
|
||||
@@ -1061,9 +1051,7 @@ class TestLiveStatusAppConversationService:
|
||||
remote_workspace = Mock(spec=AsyncRemoteWorkspace)
|
||||
|
||||
# Mock the skills loading method
|
||||
self.service._load_skills_and_update_agent = AsyncMock(
|
||||
return_value=mock_updated_agent
|
||||
)
|
||||
self.service._load_skills_and_update_agent = AsyncMock(return_value=mock_agent)
|
||||
|
||||
# Act
|
||||
result = await self.service._finalize_conversation_request(
|
||||
@@ -1082,44 +1070,24 @@ class TestLiveStatusAppConversationService:
|
||||
# Assert
|
||||
assert isinstance(result, StartConversationRequest)
|
||||
assert result.conversation_id == conversation_id
|
||||
assert result.agent == mock_updated_agent
|
||||
assert result.workspace == workspace
|
||||
assert result.initial_message == initial_message
|
||||
assert result.secrets == secrets
|
||||
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.assert_called_once_with(
|
||||
self.mock_user.id, conversation_id, mock_agent
|
||||
)
|
||||
self.service._load_skills_and_update_agent.assert_called_once_with(
|
||||
self.mock_sandbox,
|
||||
mock_updated_agent,
|
||||
remote_workspace,
|
||||
'test_repo',
|
||||
'/test/dir',
|
||||
)
|
||||
self.service._load_skills_and_update_agent.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
|
||||
)
|
||||
async def test_finalize_conversation_request_without_skills(
|
||||
self, mock_experiment_manager
|
||||
):
|
||||
async def test_finalize_conversation_request_without_skills(self):
|
||||
"""Test _finalize_conversation_request without remote workspace (no skills)."""
|
||||
# Arrange
|
||||
mock_agent = Mock(spec=Agent)
|
||||
|
||||
# Create mock LLM with required attributes for _update_agent_with_llm_metadata
|
||||
mock_llm = Mock(spec=LLM)
|
||||
mock_llm.model = 'gpt-4' # Non-openhands model, so no metadata update
|
||||
mock_llm.usage_id = 'agent'
|
||||
|
||||
mock_updated_agent = Mock(spec=Agent)
|
||||
mock_updated_agent.llm = mock_llm
|
||||
mock_updated_agent.condenser = None # No condenser
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
|
||||
mock_updated_agent
|
||||
)
|
||||
# Arrange
|
||||
mock_agent = Mock(spec=Agent)
|
||||
mock_agent.llm = mock_llm
|
||||
mock_agent.condenser = None # No condenser
|
||||
|
||||
workspace = LocalWorkspace(working_dir='/test')
|
||||
secrets = {'test': StaticSecret(value='secret')}
|
||||
@@ -1141,31 +1109,18 @@ class TestLiveStatusAppConversationService:
|
||||
# Assert
|
||||
assert isinstance(result, StartConversationRequest)
|
||||
assert isinstance(result.conversation_id, UUID)
|
||||
assert result.agent == mock_updated_agent
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
|
||||
)
|
||||
async def test_finalize_conversation_request_skills_loading_fails(
|
||||
self, mock_experiment_manager
|
||||
):
|
||||
async def test_finalize_conversation_request_skills_loading_fails(self):
|
||||
"""Test _finalize_conversation_request when skills loading fails."""
|
||||
# Arrange
|
||||
mock_agent = Mock(spec=Agent)
|
||||
|
||||
# Create mock LLM with required attributes for _update_agent_with_llm_metadata
|
||||
mock_llm = Mock(spec=LLM)
|
||||
mock_llm.model = 'gpt-4' # Non-openhands model, so no metadata update
|
||||
mock_llm.usage_id = 'agent'
|
||||
|
||||
mock_updated_agent = Mock(spec=Agent)
|
||||
mock_updated_agent.llm = mock_llm
|
||||
mock_updated_agent.condenser = None # No condenser
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
|
||||
mock_updated_agent
|
||||
)
|
||||
mock_agent = Mock(spec=Agent)
|
||||
mock_agent.llm = mock_llm
|
||||
mock_agent.condenser = None # No condenser
|
||||
|
||||
workspace = LocalWorkspace(working_dir='/test')
|
||||
secrets = {'test': StaticSecret(value='secret')}
|
||||
@@ -1195,9 +1150,6 @@ class TestLiveStatusAppConversationService:
|
||||
|
||||
# Assert
|
||||
assert isinstance(result, StartConversationRequest)
|
||||
assert (
|
||||
result.agent == mock_updated_agent
|
||||
) # Should still use the experiment-modified agent
|
||||
mock_logger.warning.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -2266,12 +2218,7 @@ class TestPluginHandling:
|
||||
assert 'key2: value2' in text
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
|
||||
)
|
||||
async def test_finalize_conversation_request_with_plugins(
|
||||
self, mock_experiment_manager
|
||||
):
|
||||
async def test_finalize_conversation_request_with_plugins(self):
|
||||
"""Test _finalize_conversation_request passes plugins list to StartConversationRequest."""
|
||||
from openhands.app_server.app_conversation.app_conversation_models import (
|
||||
PluginSpec,
|
||||
@@ -2282,13 +2229,13 @@ class TestPluginHandling:
|
||||
mock_llm = Mock(spec=LLM)
|
||||
mock_llm.model = 'gpt-4'
|
||||
mock_llm.usage_id = 'agent'
|
||||
mock_agent.llm = mock_llm
|
||||
mock_agent.condenser = None
|
||||
|
||||
mock_updated_agent = Mock(spec=Agent)
|
||||
mock_updated_agent.llm = mock_llm
|
||||
mock_updated_agent.condenser = None
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
|
||||
mock_updated_agent
|
||||
)
|
||||
mock_agent.model_copy = Mock(return_value=mock_updated_agent)
|
||||
|
||||
workspace = LocalWorkspace(working_dir='/test')
|
||||
secrets = {'test': StaticSecret(value='secret')}
|
||||
@@ -2330,25 +2277,20 @@ class TestPluginHandling:
|
||||
assert '- api_key: test123' in result.initial_message.content[0].text
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
|
||||
)
|
||||
async def test_finalize_conversation_request_without_plugins(
|
||||
self, mock_experiment_manager
|
||||
):
|
||||
async def test_finalize_conversation_request_without_plugins(self):
|
||||
"""Test _finalize_conversation_request without plugins sets plugins to None."""
|
||||
# Arrange
|
||||
mock_agent = Mock(spec=Agent)
|
||||
mock_llm = Mock(spec=LLM)
|
||||
mock_llm.model = 'gpt-4'
|
||||
mock_llm.usage_id = 'agent'
|
||||
mock_agent.llm = mock_llm
|
||||
mock_agent.condenser = None
|
||||
|
||||
mock_updated_agent = Mock(spec=Agent)
|
||||
mock_updated_agent.llm = mock_llm
|
||||
mock_updated_agent.condenser = None
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
|
||||
mock_updated_agent
|
||||
)
|
||||
mock_agent.model_copy = Mock(return_value=mock_updated_agent)
|
||||
|
||||
workspace = LocalWorkspace(working_dir='/test')
|
||||
secrets = {}
|
||||
@@ -2373,12 +2315,7 @@ class TestPluginHandling:
|
||||
assert result.plugins is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
|
||||
)
|
||||
async def test_finalize_conversation_request_plugin_without_ref(
|
||||
self, mock_experiment_manager
|
||||
):
|
||||
async def test_finalize_conversation_request_plugin_without_ref(self):
|
||||
"""Test _finalize_conversation_request with plugin that has no ref."""
|
||||
from openhands.app_server.app_conversation.app_conversation_models import (
|
||||
PluginSpec,
|
||||
@@ -2389,13 +2326,13 @@ class TestPluginHandling:
|
||||
mock_llm = Mock(spec=LLM)
|
||||
mock_llm.model = 'gpt-4'
|
||||
mock_llm.usage_id = 'agent'
|
||||
mock_agent.llm = mock_llm
|
||||
mock_agent.condenser = None
|
||||
|
||||
mock_updated_agent = Mock(spec=Agent)
|
||||
mock_updated_agent.llm = mock_llm
|
||||
mock_updated_agent.condenser = None
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
|
||||
mock_updated_agent
|
||||
)
|
||||
mock_agent.model_copy = Mock(return_value=mock_updated_agent)
|
||||
|
||||
workspace = LocalWorkspace(working_dir='/test')
|
||||
secrets = {}
|
||||
@@ -2428,12 +2365,7 @@ class TestPluginHandling:
|
||||
assert result.initial_message is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
|
||||
)
|
||||
async def test_finalize_conversation_request_plugin_with_repo_path(
|
||||
self, mock_experiment_manager
|
||||
):
|
||||
async def test_finalize_conversation_request_plugin_with_repo_path(self):
|
||||
"""Test _finalize_conversation_request passes repo_path to PluginSource."""
|
||||
from openhands.app_server.app_conversation.app_conversation_models import (
|
||||
PluginSpec,
|
||||
@@ -2444,13 +2376,13 @@ class TestPluginHandling:
|
||||
mock_llm = Mock(spec=LLM)
|
||||
mock_llm.model = 'gpt-4'
|
||||
mock_llm.usage_id = 'agent'
|
||||
mock_agent.llm = mock_llm
|
||||
mock_agent.condenser = None
|
||||
|
||||
mock_updated_agent = Mock(spec=Agent)
|
||||
mock_updated_agent.llm = mock_llm
|
||||
mock_updated_agent.condenser = None
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
|
||||
mock_updated_agent
|
||||
)
|
||||
mock_agent.model_copy = Mock(return_value=mock_updated_agent)
|
||||
|
||||
workspace = LocalWorkspace(working_dir='/test')
|
||||
secrets = {}
|
||||
@@ -2488,12 +2420,7 @@ class TestPluginHandling:
|
||||
assert result.plugins[0].repo_path == 'plugins/city-weather'
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
|
||||
)
|
||||
async def test_finalize_conversation_request_multiple_plugins(
|
||||
self, mock_experiment_manager
|
||||
):
|
||||
async def test_finalize_conversation_request_multiple_plugins(self):
|
||||
"""Test _finalize_conversation_request with multiple plugins."""
|
||||
from openhands.app_server.app_conversation.app_conversation_models import (
|
||||
PluginSpec,
|
||||
@@ -2504,13 +2431,13 @@ class TestPluginHandling:
|
||||
mock_llm = Mock(spec=LLM)
|
||||
mock_llm.model = 'gpt-4'
|
||||
mock_llm.usage_id = 'agent'
|
||||
mock_agent.llm = mock_llm
|
||||
mock_agent.condenser = None
|
||||
|
||||
mock_updated_agent = Mock(spec=Agent)
|
||||
mock_updated_agent.llm = mock_llm
|
||||
mock_updated_agent.condenser = None
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
|
||||
mock_updated_agent
|
||||
)
|
||||
mock_agent.model_copy = Mock(return_value=mock_updated_agent)
|
||||
|
||||
workspace = LocalWorkspace(working_dir='/test')
|
||||
secrets = {}
|
||||
|
||||
@@ -1,264 +0,0 @@
|
||||
"""Unit tests for ExperimentManager class, focusing on the v1 agent method."""
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import Mock, patch
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from openhands.app_server.app_conversation.live_status_app_conversation_service import (
|
||||
LiveStatusAppConversationService,
|
||||
)
|
||||
from openhands.app_server.sandbox.sandbox_models import SandboxInfo, SandboxStatus
|
||||
from openhands.experiments.experiment_manager import ExperimentManager
|
||||
from openhands.sdk import Agent
|
||||
from openhands.sdk.llm import LLM
|
||||
|
||||
|
||||
class TestExperimentManager:
|
||||
"""Test cases for ExperimentManager class."""
|
||||
|
||||
def setup_method(self):
|
||||
"""Set up test fixtures."""
|
||||
self.user_id = 'test_user_123'
|
||||
self.conversation_id = uuid4()
|
||||
|
||||
# Create a mock LLM
|
||||
self.mock_llm = Mock(spec=LLM)
|
||||
self.mock_llm.model = 'gpt-4'
|
||||
self.mock_llm.usage_id = 'agent'
|
||||
|
||||
# Create a mock Agent
|
||||
self.mock_agent = Mock(spec=Agent)
|
||||
self.mock_agent.llm = self.mock_llm
|
||||
self.mock_agent.system_prompt_filename = 'default_system_prompt.j2'
|
||||
self.mock_agent.model_copy = Mock(return_value=self.mock_agent)
|
||||
|
||||
def test_run_agent_variant_tests__v1_returns_agent_unchanged(self):
|
||||
"""Test that the base ExperimentManager returns the agent unchanged."""
|
||||
result = ExperimentManager.run_agent_variant_tests__v1(
|
||||
self.user_id, self.conversation_id, self.mock_agent
|
||||
)
|
||||
|
||||
assert result is self.mock_agent
|
||||
assert result == self.mock_agent
|
||||
|
||||
def test_run_agent_variant_tests__v1_with_none_user_id(self):
|
||||
"""Test that the method works with None user_id."""
|
||||
# Act
|
||||
result = ExperimentManager.run_agent_variant_tests__v1(
|
||||
None, self.conversation_id, self.mock_agent
|
||||
)
|
||||
|
||||
# Assert
|
||||
assert result is self.mock_agent
|
||||
|
||||
def test_run_agent_variant_tests__v1_with_different_conversation_ids(self):
|
||||
"""Test that the method works with different conversation IDs."""
|
||||
conversation_id_1 = uuid4()
|
||||
conversation_id_2 = uuid4()
|
||||
|
||||
# Act
|
||||
result_1 = ExperimentManager.run_agent_variant_tests__v1(
|
||||
self.user_id, conversation_id_1, self.mock_agent
|
||||
)
|
||||
result_2 = ExperimentManager.run_agent_variant_tests__v1(
|
||||
self.user_id, conversation_id_2, self.mock_agent
|
||||
)
|
||||
|
||||
# Assert
|
||||
assert result_1 is self.mock_agent
|
||||
assert result_2 is self.mock_agent
|
||||
|
||||
|
||||
class TestExperimentManagerIntegration:
|
||||
"""Integration tests for ExperimentManager with start_app_conversation."""
|
||||
|
||||
def setup_method(self):
|
||||
"""Set up test fixtures."""
|
||||
self.user_id = 'test_user_123'
|
||||
self.conversation_id = uuid4()
|
||||
|
||||
# Create a mock LLM
|
||||
self.mock_llm = Mock(spec=LLM)
|
||||
self.mock_llm.model = 'gpt-4'
|
||||
self.mock_llm.usage_id = 'agent'
|
||||
|
||||
# Create a mock Agent
|
||||
self.mock_agent = Mock(spec=Agent)
|
||||
self.mock_agent.llm = self.mock_llm
|
||||
self.mock_agent.system_prompt_filename = 'default_system_prompt.j2'
|
||||
self.mock_agent.model_copy = Mock(return_value=self.mock_agent)
|
||||
|
||||
@patch('openhands.experiments.experiment_manager.ExperimentManagerImpl')
|
||||
def test_start_app_conversation_calls_experiment_manager_v1(
|
||||
self, mock_experiment_manager_impl
|
||||
):
|
||||
"""Test that start_app_conversation calls the experiment manager v1 method with correct parameters."""
|
||||
# Arrange
|
||||
mock_experiment_manager_impl.run_agent_variant_tests__v1.return_value = (
|
||||
self.mock_agent
|
||||
)
|
||||
|
||||
# Create a mock service instance
|
||||
mock_service = Mock(spec=LiveStatusAppConversationService)
|
||||
|
||||
# Mock the _build_start_conversation_request_for_user method to simulate the call
|
||||
with patch.object(mock_service, '_build_start_conversation_request_for_user'):
|
||||
# Simulate the part of the code that calls the experiment manager
|
||||
from uuid import uuid4
|
||||
|
||||
conversation_id = uuid4()
|
||||
|
||||
# This simulates the call that happens in the actual service
|
||||
result_agent = mock_experiment_manager_impl.run_agent_variant_tests__v1(
|
||||
self.user_id, conversation_id, self.mock_agent
|
||||
)
|
||||
|
||||
# Assert
|
||||
mock_experiment_manager_impl.run_agent_variant_tests__v1.assert_called_once_with(
|
||||
self.user_id, conversation_id, self.mock_agent
|
||||
)
|
||||
assert result_agent == self.mock_agent
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_experiment_manager_called_with_correct_parameters_in_context__noop_pass_through(
|
||||
self,
|
||||
):
|
||||
"""
|
||||
Test that ExperimentManagerImpl.run_agent_variant_tests__v1 is called with correct parameters
|
||||
and returns the same agent instance (no copy/mutation) when building a StartConversationRequest.
|
||||
"""
|
||||
# --- Arrange: fixed UUID to assert call parameters deterministically
|
||||
fixed_conversation_id = UUID('00000000-0000-0000-0000-000000000001')
|
||||
|
||||
# Create a stable Agent (and LLM) we can identity-check later
|
||||
mock_llm = Mock(spec=LLM)
|
||||
mock_llm.model = 'gpt-4'
|
||||
mock_llm.usage_id = 'agent'
|
||||
|
||||
mock_agent = Mock(spec=Agent)
|
||||
mock_agent.llm = mock_llm
|
||||
mock_agent.condenser = None # No condenser for this test
|
||||
mock_agent.system_prompt_filename = 'default_system_prompt.j2'
|
||||
mock_agent.model_copy = Mock(return_value=mock_agent)
|
||||
|
||||
# Minimal, real-ish user context used by the service
|
||||
class DummyUserContext:
|
||||
async def get_user_info(self):
|
||||
# confirmation_mode=False -> NeverConfirm()
|
||||
return SimpleNamespace(
|
||||
id='test_user_123',
|
||||
llm_model='gpt-4',
|
||||
llm_base_url=None,
|
||||
llm_api_key=None,
|
||||
confirmation_mode=False,
|
||||
condenser_max_size=None,
|
||||
security_analyzer=None,
|
||||
)
|
||||
|
||||
async def get_secrets(self):
|
||||
return {}
|
||||
|
||||
async def get_latest_token(self, provider):
|
||||
return None
|
||||
|
||||
async def get_user_id(self):
|
||||
return 'test_user_123'
|
||||
|
||||
user_context = DummyUserContext()
|
||||
|
||||
# The service requires a lot of deps, but for this test we won't exercise them.
|
||||
app_conversation_info_service = Mock()
|
||||
app_conversation_start_task_service = Mock()
|
||||
event_callback_service = Mock()
|
||||
sandbox_service = Mock()
|
||||
sandbox_spec_service = Mock()
|
||||
jwt_service = Mock()
|
||||
httpx_client = Mock()
|
||||
|
||||
event_service = Mock()
|
||||
|
||||
service = LiveStatusAppConversationService(
|
||||
init_git_in_empty_workspace=False,
|
||||
user_context=user_context,
|
||||
app_conversation_info_service=app_conversation_info_service,
|
||||
app_conversation_start_task_service=app_conversation_start_task_service,
|
||||
event_callback_service=event_callback_service,
|
||||
event_service=event_service,
|
||||
sandbox_service=sandbox_service,
|
||||
sandbox_spec_service=sandbox_spec_service,
|
||||
jwt_service=jwt_service,
|
||||
sandbox_startup_timeout=30,
|
||||
sandbox_startup_poll_frequency=1,
|
||||
httpx_client=httpx_client,
|
||||
web_url=None,
|
||||
openhands_provider_base_url=None,
|
||||
access_token_hard_timeout=None,
|
||||
)
|
||||
|
||||
sandbox = SandboxInfo(
|
||||
id='mock-sandbox-id',
|
||||
created_by_user_id='mock-user-id',
|
||||
sandbox_spec_id='mock-sandbox-spec-id',
|
||||
status=SandboxStatus.RUNNING,
|
||||
session_api_key='mock-session-api-key',
|
||||
)
|
||||
|
||||
# Patch the pieces invoked by the service
|
||||
with (
|
||||
patch.object(
|
||||
service,
|
||||
'_setup_secrets_for_git_providers',
|
||||
return_value={},
|
||||
),
|
||||
patch.object(
|
||||
service,
|
||||
'_configure_llm_and_mcp',
|
||||
return_value=(mock_llm, {}),
|
||||
),
|
||||
patch.object(
|
||||
service,
|
||||
'_create_agent_with_context',
|
||||
return_value=mock_agent,
|
||||
),
|
||||
patch.object(
|
||||
service,
|
||||
'_load_skills_and_update_agent',
|
||||
return_value=mock_agent,
|
||||
),
|
||||
patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.uuid4',
|
||||
return_value=fixed_conversation_id,
|
||||
),
|
||||
patch(
|
||||
'openhands.app_server.app_conversation.live_status_app_conversation_service.ExperimentManagerImpl'
|
||||
) as mock_experiment_manager,
|
||||
):
|
||||
# Configure the experiment manager mock to return the same agent
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.return_value = (
|
||||
mock_agent
|
||||
)
|
||||
|
||||
# --- Act: build the start request
|
||||
start_req = await service._build_start_conversation_request_for_user(
|
||||
sandbox=sandbox,
|
||||
initial_message=None,
|
||||
system_message_suffix=None, # No additional system message suffix
|
||||
git_provider=None, # Keep secrets path simple
|
||||
working_dir='/tmp/project', # Arbitrary path
|
||||
)
|
||||
|
||||
# --- Assert: verify experiment manager was called with correct parameters
|
||||
mock_experiment_manager.run_agent_variant_tests__v1.assert_called_once_with(
|
||||
'test_user_123', # user_id
|
||||
fixed_conversation_id, # conversation_id
|
||||
mock_agent, # agent (after model_copy with agent_context)
|
||||
)
|
||||
|
||||
# The agent in the StartConversationRequest is the *same* object returned by experiment manager
|
||||
assert start_req.agent is mock_agent
|
||||
|
||||
# No tweaks to agent fields by the experiment manager (noop)
|
||||
assert start_req.agent.llm is mock_llm
|
||||
assert start_req.agent.system_prompt_filename == 'default_system_prompt.j2'
|
||||
25
uv.lock
generated
25
uv.lock
generated
@@ -336,14 +336,14 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "authlib"
|
||||
version = "1.6.6"
|
||||
version = "1.6.7"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "cryptography" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/bb/9b/b1661026ff24bc641b76b78c5222d614776b0c085bcfdac9bd15a1cb4b35/authlib-1.6.6.tar.gz", hash = "sha256:45770e8e056d0f283451d9996fbb59b70d45722b45d854d58f32878d0a40c38e", size = 164894, upload-time = "2025-12-12T08:01:41.464Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/49/dc/ed1681bf1339dd6ea1ce56136bad4baabc6f7ad466e375810702b0237047/authlib-1.6.7.tar.gz", hash = "sha256:dbf10100011d1e1b34048c9d120e83f13b35d69a826ae762b93d2fb5aafc337b", size = 164950, upload-time = "2026-02-06T14:04:14.171Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/54/51/321e821856452f7386c4e9df866f196720b1ad0c5ea1623ea7399969ae3b/authlib-1.6.6-py2.py3-none-any.whl", hash = "sha256:7d9e9bc535c13974313a87f53e8430eb6ea3d1cf6ae4f6efcd793f2e949143fd", size = 244005, upload-time = "2025-12-12T08:01:40.209Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f8/00/3ed12264094ec91f534fae429945efbaa9f8c666f3aa7061cc3b2a26a0cd/authlib-1.6.7-py2.py3-none-any.whl", hash = "sha256:c637340d9a02789d2efa1d003a7437d10d3e565237bcb5fcbc6c134c7b95bab0", size = 244115, upload-time = "2026-02-06T14:04:12.141Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1325,7 +1325,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "fastmcp"
|
||||
version = "2.12.5"
|
||||
version = "2.12.4"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "authlib" },
|
||||
@@ -1340,9 +1340,9 @@ dependencies = [
|
||||
{ name = "python-dotenv" },
|
||||
{ name = "rich" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/00/a6/e3b46cd3e228635e0064c2648788b6f66a53bf0d0ddbf5fb44cca951f908/fastmcp-2.12.5.tar.gz", hash = "sha256:2dfd02e255705a4afe43d26caddbc864563036e233dbc6870f389ee523b39a6a", size = 7190263, upload-time = "2025-10-17T13:24:58.896Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a8/b2/57845353a9bc63002995a982e66f3d0be4ec761e7bcb89e7d0638518d42a/fastmcp-2.12.4.tar.gz", hash = "sha256:b55fe89537038f19d0f4476544f9ca5ac171033f61811cc8f12bdeadcbea5016", size = 7167745, upload-time = "2025-09-26T16:43:27.71Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d8/c1/9fb98c9649e15ea8cc691b4b09558b61dafb3dc0345f7322f8c4a8991ade/fastmcp-2.12.5-py3-none-any.whl", hash = "sha256:b1e542f9b83dbae7cecfdc9c73b062f77074785abda9f2306799116121344133", size = 329099, upload-time = "2025-10-17T13:24:57.518Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e2/c7/562ff39f25de27caec01e4c1e88cbb5fcae5160802ba3d90be33165df24f/fastmcp-2.12.4-py3-none-any.whl", hash = "sha256:56188fbbc1a9df58c537063f25958c57b5c4d715f73e395c41b51550b247d140", size = 329090, upload-time = "2025-09-26T16:43:25.314Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3070,7 +3070,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "mcp"
|
||||
version = "1.16.0"
|
||||
version = "1.25.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "anyio" },
|
||||
@@ -3079,15 +3079,18 @@ dependencies = [
|
||||
{ name = "jsonschema" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "pydantic-settings" },
|
||||
{ name = "pyjwt", extra = ["crypto"] },
|
||||
{ name = "python-multipart" },
|
||||
{ name = "pywin32", marker = "sys_platform == 'win32'" },
|
||||
{ name = "sse-starlette" },
|
||||
{ name = "starlette" },
|
||||
{ name = "typing-extensions" },
|
||||
{ name = "typing-inspection" },
|
||||
{ name = "uvicorn", marker = "sys_platform != 'emscripten'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/3d/a1/b1f328da3b153683d2ec34f849b4b6eac2790fb240e3aef06ff2fab3df9d/mcp-1.16.0.tar.gz", hash = "sha256:39b8ca25460c578ee2cdad33feeea122694cfdf73eef58bee76c42f6ef0589df", size = 472918, upload-time = "2025-10-02T16:58:20.631Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d5/2d/649d80a0ecf6a1f82632ca44bec21c0461a9d9fc8934d38cb5b319f2db5e/mcp-1.25.0.tar.gz", hash = "sha256:56310361ebf0364e2d438e5b45f7668cbb124e158bb358333cd06e49e83a6802", size = 605387, upload-time = "2025-12-19T10:19:56.985Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c9/0e/7cebc88e17daf94ebe28c95633af595ccb2864dc2ee7abd75542d98495cc/mcp-1.16.0-py3-none-any.whl", hash = "sha256:ec917be9a5d31b09ba331e1768aa576e0af45470d657a0319996a20a57d7d633", size = 167266, upload-time = "2025-10-02T16:58:19.039Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e2/fc/6dc7659c2ae5ddf280477011f4213a74f806862856b796ef08f028e664bf/mcp-1.25.0-py3-none-any.whl", hash = "sha256:b37c38144a666add0862614cc79ec276e97d72aa8ca26d622818d4e278b9721a", size = 233076, upload-time = "2025-12-19T10:19:55.416Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3632,6 +3635,7 @@ dependencies = [
|
||||
{ name = "anthropic", extra = ["vertex"] },
|
||||
{ name = "anyio" },
|
||||
{ name = "asyncpg" },
|
||||
{ name = "authlib" },
|
||||
{ name = "bashlex" },
|
||||
{ name = "boto3" },
|
||||
{ name = "browsergym-core" },
|
||||
@@ -3752,6 +3756,7 @@ requires-dist = [
|
||||
{ name = "anthropic", extras = ["vertex"] },
|
||||
{ name = "anyio", specifier = "==4.9" },
|
||||
{ name = "asyncpg", specifier = ">=0.30" },
|
||||
{ name = "authlib", specifier = ">=1.6.7" },
|
||||
{ name = "bashlex", specifier = ">=0.18" },
|
||||
{ name = "boto3" },
|
||||
{ name = "browsergym-core", specifier = "==0.13.3" },
|
||||
@@ -3762,7 +3767,7 @@ requires-dist = [
|
||||
{ name = "docker" },
|
||||
{ name = "e2b-code-interpreter", marker = "extra == 'third-party-runtimes'", specifier = ">=2" },
|
||||
{ name = "fastapi" },
|
||||
{ name = "fastmcp", specifier = ">=2.12.4" },
|
||||
{ name = "fastmcp", specifier = ">=2.12.4,<2.12.5" },
|
||||
{ name = "google-api-python-client", specifier = ">=2.164" },
|
||||
{ name = "google-auth-httplib2" },
|
||||
{ name = "google-auth-oauthlib" },
|
||||
|
||||
Reference in New Issue
Block a user