mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-01-08 22:38:05 -05:00
Fix: expose aggregated LLM metrics in State for evaluation scripts (#10537)
Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
@@ -15,6 +15,7 @@ from evaluation.utils.shared import (
|
||||
codeact_user_response,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -294,7 +295,7 @@ def process_instance(
|
||||
raise ValueError('State should not be None.')
|
||||
|
||||
test_result = complete_runtime(runtime, instance)
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
# remove when it becomes unnecessary
|
||||
|
||||
Reference in New Issue
Block a user