mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-01-08 22:38:05 -05:00
Fix: expose aggregated LLM metrics in State for evaluation scripts (#10537)
Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
@@ -15,6 +15,7 @@ from evaluation.utils.shared import (
|
||||
EvalOutput,
|
||||
compatibility_for_eval_history_pairs,
|
||||
get_default_sandbox_config_for_eval,
|
||||
get_metrics,
|
||||
make_metadata,
|
||||
prepare_dataset,
|
||||
reset_logger_for_multiprocessing,
|
||||
@@ -205,7 +206,7 @@ def process_instance(
|
||||
task_state = state.extra_data['task_state']
|
||||
logger.info('Task state: ' + str(task_state.to_dict()))
|
||||
|
||||
metrics = state.metrics.get() if state.metrics else None
|
||||
metrics = get_metrics(state)
|
||||
|
||||
# history is now available as a stream of events, rather than list of pairs of (Action, Observation)
|
||||
# for compatibility with the existing output format, we can remake the pairs here
|
||||
|
||||
Reference in New Issue
Block a user