fix(direct_benchmark): track cost from LLM provider

Previously, the cost was hardcoded to 0.0. The runner now reads the cumulative cost from MultiProvider.get_incurred_cost() after each step execution.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-04-08 03:00:28 -04:00 · 2026-01-19 23:37:12 -06:00
parent a347bed0b1
commit d591f36c7b
1 changed file with 5 additions and 3 deletions
--- a/classic/direct_benchmark/direct_benchmark/runner.py
+++ b/classic/direct_benchmark/direct_benchmark/runner.py
@@ -36,6 +36,7 @@ class AgentRunner:
        self.step_callback = step_callback
        self._agent: Optional[Agent] = None
        self._workspace: Optional[Path] = None
+        self._llm_provider: Optional[MultiProvider] = None

    async def run_challenge(
        self, challenge: Challenge, attempt: int = 1
@@ -182,6 +183,7 @@ class AgentRunner:
        )

        self._agent = agent
+        self._llm_provider = llm_provider
        return agent

    async def _run_agent_loop(
@@ -211,9 +213,9 @@ class AgentRunner:
                # Execute the action
                result = await agent.execute(proposal)

-                # Track cost (if available from provider)
-                step_cost = 0.0  # TODO: Extract from LLM provider
-                cumulative_cost += step_cost
+                # Get cumulative cost from LLM provider
+                if self._llm_provider:
+                    cumulative_cost = self._llm_provider.get_incurred_cost()

                # Get result info
                result_str = str(