[eval,fix]: metrics get carried across eval instances (#3072)

* fix: make max_budget_per_task optional in `run_agent_controller`
* update arg for each run infer
* fix: metrics logging carried along; reset llm metric with the agent;

---------

Co-authored-by: Graham Neubig <neubig@gmail.com>
2026-01-10 07:18:10 -05:00 · 2024-07-23 11:30:28 +08:00
parent da17665cab
commit 41a8bb3cf1
3 changed files with 8 additions and 2 deletions
--- a/opendevin/controller/agent.py
+++ b/opendevin/controller/agent.py
@@ -57,6 +57,9 @@ class Agent(ABC):
        # TODO clear history
        self._complete = False

+        if self.llm:
+            self.llm.reset()
+
    @property
    def name(self):
        return self.__class__.__name__
--- a/opendevin/controller/state/state.py
+++ b/opendevin/controller/state/state.py
@@ -97,9 +97,9 @@ class State:
    resume_state: AgentState | None = None
    traffic_control_state: TrafficControlState = TrafficControlState.NORMAL
    # global metrics for the current task
-    metrics: Metrics = Metrics()
+    metrics: Metrics = field(default_factory=Metrics)
    # local metrics for the current subtask
-    local_metrics: Metrics = Metrics()
+    local_metrics: Metrics = field(default_factory=Metrics)
    # root agent has level 0, and every delegate increases the level by one
    delegate_level: int = 0
    # start_id and end_id track the range of events in history
--- a/opendevin/llm/llm.py
+++ b/opendevin/llm/llm.py
@@ -252,3 +252,6 @@ class LLM:

    def __repr__(self):
        return str(self)
+
+    def reset(self):
+        self.metrics = Metrics()