fix(backend): Continue stats accounting on aborted or broken executions (#9921)

This is a follow-up to
https://github.com/Significant-Gravitas/AutoGPT/pull/9903

When a graph execution was continued after being aborted or broken, all of
its execution stats restarted from zero, which made the reported stats misleading.

### Changes 🏗️

Carry the previously recorded execution stats over when continuing a graph execution, instead of starting again from zero.

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  <!-- Put your test plan here: -->
- [x] Existing tests, manual graph run with the graph execution aborted
midway.
This commit is contained in:
Zamil Majdy
2025-05-09 13:46:30 +07:00
committed by GitHub
parent 82cf0bcde7
commit 8de88395f1
3 changed files with 67 additions and 7 deletions

View File

@@ -30,7 +30,7 @@ from prisma.types import (
AgentNodeExecutionUpdateInput,
AgentNodeExecutionWhereInput,
)
from pydantic import BaseModel
from pydantic import BaseModel, ConfigDict
from pydantic.fields import Field
from backend.server.v2.store.exceptions import DatabaseError
@@ -69,10 +69,55 @@ class GraphExecutionMeta(BaseDbModel):
ended_at: datetime
class Stats(BaseModel):
cost: int = Field(..., description="Execution cost (cents)")
duration: float = Field(..., description="Seconds from start to end of run")
node_exec_time: float = Field(..., description="Seconds of total node runtime")
node_exec_count: int = Field(..., description="Number of node executions")
model_config = ConfigDict(
extra="allow",
arbitrary_types_allowed=True,
)
cost: int = Field(
default=0,
description="Execution cost (cents)",
)
duration: float = Field(
default=0,
description="Seconds from start to end of run",
)
duration_cpu_only: float = Field(
default=0,
description="CPU sec of duration",
)
node_exec_time: float = Field(
default=0,
description="Seconds of total node runtime",
)
node_exec_time_cpu_only: float = Field(
default=0,
description="CPU sec of node_exec_time",
)
node_exec_count: int = Field(
default=0,
description="Number of node executions",
)
node_error_count: int = Field(
default=0,
description="Number of node errors",
)
error: str | None = Field(
default=None,
description="Error message if any",
)
def to_db(self) -> GraphExecutionStats:
return GraphExecutionStats(
cost=self.cost,
walltime=self.duration,
cputime=self.duration_cpu_only,
nodes_walltime=self.node_exec_time,
nodes_cputime=self.node_exec_time_cpu_only,
node_count=self.node_exec_count,
node_error_count=self.node_error_count,
error=self.error,
)
stats: Stats | None
@@ -106,8 +151,16 @@ class GraphExecutionMeta(BaseDbModel):
GraphExecutionMeta.Stats(
cost=stats.cost,
duration=stats.walltime,
duration_cpu_only=stats.cputime,
node_exec_time=stats.nodes_walltime,
node_exec_time_cpu_only=stats.nodes_cputime,
node_exec_count=stats.node_count,
node_error_count=stats.node_error_count,
error=(
str(stats.error)
if isinstance(stats.error, Exception)
else stats.error
),
)
if stats
else None

View File

@@ -633,7 +633,12 @@ class Executor:
return
timing_info, (exec_stats, status, error) = cls._on_graph_execution(
graph_exec, cancel, log_metadata
graph_exec=graph_exec,
cancel=cancel,
log_metadata=log_metadata,
execution_stats=(
exec_meta.stats.to_db() if exec_meta.stats else GraphExecutionStats()
),
)
exec_stats.walltime = timing_info.wall_time
exec_stats.cputime = timing_info.cpu_time
@@ -704,6 +709,7 @@ class Executor:
graph_exec: GraphExecutionEntry,
cancel: threading.Event,
log_metadata: LogMetadata,
execution_stats: GraphExecutionStats,
) -> tuple[GraphExecutionStats, ExecutionStatus, Exception | None]:
"""
Returns:
@@ -711,7 +717,6 @@ class Executor:
ExecutionStatus: The final status of the graph execution.
Exception | None: The error that occurred during the execution, if any.
"""
execution_stats = GraphExecutionStats()
execution_status = ExecutionStatus.RUNNING
error = None
finished = False

View File

@@ -282,7 +282,9 @@ export type GraphExecutionMeta = {
stats?: {
cost: number;
duration: number;
duration_cpu_only: number;
node_exec_time: number;
node_exec_time_cpu_only: number;
node_exec_count: number;
};
};