mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
feat(agent/llm): Add cost tracking and logging to AgentProtocolServer
This commit is contained in:
@@ -34,6 +34,7 @@ from autogpt.commands.user_interaction import ask_user
|
||||
from autogpt.config import Config
|
||||
from autogpt.core.resource.model_providers import ChatModelProvider
|
||||
from autogpt.core.resource.model_providers.openai import OpenAIProvider
|
||||
from autogpt.core.resource.model_providers.schema import ModelProviderBudget
|
||||
from autogpt.file_workspace import (
|
||||
FileWorkspace,
|
||||
FileWorkspaceBackendName,
|
||||
@@ -46,6 +47,8 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AgentProtocolServer:
|
||||
_task_budgets: dict[str, ModelProviderBudget]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
app_config: Config,
|
||||
@@ -56,6 +59,7 @@ class AgentProtocolServer:
|
||||
self.db = database
|
||||
self.llm_provider = llm_provider
|
||||
self.agent_manager = AgentManager(app_data_dir=app_config.app_data_dir)
|
||||
self._task_budgets = {}
|
||||
|
||||
async def start(self, port: int = 8000, router: APIRouter = base_router):
|
||||
"""Start the agent server."""
|
||||
@@ -127,10 +131,13 @@ class AgentProtocolServer:
|
||||
app_config=self.app_config,
|
||||
llm_provider=self._get_task_llm_provider(task),
|
||||
)
|
||||
|
||||
# Assign an ID and a folder to the Agent and persist it
|
||||
agent_id = task_agent.state.agent_id = task_agent_id(task.task_id)
|
||||
logger.debug(f"New agent ID: {agent_id}")
|
||||
task_agent.attach_fs(self.app_config.app_data_dir / "agents" / agent_id)
|
||||
task_agent.state.save_to_json_file(task_agent.file_manager.state_file_path)
|
||||
|
||||
return task
|
||||
|
||||
async def list_tasks(self, page: int = 1, pageSize: int = 10) -> TaskListResponse:
|
||||
@@ -224,6 +231,10 @@ class AgentProtocolServer:
|
||||
step_id=step.step_id,
|
||||
output=execute_command_args["reason"],
|
||||
)
|
||||
logger.info(
|
||||
f"Total LLM cost for task {task_id}: "
|
||||
f"${round(agent.llm_provider.get_incurred_cost(), 2)}"
|
||||
)
|
||||
return step
|
||||
|
||||
if execute_command == ask_user.__name__: # HACK
|
||||
@@ -310,6 +321,10 @@ class AgentProtocolServer:
|
||||
additional_output=additional_output,
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"Running total LLM cost for task {task_id}: "
|
||||
f"${round(agent.llm_provider.get_incurred_cost(), 2)}"
|
||||
)
|
||||
agent.state.save_to_json_file(agent.file_manager.state_file_path)
|
||||
return step
|
||||
|
||||
@@ -437,6 +452,12 @@ class AgentProtocolServer:
|
||||
task_llm_provider_config = self.llm_provider._configuration.copy(deep=True)
|
||||
_extra_request_headers = task_llm_provider_config.extra_request_headers
|
||||
|
||||
task_llm_budget = self._task_budgets.get(
|
||||
task.task_id, self.llm_provider.default_settings.budget.copy(deep=True)
|
||||
)
|
||||
if task.task_id not in self._task_budgets:
|
||||
self._task_budgets[task.task_id] = task_llm_budget
|
||||
|
||||
_extra_request_headers["AP-TaskID"] = task.task_id
|
||||
if step_id:
|
||||
_extra_request_headers["AP-StepID"] = step_id
|
||||
@@ -445,7 +466,8 @@ class AgentProtocolServer:
|
||||
|
||||
if isinstance(self.llm_provider, OpenAIProvider):
|
||||
settings = self.llm_provider._settings.copy()
|
||||
settings.configuration = task_llm_provider_config
|
||||
settings.budget = task_llm_budget
|
||||
settings.configuration = task_llm_provider_config # type: ignore
|
||||
return OpenAIProvider(
|
||||
settings=settings,
|
||||
logger=logger.getChild(f"Task-{task.task_id}_OpenAIProvider"),
|
||||
|
||||
@@ -371,6 +371,11 @@ async def run_auto_gpt_server(
|
||||
)
|
||||
await server.start(port=port)
|
||||
|
||||
logging.getLogger().info(
|
||||
f"Total OpenAI session cost: "
|
||||
f"${round(sum(b.total_cost for b in server._task_budgets.values()), 2)}"
|
||||
)
|
||||
|
||||
|
||||
def _configure_openai_provider(config: Config) -> OpenAIProvider:
|
||||
"""Create a configured OpenAIProvider object.
|
||||
|
||||
@@ -241,15 +241,10 @@ class OpenAICredentials(ModelProviderCredentials):
|
||||
return {"model": deployment_id}
|
||||
|
||||
|
||||
class OpenAIModelProviderBudget(ModelProviderBudget):
|
||||
graceful_shutdown_threshold: float = UserConfigurable()
|
||||
warning_threshold: float = UserConfigurable()
|
||||
|
||||
|
||||
class OpenAISettings(ModelProviderSettings):
|
||||
configuration: OpenAIConfiguration
|
||||
credentials: Optional[OpenAICredentials]
|
||||
budget: OpenAIModelProviderBudget
|
||||
budget: ModelProviderBudget
|
||||
|
||||
|
||||
class OpenAIProvider(
|
||||
@@ -262,7 +257,7 @@ class OpenAIProvider(
|
||||
retries_per_request=10,
|
||||
),
|
||||
credentials=None,
|
||||
budget=OpenAIModelProviderBudget(
|
||||
budget=ModelProviderBudget(
|
||||
total_budget=math.inf,
|
||||
total_cost=0.0,
|
||||
remaining_budget=math.inf,
|
||||
@@ -271,11 +266,10 @@ class OpenAIProvider(
|
||||
completion_tokens=0,
|
||||
total_tokens=0,
|
||||
),
|
||||
graceful_shutdown_threshold=0.005,
|
||||
warning_threshold=0.01,
|
||||
),
|
||||
)
|
||||
|
||||
_budget: ModelProviderBudget
|
||||
_configuration: OpenAIConfiguration
|
||||
|
||||
def __init__(
|
||||
@@ -307,10 +301,6 @@ class OpenAIProvider(
|
||||
"""Get the token limit for a given model."""
|
||||
return OPEN_AI_MODELS[model_name].max_tokens
|
||||
|
||||
def get_remaining_budget(self) -> float:
|
||||
"""Get the remaining budget."""
|
||||
return self._budget.remaining_budget
|
||||
|
||||
@classmethod
|
||||
def get_tokenizer(cls, model_name: OpenAIModelName) -> ModelTokenizer:
|
||||
return tiktoken.encoding_for_model(model_name)
|
||||
@@ -379,45 +369,60 @@ class OpenAIProvider(
|
||||
model_prompt += completion_kwargs["messages"]
|
||||
del completion_kwargs["messages"]
|
||||
|
||||
cost = 0.0
|
||||
attempts = 0
|
||||
while True:
|
||||
response = await self._create_chat_completion(
|
||||
_response = await self._create_chat_completion(
|
||||
messages=model_prompt,
|
||||
**completion_kwargs,
|
||||
)
|
||||
|
||||
response_message = response.choices[0].message
|
||||
_response_msg = _response.choices[0].message
|
||||
if (
|
||||
tool_calls_compat_mode
|
||||
and response_message.content
|
||||
and not response_message.tool_calls
|
||||
and _response_msg.content
|
||||
and not _response_msg.tool_calls
|
||||
):
|
||||
tool_calls = list(
|
||||
_tool_calls_compat_extract_calls(response_message.content)
|
||||
_tool_calls_compat_extract_calls(_response_msg.content)
|
||||
)
|
||||
elif response_message.tool_calls:
|
||||
elif _response_msg.tool_calls:
|
||||
tool_calls = [
|
||||
AssistantToolCall(**tc.dict()) for tc in response_message.tool_calls
|
||||
AssistantToolCall(**tc.dict()) for tc in _response_msg.tool_calls
|
||||
]
|
||||
else:
|
||||
tool_calls = None
|
||||
|
||||
assistant_message = AssistantChatMessage(
|
||||
content=response_message.content,
|
||||
content=_response_msg.content,
|
||||
tool_calls=tool_calls,
|
||||
)
|
||||
|
||||
response = ChatModelResponse(
|
||||
response=assistant_message,
|
||||
model_info=OPEN_AI_CHAT_MODELS[model_name],
|
||||
prompt_tokens_used=(
|
||||
_response.usage.prompt_tokens if _response.usage else 0
|
||||
),
|
||||
completion_tokens_used=(
|
||||
_response.usage.completion_tokens if _response.usage else 0
|
||||
),
|
||||
)
|
||||
cost += self._budget.update_usage_and_cost(response)
|
||||
self._logger.debug(
|
||||
f"Completion usage: {response.prompt_tokens_used} input, "
|
||||
f"{response.completion_tokens_used} output - ${round(cost, 2)}"
|
||||
)
|
||||
|
||||
# If parsing the response fails, append the error to the prompt, and let the
|
||||
# LLM fix its mistake(s).
|
||||
try:
|
||||
attempts += 1
|
||||
parsed_response = completion_parser(assistant_message)
|
||||
response.parsed_result = completion_parser(assistant_message)
|
||||
break
|
||||
except Exception as e:
|
||||
self._logger.warning(f"Parsing attempt #{attempts} failed: {e}")
|
||||
self._logger.debug(
|
||||
f"Parsing failed on response: '''{response_message}'''"
|
||||
)
|
||||
self._logger.debug(f"Parsing failed on response: '''{_response_msg}'''")
|
||||
if attempts < self._configuration.fix_failed_parse_tries:
|
||||
model_prompt.append(
|
||||
ChatMessage.system(f"ERROR PARSING YOUR RESPONSE:\n\n{e}")
|
||||
@@ -425,16 +430,9 @@ class OpenAIProvider(
|
||||
else:
|
||||
raise
|
||||
|
||||
response = ChatModelResponse(
|
||||
response=assistant_message,
|
||||
parsed_result=parsed_response,
|
||||
model_info=OPEN_AI_CHAT_MODELS[model_name],
|
||||
prompt_tokens_used=response.usage.prompt_tokens if response.usage else 0,
|
||||
completion_tokens_used=(
|
||||
response.usage.completion_tokens if response.usage else 0
|
||||
),
|
||||
)
|
||||
self._budget.update_usage_and_cost(response)
|
||||
if attempts > 1:
|
||||
self._logger.debug(f"Total cost for {attempts} attempts: {round(cost, 2)}")
|
||||
|
||||
return response
|
||||
|
||||
async def create_embedding(
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import abc
|
||||
import enum
|
||||
import math
|
||||
from typing import (
|
||||
Callable,
|
||||
ClassVar,
|
||||
@@ -208,8 +209,12 @@ class ModelProviderBudget(ProviderBudget):
|
||||
def update_usage_and_cost(
|
||||
self,
|
||||
model_response: ModelResponse,
|
||||
) -> None:
|
||||
"""Update the usage and cost of the provider."""
|
||||
) -> float:
|
||||
"""Update the usage and cost of the provider.
|
||||
|
||||
Returns:
|
||||
float: The (calculated) cost of the given model response.
|
||||
"""
|
||||
model_info = model_response.model_info
|
||||
self.usage.update_usage(model_response)
|
||||
incurred_cost = (
|
||||
@@ -218,6 +223,7 @@ class ModelProviderBudget(ProviderBudget):
|
||||
)
|
||||
self.total_cost += incurred_cost
|
||||
self.remaining_budget -= incurred_cost
|
||||
return incurred_cost
|
||||
|
||||
|
||||
class ModelProviderSettings(ProviderSettings):
|
||||
@@ -232,6 +238,7 @@ class ModelProvider(abc.ABC):
|
||||
|
||||
default_settings: ClassVar[ModelProviderSettings]
|
||||
|
||||
_budget: Optional[ModelProviderBudget]
|
||||
_configuration: ModelProviderConfiguration
|
||||
|
||||
@abc.abstractmethod
|
||||
@@ -246,9 +253,15 @@ class ModelProvider(abc.ABC):
|
||||
def get_token_limit(self, model_name: str) -> int:
|
||||
...
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_incurred_cost(self) -> float:
|
||||
if self._budget:
|
||||
return self._budget.total_cost
|
||||
return 0
|
||||
|
||||
def get_remaining_budget(self) -> float:
|
||||
...
|
||||
if self._budget:
|
||||
return self._budget.remaining_budget
|
||||
return math.inf
|
||||
|
||||
|
||||
class ModelTokenizer(Protocol):
|
||||
|
||||
@@ -31,8 +31,12 @@ class ProviderBudget(SystemConfiguration):
|
||||
usage: ProviderUsage
|
||||
|
||||
@abc.abstractmethod
|
||||
def update_usage_and_cost(self, *args, **kwargs) -> None:
|
||||
"""Update the usage and cost of the resource."""
|
||||
def update_usage_and_cost(self, *args, **kwargs) -> float:
|
||||
"""Update the usage and cost of the provider.
|
||||
|
||||
Returns:
|
||||
float: The (calculated) cost of the given model response.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user