feat(agent/llm): Add cost tracking and logging to AgentProtocolServer

This commit is contained in:
Reinier van der Leer
2024-01-19 17:31:59 +01:00
parent 9e4dfd8058
commit 354106be7b
5 changed files with 84 additions and 42 deletions

View File

@@ -34,6 +34,7 @@ from autogpt.commands.user_interaction import ask_user
from autogpt.config import Config
from autogpt.core.resource.model_providers import ChatModelProvider
from autogpt.core.resource.model_providers.openai import OpenAIProvider
from autogpt.core.resource.model_providers.schema import ModelProviderBudget
from autogpt.file_workspace import (
FileWorkspace,
FileWorkspaceBackendName,
@@ -46,6 +47,8 @@ logger = logging.getLogger(__name__)
class AgentProtocolServer:
_task_budgets: dict[str, ModelProviderBudget]
def __init__(
self,
app_config: Config,
@@ -56,6 +59,7 @@ class AgentProtocolServer:
self.db = database
self.llm_provider = llm_provider
self.agent_manager = AgentManager(app_data_dir=app_config.app_data_dir)
self._task_budgets = {}
async def start(self, port: int = 8000, router: APIRouter = base_router):
"""Start the agent server."""
@@ -127,10 +131,13 @@ class AgentProtocolServer:
app_config=self.app_config,
llm_provider=self._get_task_llm_provider(task),
)
# Assign an ID and a folder to the Agent and persist it
agent_id = task_agent.state.agent_id = task_agent_id(task.task_id)
logger.debug(f"New agent ID: {agent_id}")
task_agent.attach_fs(self.app_config.app_data_dir / "agents" / agent_id)
task_agent.state.save_to_json_file(task_agent.file_manager.state_file_path)
return task
async def list_tasks(self, page: int = 1, pageSize: int = 10) -> TaskListResponse:
@@ -224,6 +231,10 @@ class AgentProtocolServer:
step_id=step.step_id,
output=execute_command_args["reason"],
)
logger.info(
f"Total LLM cost for task {task_id}: "
f"${round(agent.llm_provider.get_incurred_cost(), 2)}"
)
return step
if execute_command == ask_user.__name__: # HACK
@@ -310,6 +321,10 @@ class AgentProtocolServer:
additional_output=additional_output,
)
logger.debug(
f"Running total LLM cost for task {task_id}: "
f"${round(agent.llm_provider.get_incurred_cost(), 2)}"
)
agent.state.save_to_json_file(agent.file_manager.state_file_path)
return step
@@ -437,6 +452,12 @@ class AgentProtocolServer:
task_llm_provider_config = self.llm_provider._configuration.copy(deep=True)
_extra_request_headers = task_llm_provider_config.extra_request_headers
task_llm_budget = self._task_budgets.get(
task.task_id, self.llm_provider.default_settings.budget.copy(deep=True)
)
if task.task_id not in self._task_budgets:
self._task_budgets[task.task_id] = task_llm_budget
_extra_request_headers["AP-TaskID"] = task.task_id
if step_id:
_extra_request_headers["AP-StepID"] = step_id
@@ -445,7 +466,8 @@ class AgentProtocolServer:
if isinstance(self.llm_provider, OpenAIProvider):
settings = self.llm_provider._settings.copy()
settings.configuration = task_llm_provider_config
settings.budget = task_llm_budget
settings.configuration = task_llm_provider_config # type: ignore
return OpenAIProvider(
settings=settings,
logger=logger.getChild(f"Task-{task.task_id}_OpenAIProvider"),

View File

@@ -371,6 +371,11 @@ async def run_auto_gpt_server(
)
await server.start(port=port)
logging.getLogger().info(
f"Total OpenAI session cost: "
f"${round(sum(b.total_cost for b in server._task_budgets.values()), 2)}"
)
def _configure_openai_provider(config: Config) -> OpenAIProvider:
"""Create a configured OpenAIProvider object.

View File

@@ -241,15 +241,10 @@ class OpenAICredentials(ModelProviderCredentials):
return {"model": deployment_id}
# Budget model for the OpenAI provider: a ModelProviderBudget with two extra
# user-configurable float thresholds.
# NOTE(review): the field names suggest budget levels at which to warn and to
# shut down gracefully, but nothing visible here reads them — confirm before
# relying on that meaning.
class OpenAIModelProviderBudget(ModelProviderBudget):
graceful_shutdown_threshold: float = UserConfigurable()
warning_threshold: float = UserConfigurable()
# Settings bundle for the OpenAI provider: its configuration, optional
# credentials, and the budget used for cost tracking.
# NOTE(review): `budget` is annotated twice below. This text looks like a
# rendered diff, so the first annotation is presumably the pre-change line and
# the second its replacement — confirm against the real file.
class OpenAISettings(ModelProviderSettings):
configuration: OpenAIConfiguration
credentials: Optional[OpenAICredentials]
budget: OpenAIModelProviderBudget
budget: ModelProviderBudget
class OpenAIProvider(
@@ -262,7 +257,7 @@ class OpenAIProvider(
retries_per_request=10,
),
credentials=None,
budget=OpenAIModelProviderBudget(
budget=ModelProviderBudget(
total_budget=math.inf,
total_cost=0.0,
remaining_budget=math.inf,
@@ -271,11 +266,10 @@ class OpenAIProvider(
completion_tokens=0,
total_tokens=0,
),
graceful_shutdown_threshold=0.005,
warning_threshold=0.01,
),
)
_budget: ModelProviderBudget
_configuration: OpenAIConfiguration
def __init__(
@@ -307,10 +301,6 @@ class OpenAIProvider(
"""Get the token limit for a given model."""
return OPEN_AI_MODELS[model_name].max_tokens
def get_remaining_budget(self) -> float:
"""Get the remaining budget."""
# Reads `remaining_budget` directly from `self._budget`; there is no guard
# here for `_budget` being unset.
return self._budget.remaining_budget
@classmethod
def get_tokenizer(cls, model_name: OpenAIModelName) -> ModelTokenizer:
"""Get the tokenizer for a given model, looked up via tiktoken."""
return tiktoken.encoding_for_model(model_name)
@@ -379,45 +369,60 @@ class OpenAIProvider(
model_prompt += completion_kwargs["messages"]
del completion_kwargs["messages"]
cost = 0.0
attempts = 0
while True:
response = await self._create_chat_completion(
_response = await self._create_chat_completion(
messages=model_prompt,
**completion_kwargs,
)
response_message = response.choices[0].message
_response_msg = _response.choices[0].message
if (
tool_calls_compat_mode
and response_message.content
and not response_message.tool_calls
and _response_msg.content
and not _response_msg.tool_calls
):
tool_calls = list(
_tool_calls_compat_extract_calls(response_message.content)
_tool_calls_compat_extract_calls(_response_msg.content)
)
elif response_message.tool_calls:
elif _response_msg.tool_calls:
tool_calls = [
AssistantToolCall(**tc.dict()) for tc in response_message.tool_calls
AssistantToolCall(**tc.dict()) for tc in _response_msg.tool_calls
]
else:
tool_calls = None
assistant_message = AssistantChatMessage(
content=response_message.content,
content=_response_msg.content,
tool_calls=tool_calls,
)
response = ChatModelResponse(
response=assistant_message,
model_info=OPEN_AI_CHAT_MODELS[model_name],
prompt_tokens_used=(
_response.usage.prompt_tokens if _response.usage else 0
),
completion_tokens_used=(
_response.usage.completion_tokens if _response.usage else 0
),
)
cost += self._budget.update_usage_and_cost(response)
self._logger.debug(
f"Completion usage: {response.prompt_tokens_used} input, "
f"{response.completion_tokens_used} output - ${round(cost, 2)}"
)
# If parsing the response fails, append the error to the prompt, and let the
# LLM fix its mistake(s).
try:
attempts += 1
parsed_response = completion_parser(assistant_message)
response.parsed_result = completion_parser(assistant_message)
break
except Exception as e:
self._logger.warning(f"Parsing attempt #{attempts} failed: {e}")
self._logger.debug(
f"Parsing failed on response: '''{response_message}'''"
)
self._logger.debug(f"Parsing failed on response: '''{_response_msg}'''")
if attempts < self._configuration.fix_failed_parse_tries:
model_prompt.append(
ChatMessage.system(f"ERROR PARSING YOUR RESPONSE:\n\n{e}")
@@ -425,16 +430,9 @@ class OpenAIProvider(
else:
raise
response = ChatModelResponse(
response=assistant_message,
parsed_result=parsed_response,
model_info=OPEN_AI_CHAT_MODELS[model_name],
prompt_tokens_used=response.usage.prompt_tokens if response.usage else 0,
completion_tokens_used=(
response.usage.completion_tokens if response.usage else 0
),
)
self._budget.update_usage_and_cost(response)
if attempts > 1:
self._logger.debug(f"Total cost for {attempts} attempts: {round(cost, 2)}")
return response
async def create_embedding(

View File

@@ -1,5 +1,6 @@
import abc
import enum
import math
from typing import (
Callable,
ClassVar,
@@ -208,8 +209,12 @@ class ModelProviderBudget(ProviderBudget):
def update_usage_and_cost(
self,
model_response: ModelResponse,
) -> None:
"""Update the usage and cost of the provider."""
) -> float:
"""Update the usage and cost of the provider.
Returns:
float: The (calculated) cost of the given model response.
"""
model_info = model_response.model_info
self.usage.update_usage(model_response)
incurred_cost = (
@@ -218,6 +223,7 @@ class ModelProviderBudget(ProviderBudget):
)
self.total_cost += incurred_cost
self.remaining_budget -= incurred_cost
return incurred_cost
class ModelProviderSettings(ProviderSettings):
@@ -232,6 +238,7 @@ class ModelProvider(abc.ABC):
default_settings: ClassVar[ModelProviderSettings]
_budget: Optional[ModelProviderBudget]
_configuration: ModelProviderConfiguration
@abc.abstractmethod
@@ -246,9 +253,15 @@ class ModelProvider(abc.ABC):
def get_token_limit(self, model_name: str) -> int:
...
# NOTE(review): the decorator below sits above a method that has a concrete
# body. This text looks like a rendered diff, so the decorator is presumably a
# leftover pre-change line — confirm against the real file.
@abc.abstractmethod
def get_incurred_cost(self) -> float:
"""Get the total cost recorded on the provider's budget.

Returns:
float: `total_cost` of `self._budget`, or 0 when `self._budget` is falsy.
"""
if self._budget:
return self._budget.total_cost
return 0
def get_remaining_budget(self) -> float:
# Returns `remaining_budget` of `self._budget`; when `self._budget` is falsy
# the remaining budget is reported as unlimited (`math.inf`).
# NOTE(review): the bare `...` below is presumably the old placeholder body
# from the pre-change side of the diff — confirm against the real file.
...
if self._budget:
return self._budget.remaining_budget
return math.inf
class ModelTokenizer(Protocol):

View File

@@ -31,8 +31,12 @@ class ProviderBudget(SystemConfiguration):
usage: ProviderUsage
# Abstract hook for recording usage and cost; the body is only `...`, so
# subclasses supply the implementation.
# NOTE(review): two signatures (`-> None` and `-> float`) and two docstrings
# appear below. This text looks like a rendered diff, so the `-> None` pair is
# presumably the pre-change version — confirm against the real file.
@abc.abstractmethod
def update_usage_and_cost(self, *args, **kwargs) -> None:
"""Update the usage and cost of the resource."""
def update_usage_and_cost(self, *args, **kwargs) -> float:
"""Update the usage and cost of the provider.
Returns:
float: The (calculated) cost of the given model response.
"""
...