refactor(agent): Add ChatModelProvider.get_available_models() and remove ApiManager

Author: Reinier van der Leer
Date:   2024-04-20 21:39:23 +02:00
parent b77451bb3a
commit 35ebb10378
10 changed files with 77 additions and 294 deletions

View File

@@ -17,7 +17,6 @@ from autogpt.core.resource.model_providers import (
ChatModelProvider,
)
from autogpt.file_storage.base import FileStorage
from autogpt.llm.api_manager import ApiManager
from autogpt.logs.log_cycle import (
CURRENT_CONTEXT_FILE_NAME,
NEXT_ACTION_FILE_NAME,
@@ -129,30 +128,6 @@ class Agent(
ChatMessage.system(f"The current time and date is {time.strftime('%c')}"),
)
# Add budget information (if any) to prompt
api_manager = ApiManager()
if api_manager.get_total_budget() > 0.0:
remaining_budget = (
api_manager.get_total_budget() - api_manager.get_total_cost()
)
if remaining_budget < 0:
remaining_budget = 0
budget_msg = ChatMessage.system(
f"Your remaining API budget is ${remaining_budget:.3f}"
+ (
" BUDGET EXCEEDED! SHUT DOWN!\n\n"
if remaining_budget == 0
else " Budget very nearly exceeded! Shut down gracefully!\n\n"
if remaining_budget < 0.005
else " Budget nearly exceeded. Finish up.\n\n"
if remaining_budget < 0.01
else ""
),
)
logger.debug(budget_msg)
extra_messages.append(budget_msg)
if include_os_info is None:
include_os_info = self.legacy_config.execute_local_commands

View File

@@ -3,7 +3,7 @@ from __future__ import annotations
import logging
from pathlib import Path
from typing import TYPE_CHECKING, Literal, Optional
from typing import Literal, Optional
import click
from colorama import Back, Fore, Style
@@ -11,17 +11,14 @@ from colorama import Back, Fore, Style
from autogpt import utils
from autogpt.config import Config
from autogpt.config.config import GPT_3_MODEL, GPT_4_MODEL
from autogpt.llm.api_manager import ApiManager
from autogpt.core.resource.model_providers.openai import OpenAIModelName, OpenAIProvider
from autogpt.logs.helpers import request_user_double_check
from autogpt.memory.vector import get_supported_memory_backends
if TYPE_CHECKING:
from autogpt.core.resource.model_providers.openai import OpenAICredentials
logger = logging.getLogger(__name__)
def apply_overrides_to_config(
async def apply_overrides_to_config(
config: Config,
continuous: bool = False,
continuous_limit: Optional[int] = None,
@@ -80,23 +77,14 @@ def apply_overrides_to_config(
config.smart_llm = GPT_3_MODEL
elif (
gpt4only
and check_model(
GPT_4_MODEL,
model_type="smart_llm",
api_credentials=config.openai_credentials,
)
== GPT_4_MODEL
and (await check_model(GPT_4_MODEL, model_type="smart_llm")) == GPT_4_MODEL
):
# --gpt4only should always use gpt-4, despite user's SMART_LLM config
config.fast_llm = GPT_4_MODEL
config.smart_llm = GPT_4_MODEL
else:
config.fast_llm = check_model(
config.fast_llm, "fast_llm", api_credentials=config.openai_credentials
)
config.smart_llm = check_model(
config.smart_llm, "smart_llm", api_credentials=config.openai_credentials
)
config.fast_llm = await check_model(config.fast_llm, "fast_llm")
config.smart_llm = await check_model(config.smart_llm, "smart_llm")
if memory_type:
supported_memory = get_supported_memory_backends()
@@ -161,19 +149,17 @@ def apply_overrides_to_config(
config.skip_news = True
def check_model(
model_name: str,
model_type: Literal["smart_llm", "fast_llm"],
api_credentials: OpenAICredentials,
) -> str:
async def check_model(
model_name: OpenAIModelName, model_type: Literal["smart_llm", "fast_llm"]
) -> OpenAIModelName:
"""Check if model is available for use. If not, return gpt-3.5-turbo."""
api_manager = ApiManager()
models = api_manager.get_models(api_credentials)
openai = OpenAIProvider()
models = await openai.get_available_models()
if any(model_name == m.id for m in models):
if any(model_name == m.name for m in models):
return model_name
logger.warning(
f"You don't have access to {model_name}. Setting {model_type} to gpt-3.5-turbo."
f"You don't have access to {model_name}. Setting {model_type} to {GPT_3_MODEL}."
)
return "gpt-3.5-turbo"
return GPT_3_MODEL
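
Usage note: check_model is now a coroutine and builds its own OpenAIProvider instead of taking credentials. A minimal sketch of calling it (assumes OPENAI_API_KEY is set so OpenAIProvider() can authenticate; the wrapper script is illustrative, not part of this commit):

import asyncio

from autogpt.app.configurator import check_model
from autogpt.core.resource.model_providers.openai import OpenAIModelName

async def main():
    # Falls back to GPT_3_MODEL when the account has no access to the requested model
    smart = await check_model(OpenAIModelName.GPT4, model_type="smart_llm")
    print(f"Resolved smart_llm: {smart}")

asyncio.run(main())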

View File

@@ -108,7 +108,7 @@ async def run_auto_gpt(
# TODO: fill in llm values here
assert_config_has_openai_api_key(config)
apply_overrides_to_config(
await apply_overrides_to_config(
config=config,
continuous=continuous,
continuous_limit=continuous_limit,
@@ -390,7 +390,7 @@ async def run_auto_gpt_server(
# TODO: fill in llm values here
assert_config_has_openai_api_key(config)
apply_overrides_to_config(
await apply_overrides_to_config(
config=config,
prompt_settings_file=prompt_settings,
gpt3only=gpt3only,

View File

@@ -21,6 +21,7 @@ from autogpt.core.configuration.schema import (
from autogpt.core.resource.model_providers.openai import (
OPEN_AI_CHAT_MODELS,
OpenAICredentials,
OpenAIModelName,
)
from autogpt.file_storage import FileStorageBackendName
from autogpt.plugins.plugins_config import PluginsConfig
@@ -34,8 +35,8 @@ AZURE_CONFIG_FILE = Path("azure.yaml")
PLUGINS_CONFIG_FILE = Path("plugins_config.yaml")
PROMPT_SETTINGS_FILE = Path("prompt_settings.yaml")
GPT_4_MODEL = "gpt-4"
GPT_3_MODEL = "gpt-3.5-turbo"
GPT_4_MODEL = OpenAIModelName.GPT4
GPT_3_MODEL = OpenAIModelName.GPT3
class Config(SystemSettings, arbitrary_types_allowed=True):
@@ -77,12 +78,12 @@ class Config(SystemSettings, arbitrary_types_allowed=True):
)
# Model configuration
fast_llm: str = UserConfigurable(
default="gpt-3.5-turbo-0125",
fast_llm: OpenAIModelName = UserConfigurable(
default=OpenAIModelName.GPT3,
from_env="FAST_LLM",
)
smart_llm: str = UserConfigurable(
default="gpt-4-turbo-preview",
smart_llm: OpenAIModelName = UserConfigurable(
default=OpenAIModelName.GPT4_TURBO,
from_env="SMART_LLM",
)
temperature: float = UserConfigurable(default=0, from_env="TEMPERATURE")
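
Usage note: fast_llm and smart_llm are now typed as OpenAIModelName. A minimal sketch, assuming OpenAIModelName is a str-backed enum whose members carry the same literals they replace above, so env overrides like FAST_LLM=gpt-3.5-turbo keep resolving:

from autogpt.core.resource.model_providers.openai import OpenAIModelName

# str-backed enum members compare equal to the plain model strings they replace
assert OpenAIModelName.GPT3 == "gpt-3.5-turbo"
assert OpenAIModelName.GPT4 == "gpt-4"
assert OpenAIModelName.GPT4_TURBO.startswith("gpt-4")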

View File

@@ -350,6 +350,10 @@ class OpenAIProvider(
self._logger = logger or logging.getLogger(__name__)
async def get_available_models(self) -> list[ChatModelInfo]:
_models = (await self._client.models.list()).data
return [OPEN_AI_MODELS[m.id] for m in _models if m.id in OPEN_AI_MODELS]
def get_token_limit(self, model_name: str) -> int:
"""Get the token limit for a given model."""
return OPEN_AI_MODELS[model_name].max_tokens
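
Usage note: a minimal sketch of the new provider method (assumes OPENAI_API_KEY is available so OpenAIProvider() can construct its client; the script is illustrative, not part of this commit):

import asyncio

from autogpt.core.resource.model_providers.openai import OpenAIProvider

async def main():
    provider = OpenAIProvider()
    # Only models present in OPEN_AI_MODELS are returned, as ChatModelInfo objects
    models = await provider.get_available_models()
    print([m.name for m in models])

asyncio.run(main())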

View File

@@ -338,6 +338,10 @@ class ChatModelResponse(ModelResponse, Generic[_T]):
class ChatModelProvider(ModelProvider):
@abc.abstractmethod
async def get_available_models(self) -> list[ChatModelInfo]:
...
@abc.abstractmethod
def count_message_tokens(
self,

View File

@@ -1,130 +0,0 @@
from __future__ import annotations
import logging
from typing import List, Optional
from openai import APIError, AzureOpenAI, OpenAI
from openai.types import Model
from autogpt.core.resource.model_providers.openai import (
OPEN_AI_MODELS,
OpenAICredentials,
)
from autogpt.core.resource.model_providers.schema import ChatModelInfo
from autogpt.singleton import Singleton
logger = logging.getLogger(__name__)
class ApiManager(metaclass=Singleton):
def __init__(self):
self.total_prompt_tokens = 0
self.total_completion_tokens = 0
self.total_cost = 0
self.total_budget = 0
self.models: Optional[list[Model]] = None
def reset(self):
self.total_prompt_tokens = 0
self.total_completion_tokens = 0
self.total_cost = 0
self.total_budget = 0.0
self.models = None
def update_cost(self, prompt_tokens, completion_tokens, model):
"""
Update the total cost, prompt tokens, and completion tokens.
Args:
prompt_tokens (int): The number of tokens used in the prompt.
completion_tokens (int): The number of tokens used in the completion.
model (str): The model used for the API call.
"""
# the .model property in API responses can contain version suffixes like -v2
model = model[:-3] if model.endswith("-v2") else model
model_info = OPEN_AI_MODELS[model]
self.total_prompt_tokens += prompt_tokens
self.total_completion_tokens += completion_tokens
self.total_cost += prompt_tokens * model_info.prompt_token_cost / 1000
if isinstance(model_info, ChatModelInfo):
self.total_cost += (
completion_tokens * model_info.completion_token_cost / 1000
)
logger.debug(f"Total running cost: ${self.total_cost:.3f}")
def set_total_budget(self, total_budget):
"""
Sets the total user-defined budget for API calls.
Args:
total_budget (float): The total budget for API calls.
"""
self.total_budget = total_budget
def get_total_prompt_tokens(self):
"""
Get the total number of prompt tokens.
Returns:
int: The total number of prompt tokens.
"""
return self.total_prompt_tokens
def get_total_completion_tokens(self):
"""
Get the total number of completion tokens.
Returns:
int: The total number of completion tokens.
"""
return self.total_completion_tokens
def get_total_cost(self):
"""
Get the total cost of API calls.
Returns:
float: The total cost of API calls.
"""
return self.total_cost
def get_total_budget(self):
"""
Get the total user-defined budget for API calls.
Returns:
float: The total budget for API calls.
"""
return self.total_budget
def get_models(self, openai_credentials: OpenAICredentials) -> List[Model]:
"""
Get list of available GPT models.
Returns:
list[Model]: List of available GPT models.
"""
if self.models is not None:
return self.models
try:
if openai_credentials.api_type == "azure":
all_models = (
AzureOpenAI(**openai_credentials.get_api_access_kwargs())
.models.list()
.data
)
else:
all_models = (
OpenAI(**openai_credentials.get_api_access_kwargs())
.models.list()
.data
)
self.models = [model for model in all_models if "gpt" in model.id]
except APIError as e:
logger.error(e.message)
exit(1)
return self.models

View File

@@ -18,7 +18,6 @@ from autogpt.file_storage.local import (
FileStorageConfiguration,
LocalFileStorage,
)
from autogpt.llm.api_manager import ApiManager
from autogpt.logs.config import configure_logging
from autogpt.models.command_registry import CommandRegistry
@@ -102,13 +101,6 @@ def setup_logger(config: Config):
)
@pytest.fixture()
def api_manager() -> ApiManager:
if ApiManager in ApiManager._instances:
del ApiManager._instances[ApiManager]
return ApiManager()
@pytest.fixture
def llm_provider(config: Config) -> OpenAIProvider:
return _configure_openai_provider(config)

View File

@@ -1,77 +0,0 @@
import pytest
from pytest_mock import MockerFixture
from autogpt.core.resource.model_providers import (
OPEN_AI_CHAT_MODELS,
OPEN_AI_EMBEDDING_MODELS,
)
from autogpt.llm.api_manager import ApiManager
api_manager = ApiManager()
@pytest.fixture(autouse=True)
def reset_api_manager():
api_manager.reset()
yield
@pytest.fixture(autouse=True)
def mock_costs(mocker: MockerFixture):
mocker.patch.multiple(
OPEN_AI_CHAT_MODELS["gpt-3.5-turbo"],
prompt_token_cost=0.0013,
completion_token_cost=0.0025,
)
mocker.patch.multiple(
OPEN_AI_EMBEDDING_MODELS["text-embedding-ada-002"],
prompt_token_cost=0.0004,
)
yield
class TestApiManager:
def test_getter_methods(self):
"""Test the getter methods for total tokens, cost, and budget."""
api_manager.update_cost(600, 1200, "gpt-3.5-turbo")
api_manager.set_total_budget(10.0)
assert api_manager.get_total_prompt_tokens() == 600
assert api_manager.get_total_completion_tokens() == 1200
assert api_manager.get_total_cost() == (600 * 0.0013 + 1200 * 0.0025) / 1000
assert api_manager.get_total_budget() == 10.0
@staticmethod
def test_set_total_budget():
"""Test if setting the total budget works correctly."""
total_budget = 10.0
api_manager.set_total_budget(total_budget)
assert api_manager.get_total_budget() == total_budget
@staticmethod
def test_update_cost_completion_model():
"""Test if updating the cost works correctly."""
prompt_tokens = 50
completion_tokens = 100
model = "gpt-3.5-turbo"
api_manager.update_cost(prompt_tokens, completion_tokens, model)
assert api_manager.get_total_prompt_tokens() == prompt_tokens
assert api_manager.get_total_completion_tokens() == completion_tokens
assert (
api_manager.get_total_cost()
== (prompt_tokens * 0.0013 + completion_tokens * 0.0025) / 1000
)
@staticmethod
def test_update_cost_embedding_model():
"""Test if updating the cost works correctly."""
prompt_tokens = 1337
model = "text-embedding-ada-002"
api_manager.update_cost(prompt_tokens, 0, model)
assert api_manager.get_total_prompt_tokens() == prompt_tokens
assert api_manager.get_total_completion_tokens() == 0
assert api_manager.get_total_cost() == (prompt_tokens * 0.0004) / 1000

View File

@@ -2,18 +2,24 @@
Test cases for the config class, which handles the configuration settings
for the AI and ensures it behaves as a singleton.
"""
import asyncio
import os
from typing import Any
from unittest import mock
from unittest.mock import patch
import pytest
from openai.pagination import SyncPage
from openai.pagination import AsyncPage
from openai.types import Model
from pydantic import SecretStr
from autogpt.app.configurator import GPT_3_MODEL, GPT_4_MODEL, apply_overrides_to_config
from autogpt.config import Config, ConfigBuilder
from autogpt.core.resource.model_providers.openai import OpenAIModelName
from autogpt.core.resource.model_providers.schema import (
ChatModelInfo,
ModelProviderName,
ModelProviderService,
)
def test_initial_values(config: Config) -> None:
@@ -26,22 +32,26 @@ def test_initial_values(config: Config) -> None:
assert config.smart_llm.startswith("gpt-4")
@patch("openai.resources.models.Models.list")
def test_fallback_to_gpt3_if_gpt4_not_available(
@pytest.mark.asyncio
@mock.patch("openai.resources.models.AsyncModels.list")
async def test_fallback_to_gpt3_if_gpt4_not_available(
mock_list_models: Any, config: Config
) -> None:
"""
Test if models update to gpt-3.5-turbo if gpt-4 is not available.
"""
config.fast_llm = "gpt-4"
config.smart_llm = "gpt-4"
config.fast_llm = OpenAIModelName.GPT4_TURBO
config.smart_llm = OpenAIModelName.GPT4_TURBO
mock_list_models.return_value = SyncPage(
data=[Model(id=GPT_3_MODEL, created=0, object="model", owned_by="AutoGPT")],
object="Models", # no idea what this should be, but irrelevant
mock_list_models.return_value = asyncio.Future()
mock_list_models.return_value.set_result(
AsyncPage(
data=[Model(id=GPT_3_MODEL, created=0, object="model", owned_by="AutoGPT")],
object="Models", # no idea what this should be, but irrelevant
)
)
apply_overrides_to_config(
await apply_overrides_to_config(
config=config,
gpt3only=False,
gpt4only=False,
@@ -136,12 +146,20 @@ def test_azure_config(config_with_azure: Config) -> None:
)
def test_create_config_gpt4only(config: Config) -> None:
with mock.patch("autogpt.llm.api_manager.ApiManager.get_models") as mock_get_models:
@pytest.mark.asyncio
async def test_create_config_gpt4only(config: Config) -> None:
with mock.patch(
"autogpt.core.resource.model_providers.openai.OpenAIProvider.get_available_models"
) as mock_get_models:
mock_get_models.return_value = [
Model(id=GPT_4_MODEL, created=0, object="model", owned_by="AutoGPT")
ChatModelInfo(
service=ModelProviderService.CHAT,
name=GPT_4_MODEL,
provider_name=ModelProviderName.OPENAI,
max_tokens=4096,
)
]
apply_overrides_to_config(
await apply_overrides_to_config(
config=config,
gpt4only=True,
)
@@ -149,10 +167,20 @@ def test_create_config_gpt4only(config: Config) -> None:
assert config.smart_llm == GPT_4_MODEL
def test_create_config_gpt3only(config: Config) -> None:
with mock.patch("autogpt.llm.api_manager.ApiManager.get_models") as mock_get_models:
mock_get_models.return_value = [{"id": GPT_3_MODEL}]
apply_overrides_to_config(
@pytest.mark.asyncio
async def test_create_config_gpt3only(config: Config) -> None:
with mock.patch(
"autogpt.core.resource.model_providers.openai.OpenAIProvider.get_available_models"
) as mock_get_models:
mock_get_models.return_value = [
ChatModelInfo(
service=ModelProviderService.CHAT,
name=GPT_3_MODEL,
provider_name=ModelProviderName.OPENAI,
max_tokens=4096,
)
]
await apply_overrides_to_config(
config=config,
gpt3only=True,
)