Use GPT-4 in Agent loop by default (#4899)

* Use GPT-4 as default smart LLM in Agent

* Rename (smart|fast)_llm_model to (smart|fast)_llm everywhere

* Fix test_config.py::test_initial_values

* Fix test_config.py::test_azure_config

* Fix Azure config backwards compatibility
Author: Reinier van der Leer
Date: 2023-07-07 03:42:18 +02:00
Committed by: GitHub
parent ac17518663
commit bde007e6f7

16 changed files with 109 additions and 112 deletions

View File

@@ -66,11 +66,11 @@ OPENAI_API_KEY=your-openai-api-key
### LLM MODELS
################################################################################
## SMART_LLM_MODEL - Smart language model (Default: gpt-3.5-turbo)
# SMART_LLM_MODEL=gpt-3.5-turbo
## SMART_LLM - Smart language model (Default: gpt-4)
# SMART_LLM=gpt-4
## FAST_LLM_MODEL - Fast language model (Default: gpt-3.5-turbo)
# FAST_LLM_MODEL=gpt-3.5-turbo
## FAST_LLM - Fast language model (Default: gpt-3.5-turbo)
# FAST_LLM=gpt-3.5-turbo
## EMBEDDING_MODEL - Model to use for creating embeddings
# EMBEDDING_MODEL=text-embedding-ada-002

View File

@@ -81,9 +81,7 @@ class Agent:
self.created_at = datetime.now().strftime("%Y%m%d_%H%M%S")
self.cycle_count = 0
self.log_cycle_handler = LogCycleHandler()
self.fast_token_limit = OPEN_AI_CHAT_MODELS.get(
config.fast_llm_model
).max_tokens
self.smart_token_limit = OPEN_AI_CHAT_MODELS.get(config.smart_llm).max_tokens
def start_interaction_loop(self):
# Avoid circular imports
@@ -138,8 +136,8 @@ class Agent:
self,
self.system_prompt,
self.triggering_prompt,
self.fast_token_limit,
self.config.fast_llm_model,
self.smart_token_limit,
self.config.smart_llm,
)
try:
@@ -283,12 +281,12 @@ class Agent:
result = f"Command {command_name} returned: " f"{command_result}"
result_tlength = count_string_tokens(
str(command_result), self.config.fast_llm_model
str(command_result), self.config.smart_llm
)
memory_tlength = count_string_tokens(
str(self.history.summary_message()), self.config.fast_llm_model
str(self.history.summary_message()), self.config.smart_llm
)
if result_tlength + memory_tlength + 600 > self.fast_token_limit:
if result_tlength + memory_tlength + 600 > self.smart_token_limit:
result = f"Failure: command {command_name} returned too much output. \
Do not execute this command again with the same arguments."
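
For reference, a minimal standalone sketch of the budget check the agent now applies to command output against the smart LLM's context window. This is an illustration only: `rough_token_count` stands in for the repo's tiktoken-based `count_string_tokens`, and the 600-token reserve is taken from the hunk above.

```python
def rough_token_count(text: str) -> int:
    # Crude stand-in for the tiktoken-based count_string_tokens().
    return max(1, len(text) // 4)

def output_fits_in_context(command_result: str, memory_summary: str,
                           smart_token_limit: int, reserve: int = 600) -> bool:
    """True if the command output plus the running memory summary (plus a fixed
    reserve for the rest of the prompt) still fits the smart LLM's window."""
    result_tlength = rough_token_count(command_result)
    memory_tlength = rough_token_count(memory_summary)
    return result_tlength + memory_tlength + reserve <= smart_token_limit

# A very long command result blows an 8192-token gpt-4 budget:
print(output_fits_in_context("x" * 40_000, "short summary", smart_token_limit=8192))  # False
```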

View File

@@ -17,8 +17,8 @@ from typing import Optional
class Config(SystemSettings):
fast_llm_model: str
smart_llm_model: str
fast_llm: str
smart_llm: str
continuous_mode: bool
skip_news: bool
workspace_path: Optional[str] = None
@@ -88,11 +88,17 @@ class Config(SystemSettings):
def get_azure_kwargs(self, model: str) -> dict[str, str]:
"""Get the kwargs for the Azure API."""
deployment_id = {
self.fast_llm_model: self.azure_model_to_deployment_id_map.get(
"fast_llm_model_deployment_id"
self.fast_llm: self.azure_model_to_deployment_id_map.get(
"fast_llm_deployment_id",
self.azure_model_to_deployment_id_map.get(
"fast_llm_model_deployment_id" # backwards compatibility
),
),
self.smart_llm_model: self.azure_model_to_deployment_id_map.get(
"smart_llm_model_deployment_id"
self.smart_llm: self.azure_model_to_deployment_id_map.get(
"smart_llm_deployment_id",
self.azure_model_to_deployment_id_map.get(
"smart_llm_model_deployment_id" # backwards compatibility
),
),
"text-embedding-ada-002": self.azure_model_to_deployment_id_map.get(
"embedding_model_deployment_id"
@@ -129,8 +135,8 @@ class ConfigBuilder(Configurable[Config]):
default_settings = Config(
name="Default Server Config",
description="This is a default server configuration",
smart_llm_model="gpt-3.5-turbo",
fast_llm_model="gpt-3.5-turbo",
smart_llm="gpt-4",
fast_llm="gpt-3.5-turbo",
continuous_mode=False,
continuous_limit=0,
skip_news=False,
@@ -190,8 +196,8 @@ class ConfigBuilder(Configurable[Config]):
"shell_command_control": os.getenv("SHELL_COMMAND_CONTROL"),
"ai_settings_file": os.getenv("AI_SETTINGS_FILE"),
"prompt_settings_file": os.getenv("PROMPT_SETTINGS_FILE"),
"fast_llm_model": os.getenv("FAST_LLM_MODEL"),
"smart_llm_model": os.getenv("SMART_LLM_MODEL"),
"fast_llm": os.getenv("FAST_LLM", os.getenv("FAST_LLM_MODEL")),
"smart_llm": os.getenv("SMART_LLM", os.getenv("SMART_LLM_MODEL")),
"embedding_model": os.getenv("EMBEDDING_MODEL"),
"browse_spacy_language_model": os.getenv("BROWSE_SPACY_LANGUAGE_MODEL"),
"openai_api_key": os.getenv("OPENAI_API_KEY"),

View File

@@ -87,21 +87,18 @@ def create_config(
# Set the default LLM models
if gpt3only:
logger.typewriter_log("GPT3.5 Only Mode: ", Fore.GREEN, "ENABLED")
# --gpt3only should always use gpt-3.5-turbo, despite user's FAST_LLM_MODEL config
config.fast_llm_model = GPT_3_MODEL
config.smart_llm_model = GPT_3_MODEL
# --gpt3only should always use gpt-3.5-turbo, despite user's FAST_LLM config
config.fast_llm = GPT_3_MODEL
config.smart_llm = GPT_3_MODEL
elif (
gpt4only
and check_model(GPT_4_MODEL, model_type="smart_llm_model") == GPT_4_MODEL
):
elif gpt4only and check_model(GPT_4_MODEL, model_type="smart_llm") == GPT_4_MODEL:
logger.typewriter_log("GPT4 Only Mode: ", Fore.GREEN, "ENABLED")
# --gpt4only should always use gpt-4, despite user's SMART_LLM_MODEL config
config.fast_llm_model = GPT_4_MODEL
config.smart_llm_model = GPT_4_MODEL
# --gpt4only should always use gpt-4, despite user's SMART_LLM config
config.fast_llm = GPT_4_MODEL
config.smart_llm = GPT_4_MODEL
else:
config.fast_llm_model = check_model(config.fast_llm_model, "fast_llm_model")
config.smart_llm_model = check_model(config.smart_llm_model, "smart_llm_model")
config.fast_llm = check_model(config.fast_llm, "fast_llm")
config.smart_llm = check_model(config.smart_llm, "smart_llm")
if memory_type:
supported_memory = get_supported_memory_backends()
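
Distilled, the model-selection precedence above looks like the sketch below. It is illustrative only; `gpt4_available` stands in for the `check_model(GPT_4_MODEL, model_type="smart_llm") == GPT_4_MODEL` test.

```python
GPT_3_MODEL, GPT_4_MODEL = "gpt-3.5-turbo", "gpt-4"

def resolve_models(fast_llm: str, smart_llm: str, gpt3only: bool, gpt4only: bool,
                   gpt4_available: bool) -> tuple[str, str]:
    if gpt3only:
        # --gpt3only always wins over the user's FAST_LLM/SMART_LLM config.
        return GPT_3_MODEL, GPT_3_MODEL
    if gpt4only and gpt4_available:
        # --gpt4only forces gpt-4 for both roles, provided the account has access.
        return GPT_4_MODEL, GPT_4_MODEL
    # Otherwise keep the configured models (check_model() validates each in the real code).
    return fast_llm, smart_llm

print(resolve_models("gpt-3.5-turbo", "gpt-4", gpt3only=False, gpt4only=True, gpt4_available=True))
```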

View File

@@ -35,13 +35,13 @@ def chat_with_ai(
system_prompt (str): The prompt explaining the rules to the AI.
triggering_prompt (str): The input from the user.
token_limit (int): The maximum number of tokens allowed in the API call.
model (str, optional): The model to use. If None, the config.fast_llm_model will be used. Defaults to None.
model (str, optional): The model to use. By default, the config.smart_llm will be used.
Returns:
str: The AI's response.
"""
if model is None:
model = config.fast_llm_model
model = config.smart_llm
# Reserve 1000 tokens for the response
logger.debug(f"Token limit: {token_limit}")

View File

@@ -21,8 +21,8 @@ def call_ai_function(
function: str,
args: list,
description: str,
config: Config,
model: Optional[str] = None,
config: Optional[Config] = None,
) -> str:
"""Call an AI function
@@ -39,7 +39,7 @@ def call_ai_function(
str: The response from the function
"""
if model is None:
model = config.smart_llm_model
model = config.smart_llm
# For each arg, if any are None, convert to "None":
args = [str(arg) if arg is not None else "None" for arg in args]
# parse args to comma separated string
@@ -67,7 +67,7 @@ def create_text_completion(
max_output_tokens: Optional[int],
) -> str:
if model is None:
model = config.fast_llm_model
model = config.fast_llm
if temperature is None:
temperature = config.temperature
@@ -173,9 +173,7 @@ def create_chat_completion(
)
def check_model(
model_name: str, model_type: Literal["smart_llm_model", "fast_llm_model"]
) -> str:
def check_model(model_name: str, model_type: Literal["smart_llm", "fast_llm"]) -> str:
"""Check if model is available for use. If not, return gpt-3.5-turbo."""
api_manager = ApiManager()
models = api_manager.get_models()
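
A sketch of what the availability check behind `check_model` amounts to, based on the docstring above (the real implementation goes through `ApiManager.get_models()`; the model-list shape matches the mocks used in the tests further down):

```python
def check_model_sketch(model_name: str, available_models: list[dict]) -> str:
    """Return model_name if the API lists it; otherwise fall back to gpt-3.5-turbo."""
    if any(m["id"] == model_name for m in available_models):
        return model_name
    # The real code logs a warning to the user before falling back.
    return "gpt-3.5-turbo"

# With only gpt-3.5-turbo available, a gpt-4 request falls back:
print(check_model_sketch("gpt-4", [{"id": "gpt-3.5-turbo"}]))  # -> gpt-3.5-turbo
```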

View File

@@ -171,14 +171,14 @@ class MessageHistory:
# Assume an upper bound length for the summary prompt template, i.e. Your task is to create a concise running summary...., in summarize_batch func
# TODO make this default dynamic
prompt_template_length = 100
max_tokens = OPEN_AI_CHAT_MODELS.get(config.fast_llm_model).max_tokens
summary_tlength = count_string_tokens(str(self.summary), config.fast_llm_model)
max_tokens = OPEN_AI_CHAT_MODELS.get(config.fast_llm).max_tokens
summary_tlength = count_string_tokens(str(self.summary), config.fast_llm)
batch = []
batch_tlength = 0
# TODO Can put a cap on length of total new events and drop some previous events to save API cost, but need to think thru more how to do it without losing the context
for event in new_events:
event_tlength = count_string_tokens(str(event), config.fast_llm_model)
event_tlength = count_string_tokens(str(event), config.fast_llm)
if (
batch_tlength + event_tlength
@@ -187,7 +187,7 @@ class MessageHistory:
# The batch is full. Summarize it and start a new one.
self.summarize_batch(batch, config)
summary_tlength = count_string_tokens(
str(self.summary), config.fast_llm_model
str(self.summary), config.fast_llm
)
batch = [event]
batch_tlength = event_tlength
@@ -217,9 +217,7 @@ Latest Development:
"""
'''
prompt = ChatSequence.for_model(
config.fast_llm_model, [Message("user", prompt)]
)
prompt = ChatSequence.for_model(config.fast_llm, [Message("user", prompt)])
self.agent.log_cycle_handler.log_cycle(
self.agent.ai_name,
self.agent.created_at,
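
The summarization batching these hunks touch can be sketched standalone as follows. This is illustrative: the overflow threshold and the else-branch are inferred from the surrounding hunk, and `rough_token_count` stands in for `count_string_tokens`.

```python
def rough_token_count(text: str) -> int:
    # Crude stand-in for the tiktoken-based count_string_tokens().
    return max(1, len(text) // 4)

def batch_events(new_events: list[str], max_tokens: int,
                 prompt_template_length: int = 100) -> list[list[str]]:
    """Group events into batches that each fit the fast LLM's summarization budget."""
    batches: list[list[str]] = []
    batch: list[str] = []
    batch_tlength = 0
    summary_tlength = 0  # grows after every summarize_batch() call in the real code
    for event in new_events:
        event_tlength = rough_token_count(event)
        if batch_tlength + event_tlength > max_tokens - prompt_template_length - summary_tlength:
            # The batch is full. Summarize it and start a new one.
            if batch:
                batches.append(batch)
            batch = [event]
            batch_tlength = event_tlength
        else:
            batch.append(event)
            batch_tlength += event_tlength
    if batch:
        batches.append(batch)
    return batches

# With a 4096-token budget (gpt-3.5-turbo) and ten ~1000-token events -> 4 batches:
print(len(batch_events(["word " * 800] * 10, max_tokens=4096)))
```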

View File

@@ -82,7 +82,7 @@ def summarize_text(
if instruction and question:
raise ValueError("Parameters 'question' and 'instructions' cannot both be set")
model = config.fast_llm_model
model = config.fast_llm
if question:
instruction = (

View File

@@ -178,7 +178,7 @@ def generate_aiconfig_automatic(user_prompt: str, config: Config) -> AIConfig:
# Call LLM with the string as user input
output = create_chat_completion(
ChatSequence.for_model(
config.fast_llm_model,
config.fast_llm,
[
Message("system", system_prompt),
Message("user", prompt_ai_config_automatic),

View File

@@ -2,6 +2,6 @@ azure_api_type: azure
azure_api_base: your-base-url-for-azure
azure_api_version: api-version-for-azure
azure_model_map:
fast_llm_model_deployment_id: gpt35-deployment-id-for-azure
smart_llm_model_deployment_id: gpt4-deployment-id-for-azure
fast_llm_deployment_id: gpt35-deployment-id-for-azure
smart_llm_deployment_id: gpt4-deployment-id-for-azure
embedding_model_deployment_id: embedding-deployment-id-for-azure

View File

@@ -16,7 +16,7 @@ Configuration is controlled through the `Config` object. You can set configurati
- `EMBEDDING_MODEL`: LLM Model to use for embedding tasks. Default: text-embedding-ada-002
- `EXECUTE_LOCAL_COMMANDS`: If shell commands should be executed locally. Default: False
- `EXIT_KEY`: Exit key accepted to exit. Default: n
- `FAST_LLM_MODEL`: LLM Model to use for most tasks. Default: gpt-3.5-turbo
- `FAST_LLM`: LLM Model to use for most tasks. Default: gpt-3.5-turbo
- `GITHUB_API_KEY`: [Github API Key](https://github.com/settings/tokens). Optional.
- `GITHUB_USERNAME`: GitHub Username. Optional.
- `GOOGLE_API_KEY`: Google API key. Optional.
@@ -43,7 +43,7 @@ Configuration is controlled through the `Config` object. You can set configurati
- `SHELL_ALLOWLIST`: List of shell commands that ARE allowed to be executed by Auto-GPT. Only applies if `SHELL_COMMAND_CONTROL` is set to `allowlist`. Default: None
- `SHELL_COMMAND_CONTROL`: Whether to use `allowlist` or `denylist` to determine what shell commands can be executed (Default: denylist)
- `SHELL_DENYLIST`: List of shell commands that ARE NOT allowed to be executed by Auto-GPT. Only applies if `SHELL_COMMAND_CONTROL` is set to `denylist`. Default: sudo,su
- `SMART_LLM_MODEL`: LLM Model to use for "smart" tasks. Default: gpt-3.5-turbo
- `SMART_LLM`: LLM Model to use for "smart" tasks. Default: gpt-4
- `STREAMELEMENTS_VOICE`: StreamElements voice to use. Default: Brian
- `TEMPERATURE`: Value of temperature given to OpenAI. Value from 0 to 2. Lower is more deterministic, higher is more random. See https://platform.openai.com/docs/api-reference/completions/create#completions/create-temperature
- `TEXT_TO_SPEECH_PROVIDER`: Text to Speech Provider. Options are `gtts`, `macos`, `elevenlabs`, and `streamelements`. Default: gtts

View File

@@ -133,8 +133,8 @@ Get your OpenAI API key from: [https://platform.openai.com/account/api-keys](htt
make an Azure configuration file:
- Rename `azure.yaml.template` to `azure.yaml` and provide the relevant `azure_api_base`, `azure_api_version` and all the deployment IDs for the relevant models in the `azure_model_map` section:
- `fast_llm_model_deployment_id`: your gpt-3.5-turbo or gpt-4 deployment ID
- `smart_llm_model_deployment_id`: your gpt-4 deployment ID
- `fast_llm_deployment_id`: your gpt-3.5-turbo or gpt-4 deployment ID
- `smart_llm_deployment_id`: your gpt-4 deployment ID
- `embedding_model_deployment_id`: your text-embedding-ada-002 v2 deployment ID
Example:
@@ -143,7 +143,7 @@ Get your OpenAI API key from: [https://platform.openai.com/account/api-keys](htt
# Please specify all of these values as double-quoted strings
# Replace string in angled brackets (<>) to your own deployment Name
azure_model_map:
fast_llm_model_deployment_id: "<auto-gpt-deployment>"
fast_llm_deployment_id: "<auto-gpt-deployment>"
...
Details can be found in the [openai-python docs], and in the [Azure OpenAI docs] for the embedding model.

View File

@@ -72,7 +72,7 @@ If you don't have access to GPT-4, this mode allows you to use Auto-GPT!
```
./run.sh --gpt3only
```
You can achieve the same by setting `SMART_LLM_MODEL` in `.env` to `gpt-3.5-turbo`.
You can achieve the same by setting `SMART_LLM` in `.env` to `gpt-3.5-turbo`.
### GPT-4 ONLY Mode

View File

@@ -35,7 +35,7 @@ def mock_create_chat_completion(mocker, config):
wraps=create_chat_completion,
)
mock_create_chat_completion.return_value = ChatModelResponse(
model_info=OPEN_AI_CHAT_MODELS[config.fast_llm_model],
model_info=OPEN_AI_CHAT_MODELS[config.fast_llm],
content="irrelevant",
function_call={},
)

View File

@@ -20,8 +20,8 @@ def test_initial_values(config: Config):
assert config.debug_mode == False
assert config.continuous_mode == False
assert config.speak_mode == False
assert config.fast_llm_model == "gpt-3.5-turbo"
assert config.smart_llm_model == "gpt-3.5-turbo"
assert config.fast_llm == "gpt-3.5-turbo"
assert config.smart_llm == "gpt-4"
def test_set_continuous_mode(config: Config):
@@ -52,32 +52,32 @@ def test_set_speak_mode(config: Config):
config.speak_mode = speak_mode
def test_set_fast_llm_model(config: Config):
def test_set_fast_llm(config: Config):
"""
Test if the set_fast_llm_model() method updates the fast_llm_model attribute.
Test if the set_fast_llm() method updates the fast_llm attribute.
"""
# Store model name to reset it after the test
fast_llm_model = config.fast_llm_model
fast_llm = config.fast_llm
config.fast_llm_model = "gpt-3.5-turbo-test"
assert config.fast_llm_model == "gpt-3.5-turbo-test"
config.fast_llm = "gpt-3.5-turbo-test"
assert config.fast_llm == "gpt-3.5-turbo-test"
# Reset model name
config.fast_llm_model = fast_llm_model
config.fast_llm = fast_llm
def test_set_smart_llm_model(config: Config):
def test_set_smart_llm(config: Config):
"""
Test if the set_smart_llm_model() method updates the smart_llm_model attribute.
Test if the set_smart_llm() method updates the smart_llm attribute.
"""
# Store model name to reset it after the test
smart_llm_model = config.smart_llm_model
smart_llm = config.smart_llm
config.smart_llm_model = "gpt-4-test"
assert config.smart_llm_model == "gpt-4-test"
config.smart_llm = "gpt-4-test"
assert config.smart_llm == "gpt-4-test"
# Reset model name
config.smart_llm_model = smart_llm_model
config.smart_llm = smart_llm
def test_set_debug_mode(config: Config):
@@ -95,15 +95,15 @@ def test_set_debug_mode(config: Config):
@patch("openai.Model.list")
def test_smart_and_fast_llm_models_set_to_gpt4(mock_list_models, config: Config):
def test_smart_and_fast_llms_set_to_gpt4(mock_list_models, config: Config):
"""
Test if models update to gpt-3.5-turbo if both are set to gpt-4.
"""
fast_llm_model = config.fast_llm_model
smart_llm_model = config.smart_llm_model
fast_llm = config.fast_llm
smart_llm = config.smart_llm
config.fast_llm_model = "gpt-4"
config.smart_llm_model = "gpt-4"
config.fast_llm = "gpt-4"
config.smart_llm = "gpt-4"
mock_list_models.return_value = {"data": [{"id": "gpt-3.5-turbo"}]}
@@ -124,12 +124,12 @@ def test_smart_and_fast_llm_models_set_to_gpt4(mock_list_models, config: Config)
skip_news=False,
)
assert config.fast_llm_model == "gpt-3.5-turbo"
assert config.smart_llm_model == "gpt-3.5-turbo"
assert config.fast_llm == "gpt-3.5-turbo"
assert config.smart_llm == "gpt-3.5-turbo"
# Reset config
config.fast_llm_model = fast_llm_model
config.smart_llm_model = smart_llm_model
config.fast_llm = fast_llm
config.smart_llm = smart_llm
def test_missing_azure_config(workspace: Workspace):
@@ -152,8 +152,8 @@ azure_api_type: azure
azure_api_base: https://dummy.openai.azure.com
azure_api_version: 2023-06-01-preview
azure_model_map:
fast_llm_model_deployment_id: gpt-3.5-turbo
smart_llm_model_deployment_id: gpt-4
fast_llm_deployment_id: gpt-3.5-turbo
smart_llm_deployment_id: gpt-4
embedding_model_deployment_id: embedding-deployment-id-for-azure
"""
config_file = workspace.get_path("azure.yaml")
@@ -166,8 +166,8 @@ azure_model_map:
assert config.openai_api_base == "https://dummy.openai.azure.com"
assert config.openai_api_version == "2023-06-01-preview"
assert config.azure_model_to_deployment_id_map == {
"fast_llm_model_deployment_id": "gpt-3.5-turbo",
"smart_llm_model_deployment_id": "gpt-4",
"fast_llm_deployment_id": "gpt-3.5-turbo",
"smart_llm_deployment_id": "gpt-4",
"embedding_model_deployment_id": "embedding-deployment-id-for-azure",
}
@@ -181,8 +181,8 @@ azure_api_type: azure
azure_api_base: https://dummy.openai.azure.com
azure_api_version: 2023-06-01-preview
azure_model_map:
fast_llm_model_deployment_id: gpt-3.5-turbo
smart_llm_model_deployment_id: gpt-4
fast_llm_deployment_id: gpt-3.5-turbo
smart_llm_deployment_id: gpt-4
embedding_model_deployment_id: embedding-deployment-id-for-azure
"""
config_file = workspace.get_path("azure.yaml")
@@ -191,15 +191,15 @@ azure_model_map:
os.environ["AZURE_CONFIG_FILE"] = str(config_file)
config = ConfigBuilder.build_config_from_env()
config.fast_llm_model = "fast_llm_model"
config.smart_llm_model = "smart_llm_model"
config.fast_llm = "fast_llm"
config.smart_llm = "smart_llm"
def _get_deployment_id(model):
kwargs = config.get_azure_kwargs(model)
return kwargs.get("deployment_id", kwargs.get("engine"))
assert _get_deployment_id(config.fast_llm_model) == "gpt-3.5-turbo"
assert _get_deployment_id(config.smart_llm_model) == "gpt-4"
assert _get_deployment_id(config.fast_llm) == "gpt-3.5-turbo"
assert _get_deployment_id(config.smart_llm) == "gpt-4"
assert (
_get_deployment_id("text-embedding-ada-002")
== "embedding-deployment-id-for-azure"
@@ -211,8 +211,8 @@ azure_model_map:
def test_create_config_gpt4only(config: Config) -> None:
fast_llm_model = config.fast_llm_model
smart_llm_model = config.smart_llm_model
fast_llm = config.fast_llm
smart_llm = config.smart_llm
with mock.patch("autogpt.llm.api_manager.ApiManager.get_models") as mock_get_models:
mock_get_models.return_value = [{"id": GPT_4_MODEL}]
create_config(
@@ -231,17 +231,17 @@ def test_create_config_gpt4only(config: Config) -> None:
allow_downloads=False,
skip_news=False,
)
assert config.fast_llm_model == GPT_4_MODEL
assert config.smart_llm_model == GPT_4_MODEL
assert config.fast_llm == GPT_4_MODEL
assert config.smart_llm == GPT_4_MODEL
# Reset config
config.fast_llm_model = fast_llm_model
config.smart_llm_model = smart_llm_model
config.fast_llm = fast_llm
config.smart_llm = smart_llm
def test_create_config_gpt3only(config: Config) -> None:
fast_llm_model = config.fast_llm_model
smart_llm_model = config.smart_llm_model
fast_llm = config.fast_llm
smart_llm = config.smart_llm
with mock.patch("autogpt.llm.api_manager.ApiManager.get_models") as mock_get_models:
mock_get_models.return_value = [{"id": GPT_3_MODEL}]
create_config(
@@ -260,9 +260,9 @@ def test_create_config_gpt3only(config: Config) -> None:
allow_downloads=False,
skip_news=False,
)
assert config.fast_llm_model == GPT_3_MODEL
assert config.smart_llm_model == GPT_3_MODEL
assert config.fast_llm == GPT_3_MODEL
assert config.smart_llm == GPT_3_MODEL
# Reset config
config.fast_llm_model = fast_llm_model
config.smart_llm_model = smart_llm_model
config.fast_llm = fast_llm
config.smart_llm = smart_llm

View File

@@ -40,7 +40,7 @@ def agent(config: Config):
def test_message_history_batch_summary(mocker, agent, config):
history = MessageHistory(agent)
model = config.fast_llm_model
model = config.fast_llm
message_tlength = 0
message_count = 0
@@ -73,7 +73,7 @@ def test_message_history_batch_summary(mocker, agent, config):
assistant_reply = '{\n "thoughts": {\n "text": "I will use the \'google_search\' command to find more websites with job openings for software engineering manager role.",\n "reasoning": "Since the previous website did not provide any relevant information, I will use the \'google_search\' command to find more websites with job openings for software engineer role.",\n "plan": "- Use \'google_search\' command to find more websites with job openings for software engineer role",\n "criticism": "I need to ensure that I am able to extract the relevant information from each website and job opening.",\n "speak": "I will now use the \'google_search\' command to find more websites with job openings for software engineer role."\n },\n "command": {\n "name": "google_search",\n "args": {\n "query": "software engineer job openings"\n }\n }\n}'
msg = Message("assistant", assistant_reply, "ai_response")
history.append(msg)
message_tlength += count_string_tokens(str(msg), config.fast_llm_model)
message_tlength += count_string_tokens(str(msg), config.fast_llm)
message_count += 1
# mock some websites returned from google search command in the past
@@ -83,7 +83,7 @@ def test_message_history_batch_summary(mocker, agent, config):
result += "]"
msg = Message("system", result, "action_result")
history.append(msg)
message_tlength += count_string_tokens(str(msg), config.fast_llm_model)
message_tlength += count_string_tokens(str(msg), config.fast_llm)
message_count += 1
user_input = "Determine which next command to use, and respond using the format specified above:'"
@@ -99,7 +99,7 @@ def test_message_history_batch_summary(mocker, agent, config):
)
msg = Message("assistant", assistant_reply, "ai_response")
history.append(msg)
message_tlength += count_string_tokens(str(msg), config.fast_llm_model)
message_tlength += count_string_tokens(str(msg), config.fast_llm)
message_count += 1
result = (
@@ -109,7 +109,7 @@ def test_message_history_batch_summary(mocker, agent, config):
)
msg = Message("system", result, "action_result")
history.append(msg)
message_tlength += count_string_tokens(str(msg), config.fast_llm_model)
message_tlength += count_string_tokens(str(msg), config.fast_llm)
message_count += 1
user_input = "Determine which next command to use, and respond using the format specified above:'"
@@ -125,7 +125,7 @@ def test_message_history_batch_summary(mocker, agent, config):
# count the expected token length of the trimmed message by reducing the token length of messages in the last cycle
for message in messages_to_add:
if message.role != "user":
message_tlength -= count_string_tokens(str(message), config.fast_llm_model)
message_tlength -= count_string_tokens(str(message), config.fast_llm)
message_count -= 1
# test the main trim_message function
@@ -134,7 +134,7 @@ def test_message_history_batch_summary(mocker, agent, config):
)
expected_call_count = math.ceil(
message_tlength / (OPEN_AI_CHAT_MODELS.get(config.fast_llm_model).max_tokens)
message_tlength / (OPEN_AI_CHAT_MODELS.get(config.fast_llm).max_tokens)
)
# Expecting 2 batches because of over max token
assert mock_summary.call_count == expected_call_count # 2 at the time of writing
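
For a concrete feel for the assertion above, a worked example of the expected call count, assuming the 4096-token window that `OPEN_AI_CHAT_MODELS` reports for gpt-3.5-turbo; the `message_tlength` value here is hypothetical, since the real total depends on the mocked conversation.

```python
import math

max_tokens = 4096        # assumed context window for the fast LLM (gpt-3.5-turbo)
message_tlength = 6500   # hypothetical total token length of the trimmed history

expected_call_count = math.ceil(message_tlength / max_tokens)
print(expected_call_count)  # -> 2, i.e. two summarization batches, matching the comment above
```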