Coverage for autogpt/chat.py: 19% (70 statements)
coverage.py v7.2.3, created at 2023-04-22 05:45 +0000
import time

from openai.error import RateLimitError

from autogpt import token_counter
from autogpt.config import Config
from autogpt.llm_utils import create_chat_completion
from autogpt.logs import logger
from autogpt.types.openai import Message

cfg = Config()


def create_chat_message(role, content) -> Message:
    """
    Create a chat message with the given role and content.

    Args:
        role (str): The role of the message sender, e.g., "system", "user", or "assistant".
        content (str): The content of the message.

    Returns:
        Message: A dictionary containing the role and content of the message.
    """
    return {"role": role, "content": content}
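
# Illustrative example (not in the original source): the returned dict is
# the plain message shape the OpenAI chat completions API expects, e.g.
#     >>> create_chat_message("user", "Hello")
#     {'role': 'user', 'content': 'Hello'}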


def generate_context(prompt, relevant_memory, full_message_history, model):
    """Build the initial system context and return bookkeeping values.

    Returns a tuple of (next_message_to_add_index, current_tokens_used,
    insertion_index, current_context) so the caller can keep inserting
    history messages until the token limit is reached.
    """
    current_context = [
        create_chat_message("system", prompt),
        create_chat_message(
            "system", f"The current time and date is {time.strftime('%c')}"
        ),
        create_chat_message(
            "system",
            f"This reminds you of these events from your past:\n{relevant_memory}\n\n",
        ),
    ]

    # Add messages from the full message history until we reach the token limit
    next_message_to_add_index = len(full_message_history) - 1
    insertion_index = len(current_context)
    # Count the currently used tokens
    current_tokens_used = token_counter.count_message_tokens(current_context, model)
    return (
        next_message_to_add_index,
        current_tokens_used,
        insertion_index,
        current_context,
    )
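
# Sketch of the return values (added for clarity, not in the original source):
# with an empty full_message_history, generate_context returns
# next_message_to_add_index == -1 (nothing left to add), insertion_index == 3
# (just past the three system messages), and current_tokens_used counting
# only those system messages.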


# TODO: Change debug from hardcode to argument
def chat_with_ai(
    agent, prompt, user_input, full_message_history, permanent_memory, token_limit
):
    """Interact with the OpenAI API, sending the prompt, user input,
    message history, and permanent memory.

    Args:
        agent: The agent whose prompt_generator is handed to planning plugins.
        prompt (str): The prompt explaining the rules to the AI.
        user_input (str): The input from the user.
        full_message_history (list): The list of all messages sent between the
            user and the AI.
        permanent_memory (Obj): The memory object containing the permanent
            memory.
        token_limit (int): The maximum number of tokens allowed in the API call.

    Returns:
        str: The AI's response.
    """
    while True:
        try:
            model = cfg.fast_llm_model  # TODO: Change model from hardcode to argument
            logger.debug(f"Token limit: {token_limit}")
            # Reserve 1000 tokens for the response
            send_token_limit = token_limit - 1000

            relevant_memory = (
                ""
                if len(full_message_history) == 0
                else permanent_memory.get_relevant(str(full_message_history[-9:]), 10)
            )

            logger.debug(f"Memory Stats: {permanent_memory.get_stats()}")

            (
                next_message_to_add_index,
                current_tokens_used,
                insertion_index,
                current_context,
            ) = generate_context(prompt, relevant_memory, full_message_history, model)

            while current_tokens_used > 2500:
                # Drop the last element of relevant_memory and rebuild the
                # context until the system messages fit under 2500 tokens
                relevant_memory = relevant_memory[:-1]
                (
                    next_message_to_add_index,
                    current_tokens_used,
                    insertion_index,
                    current_context,
                ) = generate_context(
                    prompt, relevant_memory, full_message_history, model
                )

            current_tokens_used += token_counter.count_message_tokens(
                [create_chat_message("user", user_input)], model
            )  # Account for user input (appended later)

            while next_message_to_add_index >= 0:
                # print(f"CURRENT TOKENS USED: {current_tokens_used}")
                message_to_add = full_message_history[next_message_to_add_index]

                tokens_to_add = token_counter.count_message_tokens(
                    [message_to_add], model
                )
                if current_tokens_used + tokens_to_add > send_token_limit:
                    break

                # Add the most recent message to the start of the current context,
                # after the system prompts.
                current_context.insert(
                    insertion_index, full_message_history[next_message_to_add_index]
                )

                # Count the currently used tokens
                current_tokens_used += tokens_to_add

                # Move to the next most recent message in the full message history
                next_message_to_add_index -= 1
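
            # Because each older message is inserted at the fixed
            # insertion_index, previously inserted (newer) messages shift
            # later in the list, so the final context stays in chronological
            # order even though the history is walked backwards.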

            # Append user input, the length of this is accounted for above
            current_context.append(create_chat_message("user", user_input))

            plugin_count = len(cfg.plugins)
            for i, plugin in enumerate(cfg.plugins):
                if not plugin.can_handle_on_planning():
                    continue
                plugin_response = plugin.on_planning(
                    agent.prompt_generator, current_context
                )
                if not plugin_response:
                    continue
                tokens_to_add = token_counter.count_message_tokens(
                    [create_chat_message("system", plugin_response)], model
                )
                if current_tokens_used + tokens_to_add > send_token_limit:
                    if cfg.debug_mode:
                        print("Plugin response too long, skipping:", plugin_response)
                        print("Plugins remaining at stop:", plugin_count - i)
                    break
                current_context.append(create_chat_message("system", plugin_response))
                # Include the plugin message in the running count so later
                # budget checks and tokens_remaining stay accurate
                current_tokens_used += tokens_to_add
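
            # A minimal sketch of the plugin interface the calls above assume
            # (hypothetical class, not defined in this module):
            #
            #     class NotesPlugin:
            #         def can_handle_on_planning(self) -> bool:
            #             return True
            #
            #         def on_planning(self, prompt_generator, messages) -> str:
            #             return "Remember to cite your sources."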

            # Calculate remaining tokens
            tokens_remaining = token_limit - current_tokens_used
            # assert tokens_remaining >= 0, "Tokens remaining is negative.
            # This should never happen, please submit a bug report at
            # https://www.github.com/Torantulino/Auto-GPT"

            # Debug print the current context
            logger.debug(f"Token limit: {token_limit}")
            logger.debug(f"Send Token Count: {current_tokens_used}")
            logger.debug(f"Tokens remaining for response: {tokens_remaining}")
            logger.debug("------------ CONTEXT SENT TO AI ---------------")
            for message in current_context:
                # Skip printing the prompt
                if message["role"] == "system" and message["content"] == prompt:
                    continue
                logger.debug(f"{message['role'].capitalize()}: {message['content']}")
                logger.debug("")
            logger.debug("----------- END OF CONTEXT ----------------")

            # TODO: use a model defined elsewhere, so that model can contain
            # temperature and other settings we care about
            assistant_reply = create_chat_completion(
                model=model,
                messages=current_context,
                max_tokens=tokens_remaining,
            )

            # Update full message history
            full_message_history.append(create_chat_message("user", user_input))
            full_message_history.append(
                create_chat_message("assistant", assistant_reply)
            )

            return assistant_reply
        except RateLimitError:
            # TODO: When we switch to langchain, this is built in
            print("Error: API Rate Limit Reached. Waiting 10 seconds...")
            time.sleep(10)
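

# Hedged usage sketch (illustrative only; the agent object and the memory
# import are assumptions, not defined in this module):
#
#     from autogpt.memory import get_memory
#
#     memory = get_memory(cfg)
#     reply = chat_with_ai(
#         agent,
#         prompt="You are a helpful assistant...",
#         user_input="Determine which next command to use",
#         full_message_history=[],
#         permanent_memory=memory,
#         token_limit=4000,
#     )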