Coverage for autogpt/chat.py: 19% (70 statements)
coverage.py v7.2.3, created at 2023-04-22 05:45 +0000
import time

from openai.error import RateLimitError

from autogpt import token_counter
from autogpt.config import Config
from autogpt.llm_utils import create_chat_completion
from autogpt.logs import logger
from autogpt.types.openai import Message

cfg = Config()


def create_chat_message(role, content) -> Message:
    """
    Create a chat message with the given role and content.

    Args:
        role (str): The role of the message sender, e.g., "system", "user", or "assistant".
        content (str): The content of the message.

    Returns:
        Message: A dictionary containing the role and content of the message.
    """
    return {"role": role, "content": content}
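
# Illustrative example (not in the original source): the returned dict is
# the plain message shape the OpenAI chat completions API expects, e.g.
#     >>> create_chat_message("user", "Hello")
#     {'role': 'user', 'content': 'Hello'}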


def generate_context(prompt, relevant_memory, full_message_history, model):
    """Build the initial system context and return bookkeeping values.

    Returns a tuple of (next_message_to_add_index, current_tokens_used,
    insertion_index, current_context) so the caller can keep inserting
    history messages until the token limit is reached.
    """
    current_context = [
        create_chat_message("system", prompt),
        create_chat_message(
            "system", f"The current time and date is {time.strftime('%c')}"
        ),
        create_chat_message(
            "system",
            f"This reminds you of these events from your past:\n{relevant_memory}\n\n",
        ),
    ]

    # Add messages from the full message history until we reach the token limit
    next_message_to_add_index = len(full_message_history) - 1
    insertion_index = len(current_context)
    # Count the currently used tokens
    current_tokens_used = token_counter.count_message_tokens(current_context, model)
    return (
        next_message_to_add_index,
        current_tokens_used,
        insertion_index,
        current_context,
    )
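
# Sketch of the return values (added for clarity, not in the original source):
# with an empty full_message_history, generate_context returns
# next_message_to_add_index == -1 (nothing left to add), insertion_index == 3
# (just past the three system messages), and current_tokens_used counting
# only those system messages.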


# TODO: Change debug from hardcode to argument
def chat_with_ai(
    agent, prompt, user_input, full_message_history, permanent_memory, token_limit
):
    """Interact with the OpenAI API, sending the prompt, user input,
    message history, and permanent memory.

    Args:
        agent: The agent whose prompt_generator is handed to planning plugins.
        prompt (str): The prompt explaining the rules to the AI.
        user_input (str): The input from the user.
        full_message_history (list): The list of all messages sent between the
            user and the AI.
        permanent_memory (Obj): The memory object containing the permanent
            memory.
        token_limit (int): The maximum number of tokens allowed in the API call.

    Returns:
        str: The AI's response.
    """
    while True:
        try:
            model = cfg.fast_llm_model  # TODO: Change model from hardcode to argument
            logger.debug(f"Token limit: {token_limit}")
            # Reserve 1000 tokens for the response
            send_token_limit = token_limit - 1000

            relevant_memory = (
                ""
                if len(full_message_history) == 0
                else permanent_memory.get_relevant(str(full_message_history[-9:]), 10)
            )

            logger.debug(f"Memory Stats: {permanent_memory.get_stats()}")

            (
                next_message_to_add_index,
                current_tokens_used,
                insertion_index,
                current_context,
            ) = generate_context(prompt, relevant_memory, full_message_history, model)

            while current_tokens_used > 2500:
                # Drop the last element of relevant_memory and rebuild the
                # context until the system messages fit under 2500 tokens
                relevant_memory = relevant_memory[:-1]
                (
                    next_message_to_add_index,
                    current_tokens_used,
                    insertion_index,
                    current_context,
                ) = generate_context(
                    prompt, relevant_memory, full_message_history, model
                )

            current_tokens_used += token_counter.count_message_tokens(
                [create_chat_message("user", user_input)], model
            )  # Account for user input (appended later)

            while next_message_to_add_index >= 0:
                # print(f"CURRENT TOKENS USED: {current_tokens_used}")
                message_to_add = full_message_history[next_message_to_add_index]

                tokens_to_add = token_counter.count_message_tokens(
                    [message_to_add], model
                )
                if current_tokens_used + tokens_to_add > send_token_limit:
                    break

                # Add the most recent message to the start of the current context,
                # after the system prompts.
                current_context.insert(
                    insertion_index, full_message_history[next_message_to_add_index]
                )

                # Count the currently used tokens
                current_tokens_used += tokens_to_add

                # Move to the next most recent message in the full message history
                next_message_to_add_index -= 1
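
            # Because each older message is inserted at the fixed
            # insertion_index, previously inserted (newer) messages shift
            # later in the list, so the final context stays in chronological
            # order even though the history is walked backwards.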

            # Append user input, the length of this is accounted for above
            current_context.append(create_chat_message("user", user_input))

            plugin_count = len(cfg.plugins)
            for i, plugin in enumerate(cfg.plugins):
                if not plugin.can_handle_on_planning():
                    continue
                plugin_response = plugin.on_planning(
                    agent.prompt_generator, current_context
                )
                if not plugin_response:
                    continue
                tokens_to_add = token_counter.count_message_tokens(
                    [create_chat_message("system", plugin_response)], model
                )
                if current_tokens_used + tokens_to_add > send_token_limit:
                    if cfg.debug_mode:
                        print("Plugin response too long, skipping:", plugin_response)
                        print("Plugins remaining at stop:", plugin_count - i)
                    break
                current_context.append(create_chat_message("system", plugin_response))
                # Include the plugin message in the running count so later
                # budget checks and tokens_remaining stay accurate
                current_tokens_used += tokens_to_add
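
            # A minimal sketch of the plugin interface the calls above assume
            # (hypothetical class, not defined in this module):
            #
            #     class NotesPlugin:
            #         def can_handle_on_planning(self) -> bool:
            #             return True
            #
            #         def on_planning(self, prompt_generator, messages) -> str:
            #             return "Remember to cite your sources."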

            # Calculate remaining tokens
            tokens_remaining = token_limit - current_tokens_used
            # assert tokens_remaining >= 0, "Tokens remaining is negative.
            # This should never happen, please submit a bug report at
            # https://www.github.com/Torantulino/Auto-GPT"

            # Debug print the current context
            logger.debug(f"Token limit: {token_limit}")
            logger.debug(f"Send Token Count: {current_tokens_used}")
            logger.debug(f"Tokens remaining for response: {tokens_remaining}")
            logger.debug("------------ CONTEXT SENT TO AI ---------------")
            for message in current_context:
                # Skip printing the prompt
                if message["role"] == "system" and message["content"] == prompt:
                    continue
                logger.debug(f"{message['role'].capitalize()}: {message['content']}")
                logger.debug("")
            logger.debug("----------- END OF CONTEXT ----------------")

            # TODO: use a model defined elsewhere, so that model can contain
            # temperature and other settings we care about
            assistant_reply = create_chat_completion(
                model=model,
                messages=current_context,
                max_tokens=tokens_remaining,
            )

            # Update full message history
            full_message_history.append(create_chat_message("user", user_input))
            full_message_history.append(
                create_chat_message("assistant", assistant_reply)
            )

            return assistant_reply
        except RateLimitError:
            # TODO: When we switch to langchain, this is built in
            print("Error: API Rate Limit Reached. Waiting 10 seconds...")
            time.sleep(10)
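

# Hedged usage sketch (illustrative only; the agent object and the memory
# import are assumptions, not defined in this module):
#
#     from autogpt.memory import get_memory
#
#     memory = get_memory(cfg)
#     reply = chat_with_ai(
#         agent,
#         prompt="You are a helpful assistant...",
#         user_input="Determine which next command to use",
#         full_message_history=[],
#         permanent_memory=memory,
#         token_limit=4000,
#     )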