Coverage for autogpt/chat.py: 19%

70 statements  

coverage.py v7.2.3, created at 2023-04-22 05:45 +0000

import time

from openai.error import RateLimitError

from autogpt import token_counter
from autogpt.config import Config
from autogpt.llm_utils import create_chat_completion
from autogpt.logs import logger
from autogpt.types.openai import Message

cfg = Config()

def create_chat_message(role, content) -> Message:
    """
    Create a chat message with the given role and content.

    Args:
        role (str): The role of the message sender, e.g., "system", "user", or "assistant".
        content (str): The content of the message.

    Returns:
        dict: A dictionary containing the role and content of the message.
    """
    return {"role": role, "content": content}
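
# Example: create_chat_message("user", "hello")
# -> {"role": "user", "content": "hello"}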

def generate_context(prompt, relevant_memory, full_message_history, model):
    """Build the three system messages that start the context and return the
    bookkeeping needed to splice message history in after them."""
    current_context = [
        create_chat_message("system", prompt),
        create_chat_message(
            "system", f"The current time and date is {time.strftime('%c')}"
        ),
        create_chat_message(
            "system",
            f"This reminds you of these events from your past:\n{relevant_memory}\n\n",
        ),
    ]

    # Messages from the full message history are added later, newest first,
    # until the token limit is reached
    next_message_to_add_index = len(full_message_history) - 1
    insertion_index = len(current_context)
    # Count the currently used tokens
    current_tokens_used = token_counter.count_message_tokens(current_context, model)
    return (
        next_message_to_add_index,
        current_tokens_used,
        insertion_index,
        current_context,
    )
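
# Note: with an empty full_message_history this returns
# next_message_to_add_index == -1, insertion_index == 3 (just past the
# three system messages), and current_tokens_used covering only those
# system messages.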

# TODO: Change debug from hardcode to argument
def chat_with_ai(
    agent, prompt, user_input, full_message_history, permanent_memory, token_limit
):
    """Interact with the OpenAI API, sending the prompt, user input,
    message history, and permanent memory.

    Args:
        agent: The agent whose prompt_generator is passed to planning plugins.
        prompt (str): The prompt explaining the rules to the AI.
        user_input (str): The input from the user.
        full_message_history (list): The list of all messages sent between the
            user and the AI.
        permanent_memory (Obj): The memory object containing the permanent
            memory.
        token_limit (int): The maximum number of tokens allowed in the API call.

    Returns:
        str: The AI's response.
    """
    while True:
        try:

            model = cfg.fast_llm_model  # TODO: Change model from hardcode to argument

            logger.debug(f"Token limit: {token_limit}")
            # Reserve 1000 tokens for the response
            send_token_limit = token_limit - 1000

            relevant_memory = (
                ""
                if len(full_message_history) == 0
                else permanent_memory.get_relevant(str(full_message_history[-9:]), 10)
            )

            logger.debug(f"Memory Stats: {permanent_memory.get_stats()}")

            (
                next_message_to_add_index,
                current_tokens_used,
                insertion_index,
                current_context,
            ) = generate_context(prompt, relevant_memory, full_message_history, model)

            while current_tokens_used > 2500:
                # Drop the last item from relevant_memory and rebuild the
                # context until it fits under 2500 tokens
                relevant_memory = relevant_memory[:-1]
                (
                    next_message_to_add_index,
                    current_tokens_used,
                    insertion_index,
                    current_context,
                ) = generate_context(
                    prompt, relevant_memory, full_message_history, model
                )

            current_tokens_used += token_counter.count_message_tokens(
                [create_chat_message("user", user_input)], model
            )  # Account for user input (appended later)

            while next_message_to_add_index >= 0:
                # print (f"CURRENT TOKENS USED: {current_tokens_used}")
                message_to_add = full_message_history[next_message_to_add_index]

                tokens_to_add = token_counter.count_message_tokens(
                    [message_to_add], model
                )
                if current_tokens_used + tokens_to_add > send_token_limit:
                    break

                # Add the most recent message to the start of the current context,
                # after the three system prompts.
                current_context.insert(
                    insertion_index, full_message_history[next_message_to_add_index]
                )

                # Count the currently used tokens
                current_tokens_used += tokens_to_add

                # Move to the next most recent message in the full message history
                next_message_to_add_index -= 1

            # Append user input; its length was accounted for above
            current_context.extend([create_chat_message("user", user_input)])
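
            # At this point the outgoing context is ordered as:
            #   [system prompt, current time, relevant memory,
            #    <recent history that fit>, user input]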

            plugin_count = len(cfg.plugins)
            for i, plugin in enumerate(cfg.plugins):
                if not plugin.can_handle_on_planning():
                    continue
                plugin_response = plugin.on_planning(
                    agent.prompt_generator, current_context
                )
                if not plugin_response:
                    continue
                tokens_to_add = token_counter.count_message_tokens(
                    [create_chat_message("system", plugin_response)], model
                )
                if current_tokens_used + tokens_to_add > send_token_limit:
                    if cfg.debug_mode:
                        print("Plugin response too long, skipping:", plugin_response)
                        print("Plugins remaining at stop:", plugin_count - i)
                    break
                current_context.append(create_chat_message("system", plugin_response))
                # Keep the running count accurate so tokens_remaining below
                # reflects the appended plugin response
                current_tokens_used += tokens_to_add

            # Calculate remaining tokens
            tokens_remaining = token_limit - current_tokens_used
            # assert tokens_remaining >= 0, "Tokens remaining is negative.
            # This should never happen, please submit a bug report at
            # https://www.github.com/Torantulino/Auto-GPT"

            # Debug print the current context
            logger.debug(f"Token limit: {token_limit}")
            logger.debug(f"Send Token Count: {current_tokens_used}")
            logger.debug(f"Tokens remaining for response: {tokens_remaining}")
            logger.debug("------------ CONTEXT SENT TO AI ---------------")
            for message in current_context:
                # Skip printing the prompt
                if message["role"] == "system" and message["content"] == prompt:
                    continue
                logger.debug(f"{message['role'].capitalize()}: {message['content']}")
                logger.debug("")
            logger.debug("----------- END OF CONTEXT ----------------")

            # TODO: use a model defined elsewhere, so that model can contain
            # temperature and other settings we care about
            assistant_reply = create_chat_completion(
                model=model,
                messages=current_context,
                max_tokens=tokens_remaining,
            )

            # Update full message history
            full_message_history.append(create_chat_message("user", user_input))
            full_message_history.append(
                create_chat_message("assistant", assistant_reply)
            )

            return assistant_reply
        except RateLimitError:
            # TODO: When we switch to langchain, this is built in
            print("Error: ", "API Rate Limit Reached. Waiting 10 seconds...")
            time.sleep(10)
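
A minimal usage sketch (hypothetical: `my_agent`, the `NoMemory` stub, and the 4000-token limit are illustrative stand-ins, not part of autogpt):

    class NoMemory:
        """Stub matching the permanent_memory interface used above."""

        def get_relevant(self, text, num_relevant):
            return []

        def get_stats(self):
            return "no memory backend"

    history = []  # chat_with_ai appends the user and assistant turns here
    reply = chat_with_ai(
        agent=my_agent,  # hypothetical: any object exposing .prompt_generator
        prompt="You are a helpful assistant.",
        user_input="Summarize our goals.",
        full_message_history=history,
        permanent_memory=NoMemory(),
        token_limit=4000,  # 1000 of these are reserved for the model's reply
    )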