Feature/test summarization against memory challenge (#3567)

commit dd96d98fa1 (parent 064ac5c742)
Author: merwanehamadi
Date: 2023-04-30 01:56:57 -07:00
Committed by: GitHub
Co-authored-by: Toran Bruce Richards <toran.richards@gmail.com>

12 changed files with 1847 additions and 2494 deletions

@@ -56,6 +56,10 @@ class Agent:
cfg = Config()
self.ai_name = ai_name
self.memory = memory
self.summary_memory = (
"I was created." # Initial memory necessary to avoid hilucination
)
self.last_memory_index = 0
self.full_message_history = full_message_history
self.next_action_count = next_action_count
self.command_registry = command_registry
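
For orientation, a minimal sketch (a simplified stand-in, not the real constructor) of the two fields this hunk adds and what they are for:

# Sketch: the summarization state added to Agent, in isolation.
class AgentSummaryState:
    def __init__(self):
        # Seed summary; starting from a non-empty past keeps the first
        # summarization call from inventing one (the "hallucination"
        # the comment above refers to).
        self.summary_memory = "I was created."
        # Index into full_message_history of the last message that has
        # already been folded into summary_memory.
        self.last_memory_index = 0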

@@ -12,6 +12,10 @@ from autogpt.logs import logger
from autogpt.memory_management.store_memory import (
save_memory_trimmed_from_context_window,
)
from autogpt.memory_management.summary_memory import (
get_newly_trimmed_messages,
update_running_summary,
)
cfg = Config()
@@ -36,10 +40,10 @@ def generate_context(prompt, relevant_memory, full_message_history, model):
create_chat_message(
"system", f"The current time and date is {time.strftime('%c')}"
),
create_chat_message(
"system",
f"This reminds you of these events from your past:\n{relevant_memory}\n\n",
),
# create_chat_message(
# "system",
# f"This reminds you of these events from your past:\n{relevant_memory}\n\n",
# ),
]
# Add messages from the full message history until we reach the token limit
@@ -81,21 +85,21 @@ def chat_with_ai(
"""
model = cfg.fast_llm_model # TODO: Change model from hardcode to argument
# Reserve 1000 tokens for the response
logger.debug(f"Token limit: {token_limit}")
send_token_limit = token_limit - 1000
if len(full_message_history) == 0:
relevant_memory = ""
else:
recent_history = full_message_history[-5:]
shuffle(recent_history)
relevant_memories = permanent_memory.get_relevant(
str(recent_history), 5
)
if relevant_memories:
shuffle(relevant_memories)
relevant_memory = str(relevant_memories)
# if len(full_message_history) == 0:
# relevant_memory = ""
# else:
# recent_history = full_message_history[-5:]
# shuffle(recent_history)
# relevant_memories = permanent_memory.get_relevant(
# str(recent_history), 5
# )
# if relevant_memories:
# shuffle(relevant_memories)
# relevant_memory = str(relevant_memories)
relevant_memory = ""
logger.debug(f"Memory Stats: {permanent_memory.get_stats()}")
(
@@ -105,33 +109,36 @@ def chat_with_ai(
current_context,
) = generate_context(prompt, relevant_memory, full_message_history, model)
while current_tokens_used > 2500:
# remove memories until we are under 2500 tokens
relevant_memory = relevant_memory[:-1]
(
next_message_to_add_index,
current_tokens_used,
insertion_index,
current_context,
) = generate_context(
prompt, relevant_memory, full_message_history, model
)
# while current_tokens_used > 2500:
# # remove memories until we are under 2500 tokens
# relevant_memory = relevant_memory[:-1]
# (
# next_message_to_add_index,
# current_tokens_used,
# insertion_index,
# current_context,
# ) = generate_context(
# prompt, relevant_memory, full_message_history, model
# )
current_tokens_used += count_message_tokens(
[create_chat_message("user", user_input)], model
) # Account for user input (appended later)
current_tokens_used += 500 # Account for memory (appended later) TODO: The final memory may be less than 500 tokens
# Add Messages until the token limit is reached or there are no more messages to add.
while next_message_to_add_index >= 0:
# print (f"CURRENT TOKENS USED: {current_tokens_used}")
message_to_add = full_message_history[next_message_to_add_index]
tokens_to_add = count_message_tokens([message_to_add], model)
if current_tokens_used + tokens_to_add > send_token_limit:
save_memory_trimmed_from_context_window(
full_message_history,
next_message_to_add_index,
permanent_memory,
)
# save_memory_trimmed_from_context_window(
# full_message_history,
# next_message_to_add_index,
# permanent_memory,
# )
break
# Add the most recent message to the start of the current context,
@@ -146,6 +153,22 @@ def chat_with_ai(
# Move to the next most recent message in the full message history
next_message_to_add_index -= 1
# Insert Memories
if len(full_message_history) > 0:
(
newly_trimmed_messages,
agent.last_memory_index,
) = get_newly_trimmed_messages(
full_message_history=full_message_history,
current_context=current_context,
last_memory_index=agent.last_memory_index,
)
agent.summary_memory = update_running_summary(
current_memory=agent.summary_memory,
new_events=newly_trimmed_messages,
)
current_context.insert(insertion_index, agent.summary_memory)
api_manager = ApiManager()
# inform the AI about its remaining budget (if it has one)
if api_manager.get_total_budget() > 0.0:
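
Taken together, this hunk swaps vector-store retrieval for a running summary: on every loop, messages that fell out of the context window are detected, folded into agent.summary_memory, and the summary is re-inserted at insertion_index. A condensed sketch of just that step (names taken from the diff; the token accounting is elided):

from autogpt.memory_management.summary_memory import (
    get_newly_trimmed_messages,
    update_running_summary,
)

def refresh_running_summary(agent, full_message_history, current_context, insertion_index):
    """Sketch of the summary-refresh step inside chat_with_ai."""
    if len(full_message_history) > 0:
        # Find messages that are in the history but no longer in context
        # and have not been summarized yet.
        newly_trimmed, agent.last_memory_index = get_newly_trimmed_messages(
            full_message_history=full_message_history,
            current_context=current_context,
            last_memory_index=agent.last_memory_index,
        )
        # Fold them into the running summary (one chat completion)...
        agent.summary_memory = update_running_summary(
            current_memory=agent.summary_memory,
            new_events=newly_trimmed,
        )
        # ...and surface the summary to the model as a system message.
        current_context.insert(insertion_index, agent.summary_memory)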

@@ -0,0 +1,112 @@
import json
from typing import Dict, List, Tuple
from autogpt.config import Config
from autogpt.llm.llm_utils import create_chat_completion
cfg = Config()
def get_newly_trimmed_messages(
full_message_history: List[Dict[str, str]],
current_context: List[Dict[str, str]],
last_memory_index: int,
) -> Tuple[List[Dict[str, str]], int]:
"""
This function returns a list of the messages in full_message_history that have
an index higher than last_memory_index and are absent from current_context.
Args:
full_message_history (list): A list of dictionaries representing the full message history.
current_context (list): A list of dictionaries representing the current context.
last_memory_index (int): The index of the last message that was already summarized.
Returns:
list: A list of dictionaries that are in full_message_history with an index higher than last_memory_index and absent from current_context.
int: The new index value for use in the next loop.
"""
# Select messages in full_message_history with an index higher than last_memory_index
new_messages = [
msg for i, msg in enumerate(full_message_history) if i > last_memory_index
]
# Remove messages that are already present in current_context
new_messages_not_in_context = [
msg for msg in new_messages if msg not in current_context
]
# Find the index of the last message processed
new_index = last_memory_index
if new_messages_not_in_context:
last_message = new_messages_not_in_context[-1]
new_index = full_message_history.index(last_message)
return new_messages_not_in_context, new_index
def update_running_summary(current_memory: str, new_events: List[Dict]) -> str:
"""
This function takes a list of dictionaries representing new events and combines them with the current summary,
focusing on key and potentially important information to remember. The updated summary is returned in a message
formatted in the 1st person past tense.
Args:
current_memory (str): The current running summary of past actions.
new_events (List[Dict]): A list of dictionaries containing the latest events to be added to the summary.
Returns:
str: A message containing the updated summary of actions, formatted in the 1st person past tense.
Example:
new_events = [{"event": "entered the kitchen."}, {"event": "found a scrawled note with the number 7"}]
update_running_summary(new_events)
# Returns: "This reminds you of these events from your past: \nI entered the kitchen and found a scrawled note saying 7."
"""
# Replace "assistant" with "you". This produces much better first person past tense results.
for event in new_events:
if event["role"].lower() == "assistant":
event["role"] = "you"
# Remove "thoughts" dictionary from "content"
content_dict = json.loads(event["content"])
if "thoughts" in content_dict:
del content_dict["thoughts"]
event["content"] = json.dumps(content_dict)
elif event["role"].lower() == "system":
event["role"] = "your computer"
# Delete all user messages
elif event["role"] == "user":
new_events.remove(event)
# This can happen at any point during execution, not just the beginning
if len(new_events) == 0:
new_events = "Nothing new happened."
prompt = f'''Your task is to create a concise running summary of actions and information results in the provided text, focusing on key and potentially important information to remember.
You will receive the current summary and the latest development. Combine them, adding relevant key information from the latest development in 1st person past tense and keeping the summary concise.
Summary So Far:
"""
{current_memory}
"""
Latest Development:
"""
{new_events}
"""
'''
messages = [
{
"role": "user",
"content": prompt,
}
]
current_memory = create_chat_completion(messages, cfg.fast_llm_model)
message_to_return = {
"role": "system",
"content": f"This reminds you of these events from your past: \n{current_memory}",
}
return message_to_return
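
A quick usage sketch of the two helpers together. The toy history and the starting index of -1 are illustrative, and update_running_summary performs a real chat completion, so this assumes API access:

from autogpt.memory_management.summary_memory import (
    get_newly_trimmed_messages,
    update_running_summary,
)

history = [
    {"role": "assistant", "content": '{"thoughts": {"text": "plan"}, "command": {"name": "google"}}'},
    {"role": "system", "content": "Command google returned: ..."},
]
context = history[-1:]  # pretend only the newest message still fits in the window

trimmed, new_index = get_newly_trimmed_messages(
    full_message_history=history,
    current_context=context,
    last_memory_index=-1,  # -1 so the message at index 0 qualifies
)
# trimmed == [history[0]] and new_index == 0: the assistant message fell
# out of the context and has not been summarized yet.

summary_msg = update_running_summary(
    current_memory="I was created.",
    new_events=trimmed,  # mutated in place: role becomes "you", "thoughts" is dropped
)
# summary_msg is {"role": "system", "content": "This reminds you of these
# events from your past: ..."}, ready to insert into the context.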

@@ -332,4 +332,174 @@ interactions:
status:
code: 200
message: OK
- request:
body: '{"input": ["Assistant Reply: { \"thoughts\": { \"text\": \"thoughts\", \"reasoning\":
\"reasoning\", \"plan\": \"plan\", \"criticism\": \"criticism\", \"speak\":
\"speak\" }, \"command\": { \"name\": \"google\", \"args\":
{ \"query\": \"google_query\" } } } Result: None Human
Feedback:Command Result: Important Information."], "model": "text-embedding-ada-002",
"encoding_format": "base64"}'
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '483'
Content-Type:
- application/json
method: POST
uri: https://api.openai.com/v1/embeddings
response:
body:
string: !!binary |
H4sIAAAAAAAAA1R6WxOyurbl+/4Vq9YrvUtEJWG9IfeLJihesKurCxC5iSCQBHLq/Pcu/E6d7n7x
AfFCMuaYY4yZ//jXX3/93SZVlo5///PX3+9yGP/+H8u1ZzzGf//z1//8119//fXXf/xe/787sybJ
ns/yk/9u/71Zfp7Z9Pc/f4n/feX/3vTPX3/fopdD0flZVNPUPC8yT+sT3gt8pY/uh6XKIzl8CTzP
74Rhggh4m3hNHTU6VhN7kC18XyqDotgb+zmfIhk+DKen3kq1uXg3HwJceS8R25JtA15/gkgZ7UnA
TptGFXfomsGpRFfq7XDr94G9VxVzjTbUY1RLlu+XQXJ53LE261NCuyg4w6QiDZHiXKrKTRMg+KJl
RuBWrkPyiQQGU2j55ASHhnM1HnIomOyK/UP46VlvDDGM50TFfqDyiqUNOyuv9Lahhye6hzPnWQTX
0DngC2N2zxHqDMiu8I6RNGxBvy4uKRxflxRn7lr0yfd0G6D9kXSMwuTT8yG7s13IOgHbO5FzPiY1
gUPXrSiGmtF3X3RSYWolBLEi6qohbVigrDwhJErOdX924KlWtIgcqepU34SIL3sLt1W6RYA/hpBd
XlYN7qJ2JXxb0rAtJOBAr1hv0EbURZ88Qa3CO68k7Hjw40+nMUeKEphbspncgg9QLSM4xqs3Vldq
2Q+N4auwuBY11W3fqDYwfjLZQOOZ7vX+0k8nZT9AZH0YdYzjPpl1ZEiwdpQNUjYbLZwf5gZBr0EE
yXfb1ScRHBFEb4QQUJTGZ6etZ0E8ZQeq7nDJSSh3AVTF3UR1cQQ9f/Rghrt9s8HqIbgl3dGzIazi
zZaqvQjC6fM+11AAnUH9G90l8yRLGdRbsSJSo0nhDx874yTL2DtEVcjelmDJT1utcDaLec/ZfmvA
q9po1EykIZnDzhRhfMgDnJovE4xm6NTQEsIj+azevJpv+HSAard6oM333erTls0GWGnUwK7DN9W4
G0wLNifhjI3tM+x5FRdQcTV5Qq0ajRV7R48cesn5RtYAv8NJj9sWyhZbY0MqPsm8CzADv/U/lPtJ
58q9HGQeHWO0RpHaSyvCNeW9eiUYqXcnmd9lxIAZrwKkMA2B6f18OHA9SAoBtI56LhFPlsVTOC37
U4SDaHcGkG+WgS1SQp9kzrlUhq5fYZWlnc+d/BLAfZhjanrVwOe5oRA+SHTCqLmvknnclNqqt8SC
PrEE+Dhf1Bk2gD6wE+GGz1BKLEj6JsbHfez5bGUypnjl7Ua9ZwX0Sex7AhJpeyTysy8BewRWBpUU
vwhJd2FPuDHJ8Ni+Juyvdmd/krtYkyV7ulJNqO468UPYwMo4R3SfayEgD3vnycA/a9R7H7Vqs5dg
Bm/9l1L9/IbVpE4lgsVoaTjQFKlqjuMjg7f0nqMdqu/JFL/cFFTxekujyH75v/oGIJ4q6th2mHDz
+4kg/OgFKRa804MxEvg9zh49qmmt840Xy2B0zk9qW49rwkmbOFAJ6js2d/6pkqxSJNAs9YAoJ6r3
kzp9EVhfZwGrRdT1k51oGiw0KaWOcDcr8aoEKVzwgrVCf3N2TcNSMYm3wQi1Rz6JwERwT9071a/p
N6GMpY2MOqPBL1xs9bFwijOM5Rbj+4m1nKNMa2C9P9rk/TG3Pnt9DwN0b1ZEfXx89Pw5PD14LjSb
yDa79mx3r6Bsf0Qd8chnCZ3dqYSZc3FJMSeFzp6lHgAxkjC1nncjHGCgy5CBtUXtjXnkE1lFIjSb
yMbJBuX6JB7aARhXfiav/vjWp1XHY8ir9kQtfPLAdNzwmyy9wQXJFLbJYH0DWV7qjXqm2PXvfDgF
UIZ9jTjTuoocTm4OdkOFyMZ4dsngzucblDo+4MN12FSD7YYQipvvih78wOxJJq1UmCm5TdXhc03m
/KWk8NrnD2qiSK1Y9Vx9wRoRAXt3u/PnPAgh9ByLUkshVTIJxz6TrbI1sakIfcIOm5xAaY0qatw8
J2HnS2wASyqf1Oi9kPPrVhXhFHfSgvfE5/k1rJXUehBqrjOz38jn9Ca3uxWhe20kFWvRqoRSLECs
CyHWqcwfZ+iO3p2sG/us98q9JPC7YgVF/u7DZ8N/zMBxpZii4/fDl342w+YxaljXbwFgZeh7snLJ
KMY38wQ2xf5ogU14KxEzh7QfLRqVcH1lAjZipfQndJPiHx4w/kSnkHu7yVHgtNphu2GXZFY/6gVe
lCimzjXPOVvpkgh2R5LRvQwswPZP14Af/J6wrl7uOnNuWgzkx0vBfgfUipw0N4LwYLvYzldiz/N6
bUH3ARn1TXoE8+68J4pyjCk2Nd32e+IGDsw3jzeNEncLSK7OKRDM+UpxbyrJV+G5BovhXWEru36T
WX1bMTQPTo5NfT/qfE4tA0p3o6aHVef5ZK8HN3jt9zLe36ioT+uWfeGzEABGluPobGKrA6wvMcH7
9GqD4RX3B3BzvxE+HmXK5yGJLVlzYwEfsVD1VC4UCU6NMWLrLEh85CwgCtiCErvtgVWt8662CkPy
HW220hGw2yNP4b19Z+RtstDnJWEZHJPMoOb82XDii1MOtnPYkt0pVMJB6FoGX5K9wobmlLwvj6AE
76AdcLgtcULWaSrCQGq93//3OV3vG8iFuqX7jvOeDF/ewMIaRGp06APGBDxi+MkOGGuf2e8JausU
quQWYLyJJT6sdEEEXzMH+IilBLBG7FKoSdIB47TzK/4ZX7J8bJ8T3UvOUZ/XIzsr95paGEXWA/Dp
UNfwZGYyVY8oDbnnngcZefCD1dqmVafvqhaEN2Ri119PITkmkgZHVUfY2vf3ZNA+6xo4JtTwq6qU
P3gBjzL40iDuqorI2daD/WnekTk7RDrHj2sDz+28IcL+SkJO7NsBiunwodqqqwFd9AJY+IKsiHzS
6VtcCZAe64nMq67m/VWytsr3yDycLHphoNtXAOyAGdT8mJE+m1Y9K5aRW/R4j5Jq4UNBeU6Bt/QD
Ho5XJrYQN/qVwPbtcdFD3xvklTuTebN6huyqRCmkFVSp8RxafT6dShk8DK/HnpRrfp9LQQmOg9hS
zXicQbeK7hDGssvQSrXVZD3Z8Q1UzZliJznlPW9ldIH6cevTxM+PlfTZgQFe4l1NUZ2EIa3iQgC9
9vwifo9uIV+d1wN0+LukjisUPnXy9AzyTfJGz12rhRsv+Zwh/5QddS3jVJEvbRtwFY4Fja/6OpkN
/8RgkcOEquWN9ryYqhKa9tSRKcovPquemy/YyuWGuhp+Av7a6Q0kJj5QdKhjn0MQE3AKkvOfz7M5
K1vF3mdrJFbtxR/vMtOU7bFxkLD4iW9OMwi6NKsRsPsdmL5wXYI6KG/YfzQfn83K+6vUczki/s4o
6OazJcK7NlBq4Ab1FMZXBndPvCK7avAq3s/NGQYX94JE1DoVHQ86gnorVWitKrtw8gwnBZMInngv
tGdAUeY10E6TA2EGkXpuRbEE1fLQIUF/lDqtvnomE9euETDpkfO19kiBgI4SNaIdCZlyDCQ4Cv6B
VGveJQufDUr9MSVq80DW5wM75NB9v3dk/epVf1P5TgYPfaVR1RxgNY1zGUH3jFTyDty8mstdfIEn
93omKVKmnt22fQZKH+3JdtFXvVa+BTB8nYyej6smIQLeavBPP86uXjgDr/hC13dU+uPb8Zq1FsRM
GSl6rorkexxPKWisKqF+EYZ6W9HEgJ8zf2C0PC/fWzcV5quviPXvye03fcAOskbnA6psHQL+rQz2
01dYj7Ghc7ZnhrL0M7wvnKKnWfc1YBz5D4zI3uK8liYZuhZ3yXolbxKiWnoEvcm3iLTxumoQdt4Z
yKh4Yzu9lH1XWp8bXNYXY30/h81aOTEQnqsV9fy3m7CWwgyGxRrhoxYFPqnfvgiFDRvoLclg9X1d
VajYwWwgmB2UhFy1coaege5k3uzvCdOvnQrXloCwxa9rPudTICvZ95KjMeiuYMr0eAupOzywfau0
iknnaw1HmwuLf+l0XpJtKluqi7HabWky3Qa73Sr0YVGNXvqEi+tSgEUtcuq/HQu0YeGUMKmGhh4K
3dene3epYXnzHti5ZRYQv+4pVqbGGolSwHVS39csUqrkcscnMxWr+Tg+Ujjf43B5XqYv+BTBhMeM
rJ3XQ59zeoNQf1QXtE1Oas/w6uvB8+PVEvDMViEr3i8ChgJgqn7cLuS1laSy3PID9jZZ69Ov4c7A
V7MrPSq5XI07QBoonk4T9fvNoZrCyzAAKNg5PeZc1zfppDkQIWmF1lc5D6crg1+QEbTFqBiO4eB/
5zO8CoOK789zW01FpLGfHqUvTR9ClkonBuWnJJLVTGvA3NMzgsAPNDQnZ5NvhlH1lMva9ala27if
b8U4w5dwRxhd0L6X/HlVA/O9a7G+y9fJbOf7r3w9AICNXfKouAtrBMPYelJPUxJAgEMH8A6+w8/v
6iTqOwd2VVkTZR9//blB3hl0fXghEzJKfURrJ4DgBnwiJsDRx/M2HaBFFIEiDxSgrZNDA/eVzjC+
2j5Y+9ktg6e03KGJ3ceEX5sxAj2fAuqxj9yzraeUYNH/2L9OPBykYmWAvhZybNpiwydDlFoIJmem
etFBfXxePxl8SeYK+9tV7lN19ylhaW08/OO7yQsLAQqrIqWaH+Qhf+ttJL+PT4NmdcLDrjWuLZS3
VoL3F+74U1R/Gnh+Si/qnkIlIdnnWcLr3DywRm5myNMi/8Ky4CX2x/ZQzcXQbyEP0i8+2n7Haez4
M4Ty0aHGuioSlsR7SdH2X0Td3SvQKUKFofz46YteST9rdD8r7vuzI1BvMp+l0mOG2zTe0r0VxxWr
/d6Ac7MTqd19Sp+VetEqb34XqRraFZ+f/rGBw9qQ8D7UHH/hx/inf5d6aQBrg10GR8E9/MmP6PMY
e5BI+Uz44vfyzctQ4Qd/JrLkJ/1M2R4CdR0Fv/WrJrR2znCpL6qNwaYf7N0cw0uZ6Wi3ewX+FJyC
M2wQ+yJZUxI+tbOrwsW/UJfv0pD89OAFKweyWfKb2V4ZX7Dw4+In8op9wroEBtycSOevp4QDUgfw
tK9LqhaRWzEY+FvI0PZOnyda9VMVPmdwRlWAJNq6YP20TAFKxKko3ptW8k2FlwX53k/QuoNvTsHL
Y3CXJx8EorbwGWbqF4p7FuDjwh+0e68kqCZOR7PFH9DKRTl8m+RC91apJZLjAwdepWzCtv7FiYhX
XwfGezPG1Pk0/vAgwUVxt1OOzZ9eDbMKwuzu7LAty/uKH9aGB8LCe1L19XH7GblFCpf+SJ01jXzy
MFcHeHkFCdmF98mfrmqG/uh999D6yXrlbSOwZtobbR/HAYys125Qr6GPo6P0SHiIvQyGt4NJ1be5
rsgNPw5gdZV7ii2p8VnQJy1Q3PJOVoPw9kdlSjLgPIcZ+3TIQ17sTQNOZ2CRpf/3w7e9xPB2LBlp
9fiTzAA4Bvwo6mrJD8uEb+/uDdqCrJFNFqCKb7qrDFfaaGBLp76+IRuxVcJzsSJzzU5g0eslgNKF
Ud3WU8DKcmjBJXTEBY8fzk5t3gBzZ2P882stuxWHH38TLo5JP2fSSgMfBiIEPe8O6OG298BJmnMC
+09djY8ClnCvXlp6cdeIr6P6U//05sLXjr7x3DOB7pbnJFj0Qz8pvgZ+eaDdFmLCrY94UWjAHtiL
tq4+tcazBbt6zqjx2FySce2RGTBtZtRY+I1rM07Bc6fJSIi9Y8Wcdy/DpX6R/D5qPTtfzobyW8/5
TTacdclbgB7evAgrb7iiJWEpzMhhS8JtSZORZKEMxWcSoQ/Y2n2VFnkLzVgJ6GEKIKCwW523Gyf2
fn7Cn9/B+gYvShxjx9Vf/XSXmQqgmu2xO6y+Cdsn2IPkgyuqM83tZydJ/2v/+cOVwkV/WErIeoFc
jXn02TYNINxevg6CU1RXk7wFOXgFOCDi4qemqClyeAR9hC3adqDwwk6A+BsfsHk2h55stgMDzenF
MarY3herIDDgq28tmsnFoVry7QHUqZtR20OdPifqyVA0ehoJS98YTMP6mQGLrAR0nboTEPW3V+6e
O1XG6g5rnMBm10Dn+qqoeTYPPVj6Mdwk5hmbxjzqTUKUGrKrcEfwzvpkxNpHhhGCA6lNbe0Pevgx
4LGFAdWliSZTs/l4MBqnlCIlOFd9/EEzIJcyIehdFPq8P+9EyE6fHCORDWDG710E05P4xv5bfenf
X5658DkR7Z1WLev7BdZF/1DD2bSAd5vkDIon41hzD0PFSTS1ytKPiOhsWj5/ImkGwcW/0IPeCDqf
xiJXyt1Fxd7FxOG08li8e1XRhp7bpwLI5TYFUEubN9aDzEho+bxqsO6zkShouwaMNRqBi/6nP7wv
eaMKNcoOCBQh19n67jVQSY8vNITgXv34E17lq4s976Mns7nREfzVozU0gS9mQnIAPz1gy/ODj5E5
NPIzs0oyPZqCT6SwIOi3no3y/Aw4rT9BrAD7Df/U2+K/blsihCWiggs5D7NegCczlbHpvhveCccq
hc5GeGC8+DeedaUFsyZzyIL/amMIyhZsmtz95U096y/m9sc32MfXIpzNmxXALmoOhOAm5Fy8EA1Y
nyNa8Ff47PZoU6Aaxhcb4LLXf35febxsjYjNa883lYtKeAtIROZKufNf3gTAbefjfbPbJ5K9OrRQ
aJ9P6q92s88jQkR5yQvw8R6BaoLnIIP4EByxrRQvPgHzq0Hqkgda3/W5X28upacseTJ2dvViR+s4
Anh9qbEZiXNPkXKsf/MOsi15Hs6K7jagTTyBVBAn4ThqPpRHIzKJuORl618e4e2iKwFLvstPpm1A
4po1mvevvGdsPRx+9T72XrwCVMqKCJYjcKn2RLM+7tK+hukTHn75lT7fM5lAK6lNjM+RCUTMnC8M
+mOI1den63nQHLRfP8JPciI//MdgiDcYrQwlq9jZ2DZQ2Z8cNP346LtXEOzcs4I9MSXhSFaBBC5f
xKl/2+x1vpGnG7gGZkXNtLNC8XZv8p+/wE5kr/xh28sZlAstxX7t5aD9uo8Ymm/QUj22YciNobvJ
y/NgVZEvYH7nmQRHIzaxdcAnf7qcpi/45W+4jP2EqS/owB24CtQ+grXfit3dAc6o7ukyvwIUf3ca
XPQD1cV1Fc7+O9LgE/GQ7Epi9O/Uk6FsPLov2XkbPxlfwijJhgVLmmrt1v+jhy93/UAWfusnn+5k
uBeyBu/v6z1YL3iQf/m0q2EF8LRov9A5fXw0XMip3wj65QB3/IPQz9/x80nJ5R9fRsFo6zzEWgbt
z70m4rJf7G1JFqw/toRdcbglE0FyDJEnfOiiRxPuufEA3+CjU5X0nc5uzlEFnJ4e2JjI2LPEIRE0
toGz5CmlTw77XFV0UPXUaF4Fnwxp28C2sW8Ur7Mvnw33LsFfvnPrNl99qsTzAOzPtcY6vFf6/KrL
VFnyanr81hqXztnppqDMQfSy9Asaf6wZtmGE/sz/eHx/n5W2RALWssNWn9nhAQFODAUJs5hXjN0K
pHi7+EqPxiVNxtXuIsMXzTMkZus4XPqTAIVnZaJ5MvtwEgaSysVoaPiw8P3PH8BcEHTsxP4ZzOte
uoFJcQZqvB/in3xFLnc3lTqfTqhm81MMP/7A2Pk0+h9/u/Q/spXGTG8Pm/bPfA6j74H7LdCfGSw1
YUYf67EOJ+MKWji/fJ2i5v5KFjyKwK/8J5mcsEx++wUWvqUH+GHh3IVRC08SywkQwMFn1WtqYNKe
KD7KTRvyylfTP/MziyVLPOCPX+iIxp5Gy/xrWPIjeNnlGtUuyTYca/oeYHTTBvQVtAIsec0ByJr8
xD+8MamaNFCez3uKHodPz9GBer/8Eu1OyVuf78qaAHHTrvAprm1Oln6m7JPzCmOAvWpCdirDjb3r
qbbot0XfB7DP3x3VJrNPFr5BP71O/Vp76AR09Re2hxvCep13YB5cqVWM2HOxpykADPpby8GSj5Lx
KFPQGaLQ/vILROvbvR9Xu1SG7/39iarFn/LAdlWIDU2gP30+uRYo5fal38gUqCScwNUv5V0gW9Sp
6zxc+t8AHFPQsJnf5nAUSB4rzOoc6l68Xh/HSpWhDARO7cndg01EGhGyUbthLQeZz1dq58GVSBOy
zFNC+tuPxf+h0yOTdfYaNjNcdcYZe1tXDtn+ubeUhhknetklu/4rnvILNA9eTo3mLvvvG9k3yt+/
UwH/+a+//vpfvxMGTfvM3svBgDGbxn//91GBf8fP+N+iKP2bSn9OIpAhzrO///mvQwh/d33bdOP/
Hts6+wx///OXIv85bvD32I7x+/+9/q/l1/7zX/8HAAD//wMAPe/DkOMgAAA=
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 7bfdedf56918cfbc-SJC
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Sun, 30 Apr 2023 07:04:15 GMT
Server:
- cloudflare
access-control-allow-origin:
- '*'
alt-svc:
- h3=":443"; ma=86400, h3-29=":443"; ma=86400
openai-organization:
- user-adtx4fhfg1qsiyzdoaxciooj
openai-processing-ms:
- '25'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=15724800; includeSubDomains
x-ratelimit-limit-requests:
- '3000'
x-ratelimit-remaining-requests:
- '2999'
x-ratelimit-reset-requests:
- 20ms
x-request-id:
- f2850337441001a8e5bd70bacb4dede8
status:
code: 200
message: OK
version: 1

@@ -6,8 +6,8 @@ from tests.integration.agent_utils import run_interaction_loop
from tests.integration.challenges.utils import get_level_to_run
from tests.utils import requires_api_key
LEVEL_CURRENTLY_BEATEN = 3
MAX_LEVEL = 5
LEVEL_CURRENTLY_BEATEN = 3 # levels up to 30 (and likely beyond) have been beaten, but the cassette required to record that is too big
MAX_LEVEL = 3
@pytest.mark.vcr
@@ -30,7 +30,7 @@ def test_memory_challenge_a(
create_instructions_files(memory_management_agent, num_files, task_id)
try:
run_interaction_loop(memory_management_agent, 40)
run_interaction_loop(memory_management_agent, 180)
# catch system exit exceptions
except SystemExit:
file_path = str(memory_management_agent.workspace.get_path("output.txt"))

@@ -6,7 +6,7 @@ from tests.integration.agent_utils import run_interaction_loop
from tests.integration.challenges.utils import generate_noise, get_level_to_run
from tests.utils import requires_api_key
LEVEL_CURRENTLY_BEATEN = 1
LEVEL_CURRENTLY_BEATEN = 2
MAX_LEVEL = 5
NOISE = 1000

@@ -39,10 +39,9 @@ interactions:
to user\"\n },\n \"command\": {\n \"name\": \"command name\",\n \"args\":
{\n \"arg name\": \"value\"\n }\n }\n} \nEnsure the response
can be parsed by Python json.loads"}, {"role": "system", "content": "The current
time and date is Tue Jan 01 00:00:00 2000"}, {"role": "system", "content": "This
reminds you of these events from your past:\n\n\n"}, {"role": "user", "content":
"Determine which next command to use, and respond using the format specified
above:"}], "temperature": 0, "max_tokens": 3223}'
time and date is Tue Jan 01 00:00:00 2000"}, {"role": "user", "content": "Determine
which next command to use, and respond using the format specified above:"}],
"temperature": 0, "max_tokens": 2738}'
headers:
Accept:
- '*/*'
@@ -51,7 +50,7 @@ interactions:
Connection:
- keep-alive
Content-Length:
- '3490'
- '3401'
Content-Type:
- application/json
method: POST
@@ -59,20 +58,20 @@ interactions:
response:
body:
string: !!binary |
H4sIAAAAAAAAA7yTT2/TQBDF73yK0VxycSI3f0jjWwUHisQFAQVhFG3tib3NesfdHSutIn93tLbT
tEY9gbjOG7/3m9nxEXWOCWalkqyqzXS9ubt53H+68u9+FPeON2vzcb654m/v4/vP3zFCvr2jTIYv
ZhlXtSHRbDHCzJESyjG5eHs5X62Wi/llhBXnZDDBopbpYraaSuNueRov4guMsPGqIEyOWDuuatkK
78l6TNbrZYRn76f6PI4jFBZlnkqb9bKNMCtZZ+Qx+XnEivzJ1rEhTFB5r70oKwGSrZANAxxTCwCQ
opTcFKX4FBMYioNADxKKKV7DQRsDjSeQkuDgtNBWeLvThiDjqlI2B+FegMkHMobhhp3JJ6CtMCjo
Wq2qKIdJGfTtIegzeZDJLMXoebAj5dlqW/TpX0qCgpUB7f8iJIIO8lX8YF4SVOwFVF07rp1WQrBj
B1IGVfn9mLQ2yvaQU/j6n7aTOS060746vY0l6vzJ+sYFBiVwDaqCxmtbdEwZO0eZnHOGZSjp5PDS
YQGBUMie2s3jONvXpPav30Sg+HdT98FtdDrUwfSPOw0OPdOL1Y/QlSvGJ94LofdsMeJ4YTL+LZ7N
dKLtiAfw1LbYRrjTVvty2x81JuiFa4xQ25weMInbX+2b3wAAAP//AwAxpIznhgQAAA==
H4sIAAAAAAAAA7yTT2/bMAzF7/sUBC+5KEHSLG3q21a0a6/DiqGoh0C1GVuLLHoSDRsL/N0H2/nT
ethp2K581Hs/StQeTYoRJrmWpCjt9OpDfdd8+ni7+Hz3ePOzWa+emqf77Cb1tdzWqJBfvlMihxOz
hIvSkhh2qDDxpIVSjBaX64v1+8Xqaqmw4JQsRpiVMl3OVlOp/AtP58v5AhVWQWeE0R5Lz0UpG+Ed
uYDR1epa4dn7VL+YLxQKi7an0vXlvFWY5GwSChg977GgcLT1bAkj1CGYINpJB8lOyHUD7GMHABCj
5FxluYQYIzgUDwI10hVjfABHlIIwVIFAcoLaG6GN8GZrLEHCRaFd39ALMLknaxm+srfpBIwTBg19
q9MFpTDJO31Td/pMGpnMYlSvsz3pwM64bAD4khOIDjvw9KMyngIU9BdpCnraP85hQi8WHAR0WXou
vdFCsGUPkneqDrsxcmm1G2in8PifrinxRkxiQjF+J3Kh8h2DFng4vVrC3lMi54jhHqiRM5Nw39q1
jNNCSXp3TKqNtf9yHYbgVh3X9GD625Z2DgPTG4gRuvbZeMEHoes9W4w43piMP8WrmY60PfEBPHYt
tgq3xpmQb4Z9xgiDcIkKjUupwWjefmvf/QIAAP//AwCaXwR3hAQAAA==
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 7be2da273c20ce54-SJC
- 7bfe3ef73b52fac2-SJC
Cache-Control:
- no-cache, must-revalidate
Connection:
@@ -82,7 +81,7 @@ interactions:
Content-Type:
- application/json
Date:
- Thu, 27 Apr 2023 00:12:22 GMT
- Sun, 30 Apr 2023 07:59:46 GMT
Server:
- cloudflare
access-control-allow-origin:
@@ -94,19 +93,103 @@ interactions:
openai-organization:
- user-adtx4fhfg1qsiyzdoaxciooj
openai-processing-ms:
- '14714'
- '13160'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=15724800; includeSubDomains
x-ratelimit-limit-requests:
- '3500'
x-ratelimit-limit-tokens:
- '90000'
x-ratelimit-remaining-requests:
- '3499'
x-ratelimit-remaining-tokens:
- '86497'
x-ratelimit-reset-requests:
- 17ms
x-ratelimit-reset-tokens:
- 2.335s
x-request-id:
- 23366aac1ddcc141fc735b4a1a8b268c
- f665162ae22af897be24f632a031d434
status:
code: 200
message: OK
- request:
body: '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "Your
task is to create a concise running summary of actions and information results
in the provided text, focusing on key and potentially important information
to remember.\n\n\nYou will receive the current summary and the latest development.
Combine them, adding relevant key information from the latest development in
1st person past tense and keeping the summary concise.\n\nSummary So Far:\n\"\"\"\nI
was created.\n\"\"\"\n\nLatest Development:\n\"\"\"\nNothing new happened.\n\"\"\"\n"}],
"temperature": 0, "max_tokens": null}'
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '600'
Content-Type:
- application/json
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAA0SOQUsDMRQG7/6K8J2zZdNa3eZmeygiPSqISEmTZze6mxc2r1Qt+9+lUPU6MMOc
EAMsfOvE97mrbu+O69X3lJrdks2D2Sy/5o/rp+fgu9ViAw3evZOXizHx3OeOJHKChh/ICQVYc9NM
m2szbxqNngN1sNhnqWaTeSWHYcdVPasNNA7F7Qn2hDxwn2Ur/EGpwC6Mxn/6H2sIi+v+gKnrUcO3
HD0V2JcTeiq/zYE7goUrJRZxSc6HnITS+f5eHV1Rl2PlUlCJpY1prxIdVeuKal3OlChMMGq8xRRL
ux3IFU6wKMIZGjEF+oStx9fx6gcAAP//AwCGJ6JPSQEAAA==
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 7bfe3f5a5fb3fac2-SJC
Cache-Control:
- no-cache, must-revalidate
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Sun, 30 Apr 2023 07:59:49 GMT
Server:
- cloudflare
access-control-allow-origin:
- '*'
alt-svc:
- h3=":443"; ma=86400, h3-29=":443"; ma=86400
openai-model:
- gpt-3.5-turbo-0301
openai-organization:
- user-adtx4fhfg1qsiyzdoaxciooj
openai-processing-ms:
- '634'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=15724800; includeSubDomains
x-ratelimit-limit-requests:
- '3500'
x-ratelimit-limit-tokens:
- '90000'
x-ratelimit-remaining-requests:
- '3499'
x-ratelimit-remaining-tokens:
- '89866'
x-ratelimit-reset-requests:
- 17ms
x-ratelimit-reset-tokens:
- 88ms
x-request-id:
- 0bc738f58569b4b3635fe0c8384ebcfb
status:
code: 200
message: OK
@@ -151,23 +234,23 @@ interactions:
{\n \"arg name\": \"value\"\n }\n }\n} \nEnsure the response
can be parsed by Python json.loads"}, {"role": "system", "content": "The current
time and date is Tue Jan 01 00:00:00 2000"}, {"role": "system", "content": "This
reminds you of these events from your past:\nNone\n\n"}, {"role": "user", "content":
"Determine which next command to use, and respond using the format specified
above:"}, {"role": "assistant", "content": "{\n \"thoughts\": {\n \"text\":
reminds you of these events from your past: \nI was created and nothing new
has happened."}, {"role": "user", "content": "Determine which next command to
use, and respond using the format specified above:"}, {"role": "assistant",
"content": "{\n \"thoughts\": {\n \"text\": \"I need to use the write_to_file
command to write ''Hello World'' into a file named ''hello_world.txt''.\",\n \"reasoning\":
\"The task requires me to write ''Hello World'' into a file named ''hello_world.txt'',
and the write_to_file command is the most appropriate for this task.\",\n \"plan\":
\"- Use the write_to_file command to write ''Hello World'' into a file named
''hello_world.txt''.\",\n \"criticism\": \"I need to ensure that I use
the correct file name and text to write to the file.\",\n \"speak\":
\"I will use the write_to_file command to write ''Hello World'' into a file
named ''hello_world.txt''.\",\n \"reasoning\": \"The goal is to write
''Hello World'' into a file named ''hello_world.txt'', and the write_to_file
command is the most appropriate for this task.\",\n \"plan\": \"- Use
the write_to_file command to write ''Hello World'' into a file named ''hello_world.txt''.\",\n \"criticism\":
\"I need to ensure that I am using the correct file name and that the text is
written correctly.\",\n \"speak\": \"I will use the write to file command
to write ''Hello World'' into a file named ''hello_world.txt''.\"\n },\n \"command\":
{\n \"name\": \"write_to_file\",\n \"args\": {\n \"filename\":
\"hello_world.txt\",\n \"text\": \"Hello World\"\n }\n }\n}"},
{"role": "system", "content": "Command write_to_file returned: File written
to successfully."}, {"role": "user", "content": "Determine which next command
to use, and respond using the format specified above:"}], "temperature": 0,
"max_tokens": 2970}'
named ''hello_world.txt''.\"\n },\n \"command\": {\n \"name\":
\"write_to_file\",\n \"args\": {\n \"filename\": \"hello_world.txt\",\n \"text\":
\"Hello World\"\n }\n }\n}"}, {"role": "system", "content": "Command
write_to_file returned: File written to successfully."}, {"role": "user", "content":
"Determine which next command to use, and respond using the format specified
above:"}], "temperature": 0, "max_tokens": 2486}'
headers:
Accept:
- '*/*'
@@ -176,7 +259,7 @@ interactions:
Connection:
- keep-alive
Content-Length:
- '4612'
- '4646'
Content-Type:
- application/json
method: POST
@@ -184,19 +267,21 @@ interactions:
response:
body:
string: !!binary |
H4sIAAAAAAAAA6SSQW8TMRCF7/yK0ZydKkuSttkbAiGFA+JQBIKtItc7Wbvx2pZnFoqi/e9om00T
QishcZ3x++bNPO/Q1ViisVpMm/zkann/9b3JZnm1XMnHmzfLd7b4IrL6xB++vUWF8e6ejIyKCxPb
5ElcDKjQZNJCNZbF5fXrxWI+my8UtrEmjyU2SSazi8VEunwXJ9PZtECFHeuGsNxhyrFNspa4pcBY
FtPiWuERfmzMLhVKFO2PpWIx7xUaG50hxvL7DlviAzhHT1iiZnYsOshgMwahMKywqwIAQIViY9dY
4QpLGItjgx5kKFa4gp/Oe+iYQCyBaN6uR4MEJratDjVIhKfa4dVFheoUmUlzDC40e+6NJWii9uD4
WbmCR/CLIweZJWgjC+iUckzZaSHYxAxih+4zHpLXYT9+Ap//eyOTnTjjuD1cKhA9KilwlwedFliB
1T/oCXZcCbgzhpg3nfe/ztGcSG9fDAD+1e4e2atD3uPzv+IOuqX9tD+ucWZK5+b8p5xGO+Z66q8+
X/Ko7EdvVeixV7hxwbFd70lYIktMqNCFmh6wnPa3/avfAAAA//8DAKZ6myqyAwAA
H4sIAAAAAAAAA7SSTW/TQBCG7/yK0VxycaIkJST4VqmijYSEQEWIYhRt1xN7yXp3uztOgiL/d7Sx
TUpJOYC4zuf7zLwHVDmmKEvBsnJ6OL/cXV+9mX2+0+aag7u6vHlvH+7myw/bd2+XmKC9/0aSu46R
tJXTxMoaTFB6Ekw5ppNXi+ni5WS2eJ1gZXPSmGLheHgxmg259vd2OL4YTzDBOoiCMD2g87ZyvGK7
IRMwnYyn8wRPw0+JxSJBtiz0KTSdzJoEZWmVpIDplwNWFPrB3mrCFEUIKrAwHGVaw2QiwiEzAAAZ
cmnrouSQYQpdsEvQnmMwwyWUYksQaikphHWt9XfYecVMBgY3pLWFT9brfADKsAUBa6UJjKgoh0EZ
86tdzI94z4MR3JYEhvYMgcmBCsAW6kDAJQGLsFl18ATSVpUweSz4GeurRhkmj+V6EsEaZYpW821X
Bp4eauUpQEVn54BYM/kjjzLFOZ5Y+SeiBI4an1UfCUuCygYG4Zy3zivBBGvrgcuYPYPjtDAtyRA+
/vNxZKSTKlT9Qw3RsZNMqH3sEwxLcN5uVU4gYCu0yqG96VFob0hTPLskOBKbfsFOaf13X21HNklv
0K78N3/Gb7Tbfhn/RJTwxVNrP/bL/zB4j3DE6Ggy02CT4FoZFcpVuxtTDGwdJqhMTntMx83X5sUP
AAAA//8DAK0qY5KVBAAA
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 7be2da943bd5ce54-SJC
- 7bfe3f5edbd9fac2-SJC
Cache-Control:
- no-cache, must-revalidate
Connection:
@@ -206,7 +291,7 @@ interactions:
Content-Type:
- application/json
Date:
- Thu, 27 Apr 2023 00:12:36 GMT
- Sun, 30 Apr 2023 08:00:00 GMT
Server:
- cloudflare
access-control-allow-origin:
@@ -218,19 +303,25 @@ interactions:
openai-organization:
- user-adtx4fhfg1qsiyzdoaxciooj
openai-processing-ms:
- '10505'
- '10898'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=15724800; includeSubDomains
x-ratelimit-limit-requests:
- '3500'
x-ratelimit-limit-tokens:
- '90000'
x-ratelimit-remaining-requests:
- '3499'
x-ratelimit-remaining-tokens:
- '86482'
x-ratelimit-reset-requests:
- 17ms
x-ratelimit-reset-tokens:
- 2.345s
x-request-id:
- f593fff7d415793b53e5124283ad25c9
- f294799d2564196ef4852c84fdc8fb17
status:
code: 200
message: OK

@@ -33,18 +33,14 @@ def test_generate_context_empty_inputs(mocker):
# Assert
expected_result = (
-1,
47,
3,
32,
2,
[
{"role": "system", "content": ""},
{
"role": "system",
"content": f"The current time and date is {time.strftime('%c')}",
},
{
"role": "system",
"content": f"This reminds you of these events from your past:\n\n\n",
},
],
)
assert result == expected_result
@@ -78,5 +74,5 @@ def test_generate_context_valid_inputs():
assert result[0] >= 0
assert result[2] >= 0
assert result[1] >= 0
assert len(result[3]) >= 3 # current_context should have at least 3 messages
assert len(result[3]) >= 2 # current_context should have at least 2 messages
assert result[1] <= 2048 # token limit for GPT-3.5-turbo-0301 is 2048 tokens

@@ -41,7 +41,10 @@ def before_record_request(request):
def filter_hostnames(request):
allowed_hostnames = ["api.openai.com"] # List of hostnames you want to allow
allowed_hostnames = [
"api.openai.com",
"localhost:50337",
] # List of hostnames you want to allow
if any(hostname in request.url for hostname in allowed_hostnames):
return request
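
A small sketch of the filter's effect, assuming the usual vcrpy convention that a before_record_request hook returning None drops the request from the cassette (the fall-through return is outside this hunk):

class _Req:  # stand-in for vcr's request object; only .url is consulted
    def __init__(self, url):
        self.url = url

assert filter_hostnames(_Req("https://api.openai.com/v1/chat/completions")) is not None
assert filter_hostnames(_Req("https://example.com/anything")) is None  # filtered out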