raise error when msg is invalid; fix docstr; improve ResponsiveAgent; update doc and packaging; capture ipython output; find code blocks with llm when regex fails. (#1154)

* autogen.agent -> autogen.agentchat

* bug fix in portfolio

* notebook

* timeout

* timeout

* infer lang; close #1150

* timeout

* message context

* context handling

* add sender to generate_reply

* clean up the receive function

* move mathchat to contrib

* contrib

* last_message

* Add OptiGuide: agent and notebook

* Optiguide notebook: add figures and URL
1. figures and code points to remote URL
2. simplify the prompt for the interpreter, because
all information is already in the chat history.

* Update name: Agent -> GenericAgent

* Update notebook

* Rename: GenericAgent -> ResponsiveAgent

* Rebase to autogen.agentchat

* OptiGuide: Comment, style, and notebook updates

* simplify optiguide

* raise error when msg is invalid; fix docstr

* allow return None for generate_reply()

* update_system_message

* test update_system_message

* simplify optiguide

* simplify optiguide

* simplify optiguide

* simplify optiguide

* move test

* add test and fix bug

* doc update

* doc update

* doc update

* color

* optiguide

* prompt

* test danger case

* packaging

* docker

* remove path in traceback

* capture ipython output

* simplify

* find code blocks with llm

* find code with llm

* order

* order

* fix bug in context handling

* print executing msg

* print executing msg

* test find code

* test find code

* disable find_code

* default_auto_reply

* default auto reply

* remove optiguide

* remove -e

---------

Co-authored-by: Beibin Li <beibin79@gmail.com>
This commit is contained in:
Chi Wang
2023-07-31 19:22:30 -07:00
committed by GitHub
parent da92238ffe
commit c48babd02f
39 changed files with 1225 additions and 866 deletions

View File

@@ -1,10 +1,10 @@
import os
import sys
import pytest
from flaml import oai
from flaml import autogen
from flaml.autogen.agentchat import AssistantAgent, UserProxyAgent
KEY_LOC = "test/autogen"
KEY_LOC = "notebook"
OAI_CONFIG_LIST = "OAI_CONFIG_LIST"
here = os.path.abspath(os.path.dirname(__file__))
@@ -20,16 +20,16 @@ def test_ai_user_proxy_agent():
return
conversations = {}
oai.ChatCompletion.start_logging(conversations)
autogen.ChatCompletion.start_logging(conversations)
config_list = oai.config_list_from_json(
config_list = autogen.config_list_from_json(
OAI_CONFIG_LIST,
file_location=KEY_LOC,
)
assistant = AssistantAgent(
"assistant",
system_message="You are a helpful assistant.",
oai_config={
llm_config={
"request_timeout": 600,
"seed": 42,
"config_list": config_list,
@@ -41,7 +41,7 @@ def test_ai_user_proxy_agent():
human_input_mode="NEVER",
max_consecutive_auto_reply=2,
code_execution_config=False,
oai_config={
llm_config={
"config_list": config_list,
},
# In the system message the "user" always refers to ther other agent.
@@ -62,7 +62,7 @@ def test_gpt35(human_input_mode="NEVER", max_consecutive_auto_reply=5):
import openai
except ImportError:
return
config_list = oai.config_list_from_json(
config_list = autogen.config_list_from_json(
OAI_CONFIG_LIST,
file_location=KEY_LOC,
filter_dict={
@@ -75,14 +75,14 @@ def test_gpt35(human_input_mode="NEVER", max_consecutive_auto_reply=5):
},
},
)
llm_config = {
"seed": 42,
"config_list": config_list,
"max_tokens": 1024,
}
assistant = AssistantAgent(
"coding_agent",
oai_config={
# "request_timeout": 600,
"seed": 42,
"config_list": config_list,
"max_tokens": 1024,
},
llm_config=llm_config,
)
user = UserProxyAgent(
"user",
@@ -94,6 +94,8 @@ def test_gpt35(human_input_mode="NEVER", max_consecutive_auto_reply=5):
"use_docker": "python:3",
"timeout": 60,
},
llm_config=llm_config,
system_message="""Reply TERMINATE to end the conversation.""",
)
user.initiate_chat(assistant, message="TERMINATE")
# should terminate without sending any message
@@ -115,16 +117,17 @@ def test_create_execute_script(human_input_mode="NEVER", max_consecutive_auto_re
except ImportError:
return
config_list = oai.config_list_from_json(OAI_CONFIG_LIST, file_location=KEY_LOC)
config_list = autogen.config_list_from_json(OAI_CONFIG_LIST, file_location=KEY_LOC)
conversations = {}
oai.ChatCompletion.start_logging(conversations)
autogen.ChatCompletion.start_logging(conversations)
llm_config = {
"request_timeout": 600,
"seed": 42,
"config_list": config_list,
}
assistant = AssistantAgent(
"assistant",
oai_config={
"request_timeout": 600,
"seed": 42,
"config_list": config_list,
},
llm_config=llm_config,
)
user = UserProxyAgent(
"user",
@@ -145,10 +148,10 @@ print('Hello world!')
```""",
)
print(conversations)
oai.ChatCompletion.start_logging(compact=False)
autogen.ChatCompletion.start_logging(compact=False)
user.send("""Execute temp.py""", assistant)
print(oai.ChatCompletion.logged_history)
oai.ChatCompletion.stop_logging()
print(autogen.ChatCompletion.logged_history)
autogen.ChatCompletion.stop_logging()
def test_tsp(human_input_mode="NEVER", max_consecutive_auto_reply=10):
@@ -157,7 +160,7 @@ def test_tsp(human_input_mode="NEVER", max_consecutive_auto_reply=10):
except ImportError:
return
config_list = oai.config_list_from_json(
config_list = autogen.config_list_from_json(
OAI_CONFIG_LIST,
file_location=KEY_LOC,
filter_dict={
@@ -179,19 +182,17 @@ def test_tsp(human_input_mode="NEVER", max_consecutive_auto_reply=10):
def generate_init_message(self, question) -> str:
return self._prompt.format(question=question)
oai.ChatCompletion.start_logging()
assistant = AssistantAgent("assistant", oai_config={"temperature": 0, "config_list": config_list})
autogen.ChatCompletion.start_logging()
assistant = AssistantAgent("assistant", llm_config={"temperature": 0, "config_list": config_list})
user = TSPUserProxyAgent(
"user",
code_execution_config={"work_dir": here},
human_input_mode=human_input_mode,
max_consecutive_auto_reply=max_consecutive_auto_reply,
)
# agent.receive(prompt.format(question=hard_questions[0]), user)
# agent.receive(prompt.format(question=hard_questions[1]), user)
user.initiate_chat(assistant, question=hard_questions[2])
print(oai.ChatCompletion.logged_history)
oai.ChatCompletion.stop_logging()
print(autogen.ChatCompletion.logged_history)
autogen.ChatCompletion.stop_logging()
if __name__ == "__main__":

View File

@@ -1,14 +1,12 @@
from flaml import oai
import pytest
import sys
from flaml import autogen
from flaml.autogen.agentchat.contrib.math_user_proxy_agent import (
MathUserProxyAgent,
_remove_print,
_add_print_to_last_line,
)
import pytest
import sys
KEY_LOC = "test/autogen"
OAI_CONFIG_LIST = "OAI_CONFIG_LIST"
from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST
@pytest.mark.skipif(
@@ -24,9 +22,9 @@ def test_math_user_proxy_agent():
from flaml.autogen.agentchat.assistant_agent import AssistantAgent
conversations = {}
oai.ChatCompletion.start_logging(conversations)
autogen.ChatCompletion.start_logging(conversations)
config_list = oai.config_list_from_json(
config_list = autogen.config_list_from_json(
OAI_CONFIG_LIST,
file_location=KEY_LOC,
filter_dict={
@@ -36,7 +34,7 @@ def test_math_user_proxy_agent():
assistant = AssistantAgent(
"assistant",
system_message="You are a helpful assistant.",
oai_config={
llm_config={
"request_timeout": 600,
"seed": 42,
"config_list": config_list,

View File

@@ -13,16 +13,20 @@ def test_responsive_agent(monkeypatch):
monkeypatch.setattr(sys, "stdin", StringIO("TERMINATE\n\n"))
dummy_agent_1.receive(
{
"content": "hello",
"content": "hello {name}",
"context": {
"name": "dummy_agent_2",
},
},
dummy_agent_2,
) # receive a dict
assert "context" in dummy_agent_1.chat_messages["dummy_agent_2"][-2]
# receive dict without openai fields to be printed, such as "content", 'function_call'. There should be no error raised.
pre_len = len(dummy_agent_1.oai_conversations["dummy_agent_2"])
dummy_agent_1.receive({"message": "hello"}, dummy_agent_2)
pre_len = len(dummy_agent_1.chat_messages["dummy_agent_2"])
with pytest.raises(ValueError):
dummy_agent_1.receive({"message": "hello"}, dummy_agent_2)
assert pre_len == len(
dummy_agent_1.oai_conversations["dummy_agent_2"]
dummy_agent_1.chat_messages["dummy_agent_2"]
), "When the message is not an valid openai message, it should not be appended to the oai conversation."
monkeypatch.setattr(sys, "stdin", StringIO("exit"))
@@ -36,14 +40,18 @@ def test_responsive_agent(monkeypatch):
) # send a dict
# send dict with no openai fields
pre_len = len(dummy_agent_1.oai_conversations["dummy_agent_2"])
pre_len = len(dummy_agent_1.chat_messages["dummy_agent_2"])
with pytest.raises(ValueError):
dummy_agent_1.send({"message": "hello"}, dummy_agent_2)
assert pre_len == len(
dummy_agent_1.oai_conversations["dummy_agent_2"]
dummy_agent_1.chat_messages["dummy_agent_2"]
), "When the message is not a valid openai message, it should not be appended to the oai conversation."
# update system message
dummy_agent_1.update_system_message("new system message")
assert dummy_agent_1._oai_system_message[0]["content"] == "new system message"
if __name__ == "__main__":
test_responsive_agent(pytest.monkeypatch)

View File

@@ -5,30 +5,28 @@ import pytest
from functools import partial
import os
import json
from flaml import oai
from flaml import autogen
from flaml.autogen.code_utils import (
eval_function_completions,
generate_assertions,
implement,
generate_code,
improve_function,
improve_code,
)
from flaml.autogen.math_utils import eval_math_responses, solve_problem
KEY_LOC = "test/autogen"
KEY_LOC = "notebook"
OAI_CONFIG_LIST = "OAI_CONFIG_LIST"
here = os.path.abspath(os.path.dirname(__file__))
def yes_or_no_filter(context, response, **_):
return context.get("yes_or_no_choice", False) is False or any(
text in ["Yes.", "No."] for text in oai.Completion.extract_text(response)
text in ["Yes.", "No."] for text in autogen.Completion.extract_text(response)
)
def valid_json_filter(response, **_):
for text in oai.Completion.extract_text(response):
for text in autogen.Completion.extract_text(response):
try:
json.loads(text)
return True
@@ -43,47 +41,47 @@ def test_filter():
except ImportError as exc:
print(exc)
return
response = oai.Completion.create(
response = autogen.Completion.create(
context={"yes_or_no_choice": True},
config_list=[{"model": "text-ada-001"}, {"model": "gpt-3.5-turbo"}, {"model": "text-davinci-003"}],
prompt="Is 37 a prime number? Please answer 'Yes.' or 'No.'",
filter_func=yes_or_no_filter,
)
assert (
oai.Completion.extract_text(response)[0] in ["Yes.", "No."]
autogen.Completion.extract_text(response)[0] in ["Yes.", "No."]
or not response["pass_filter"]
and response["config_id"] == 2
)
response = oai.Completion.create(
response = autogen.Completion.create(
context={"yes_or_no_choice": False},
config_list=[{"model": "text-ada-001"}, {"model": "gpt-3.5-turbo"}, {"model": "text-davinci-003"}],
prompt="Is 37 a prime number?",
filter_func=yes_or_no_filter,
)
assert response["model"] == "text-ada-001"
response = oai.Completion.create(
response = autogen.Completion.create(
config_list=[{"model": "text-ada-001"}, {"model": "gpt-3.5-turbo"}, {"model": "text-davinci-003"}],
prompt="How to construct a json request to Bing API to search for 'latest AI news'? Return the JSON request.",
filter_func=valid_json_filter,
)
assert response["config_id"] == 2 or response["pass_filter"], "the response must pass filter unless all fail"
assert not response["pass_filter"] or json.loads(oai.Completion.extract_text(response)[0])
assert not response["pass_filter"] or json.loads(autogen.Completion.extract_text(response)[0])
def test_chatcompletion():
params = oai.ChatCompletion._construct_params(
params = autogen.ChatCompletion._construct_params(
context=None,
config={"model": "unknown"},
prompt="hi",
)
assert "messages" in params
params = oai.Completion._construct_params(
params = autogen.Completion._construct_params(
context=None,
config={"model": "unknown"},
prompt="hi",
)
assert "messages" not in params
params = oai.Completion._construct_params(
params = autogen.Completion._construct_params(
context=None,
config={"model": "gpt-4"},
prompt="hi",
@@ -97,46 +95,13 @@ def test_multi_model():
except ImportError as exc:
print(exc)
return
response = oai.Completion.create(
config_list=oai.config_list_gpt4_gpt35(KEY_LOC),
response = autogen.Completion.create(
config_list=autogen.config_list_gpt4_gpt35(KEY_LOC),
prompt="Hi",
)
print(response)
def test_improve():
try:
import openai
import diskcache
except ImportError as exc:
print(exc)
return
config_list = oai.config_list_openai_aoai(KEY_LOC)
improved, _ = improve_function(
"flaml/autogen/math_utils.py",
"solve_problem",
"Solve math problems accurately, by avoiding calculation errors and reduce reasoning errors.",
config_list=config_list,
)
with open(f"{here}/math_utils.py.improved", "w") as f:
f.write(improved)
suggestion, _ = improve_code(
["flaml/autogen/code_utils.py", "flaml/autogen/math_utils.py"],
"leverage generative AI smartly and cost-effectively",
config_list=config_list,
)
print(suggestion)
improvement, cost = improve_code(
["flaml/autogen/code_utils.py", "flaml/autogen/math_utils.py"],
"leverage generative AI smartly and cost-effectively",
suggest_only=False,
config_list=config_list,
)
print(cost)
with open(f"{here}/suggested_improvement.txt", "w") as f:
f.write(improvement)
def test_nocontext():
try:
import openai
@@ -144,12 +109,12 @@ def test_nocontext():
except ImportError as exc:
print(exc)
return
response = oai.Completion.create(
response = autogen.Completion.create(
model="text-ada-001", prompt="1+1=", max_tokens=1, use_cache=False, request_timeout=10
)
print(response)
code, _ = generate_code(
config_list=oai.config_list_from_json(
config_list=autogen.config_list_from_json(
OAI_CONFIG_LIST,
file_location=KEY_LOC,
filter_dict={
@@ -175,7 +140,7 @@ def test_nocontext():
)
print(code)
solution, cost = solve_problem("1+1=", config_list=oai.config_list_gpt4_gpt35(KEY_LOC))
solution, cost = solve_problem("1+1=", config_list=autogen.config_list_gpt4_gpt35(KEY_LOC))
print(solution, cost)
@@ -184,7 +149,7 @@ def test_nocontext():
reason="do not run on windows",
)
def test_humaneval(num_samples=1):
gpt35_config_list = oai.config_list_from_json(
gpt35_config_list = autogen.config_list_from_json(
env_or_file="OAI_CONFIG_LIST",
filter_dict={
"model": {
@@ -221,17 +186,17 @@ def test_humaneval(num_samples=1):
}
for x in range(n_tune_data, len(data))
]
oai.Completion.clear_cache(cache_path_root="{here}/cache")
oai.Completion.set_cache(seed)
autogen.Completion.clear_cache(cache_path_root="{here}/cache")
autogen.Completion.set_cache(seed)
try:
import openai
import diskcache
except ImportError as exc:
print(exc)
return
oai.Completion.clear_cache(400)
autogen.Completion.clear_cache(400)
# no error should be raised
response = oai.Completion.create(
response = autogen.Completion.create(
context=test_data[0],
config_list=[{"model": "gpt-3.5-turbo"}],
prompt="",
@@ -241,7 +206,7 @@ def test_humaneval(num_samples=1):
)
# assert response == -1
# a minimal tuning example
config, _ = oai.Completion.tune(
config, _ = autogen.Completion.tune(
data=tune_data,
metric="success",
mode="max",
@@ -249,9 +214,9 @@ def test_humaneval(num_samples=1):
n=1,
prompt="{definition}",
)
response = oai.Completion.create(context=test_data[0], **config)
response = autogen.Completion.create(context=test_data[0], **config)
# a minimal tuning example for tuning chat completion models using the Completion class
config, _ = oai.Completion.tune(
config, _ = autogen.Completion.tune(
data=tune_data,
metric="succeed_assertions",
mode="max",
@@ -260,10 +225,10 @@ def test_humaneval(num_samples=1):
model="text-davinci-003",
prompt="{definition}",
)
response = oai.Completion.create(context=test_data[0], **config)
response = autogen.Completion.create(context=test_data[0], **config)
# a minimal tuning example for tuning chat completion models using the ChatCompletion class
config_list = oai.config_list_openai_aoai(KEY_LOC)
config, _ = oai.ChatCompletion.tune(
config_list = autogen.config_list_openai_aoai(KEY_LOC)
config, _ = autogen.ChatCompletion.tune(
data=tune_data,
metric="expected_success",
mode="max",
@@ -272,7 +237,7 @@ def test_humaneval(num_samples=1):
messages=[{"role": "user", "content": "{definition}"}],
config_list=config_list,
)
response = oai.ChatCompletion.create(context=test_data[0], config_list=config_list, **config)
response = autogen.ChatCompletion.create(context=test_data[0], config_list=config_list, **config)
print(response)
from openai.error import RateLimitError
@@ -289,7 +254,7 @@ def test_humaneval(num_samples=1):
assert selected == 0
print(eval_function_completions([code], **tune_data[1]))
# a more comprehensive tuning example
config2, analysis = oai.Completion.tune(
config2, analysis = autogen.Completion.tune(
data=tune_data,
metric="success",
mode="max",
@@ -310,12 +275,12 @@ def test_humaneval(num_samples=1):
print(config2)
print(analysis.best_result)
print(test_data[0])
response = oai.Completion.create(context=test_data[0], **config2)
response = autogen.Completion.create(context=test_data[0], **config2)
print(response)
oai.Completion.data = test_data[:num_samples]
result = oai.Completion._eval(analysis.best_config, prune=False, eval_only=True)
autogen.Completion.data = test_data[:num_samples]
result = autogen.Completion._eval(analysis.best_config, prune=False, eval_only=True)
print("result without pruning", result)
result = oai.Completion.test(test_data[:num_samples], **config2)
result = autogen.Completion.test(test_data[:num_samples], **config2)
print(result)
try:
code, cost, selected = implement(
@@ -376,7 +341,7 @@ def test_math(num_samples=-1):
% data["problem"]
]
oai.Completion.set_cache(seed)
autogen.Completion.set_cache(seed)
vanilla_config = {
"model": "text-davinci-003",
"temperature": 1,
@@ -386,8 +351,8 @@ def test_math(num_samples=-1):
"stop": "###",
}
test_data_sample = test_data[0:3]
result = oai.Completion.test(test_data_sample, eval_math_responses, **vanilla_config)
result = oai.Completion.test(
result = autogen.Completion.test(test_data_sample, eval_math_responses, **vanilla_config)
result = autogen.Completion.test(
test_data_sample,
eval_math_responses,
agg_method="median",
@@ -400,13 +365,13 @@ def test_math(num_samples=-1):
def my_average(results):
return np.mean(results)
result = oai.Completion.test(
result = autogen.Completion.test(
test_data_sample,
eval_math_responses,
agg_method=my_median,
**vanilla_config,
)
result = oai.Completion.test(
result = autogen.Completion.test(
test_data_sample,
eval_math_responses,
agg_method={
@@ -420,7 +385,7 @@ def test_math(num_samples=-1):
print(result)
config, _ = oai.Completion.tune(
config, _ = autogen.Completion.tune(
data=tune_data, # the data for tuning
metric="expected_success", # the metric to optimize
mode="max", # the optimization mode
@@ -433,7 +398,7 @@ def test_math(num_samples=-1):
stop="###", # the stop sequence
)
print("tuned config", config)
result = oai.Completion.test(test_data_sample, config_list=oai.config_list_openai_aoai(KEY_LOC), **config)
result = autogen.Completion.test(test_data_sample, config_list=autogen.config_list_openai_aoai(KEY_LOC), **config)
print("result from tuned config:", result)
print("empty responses", eval_math_responses([], None))
@@ -441,7 +406,7 @@ def test_math(num_samples=-1):
if __name__ == "__main__":
import openai
config_list = oai.config_list_openai_aoai(KEY_LOC)
config_list = autogen.config_list_openai_aoai(KEY_LOC)
assert len(config_list) >= 3, config_list
openai.api_key = os.environ["OPENAI_API_KEY"]

View File

@@ -1,22 +1,21 @@
import json
import os
from flaml import oai
KEY_LOC = "test/autogen"
from flaml import autogen
from test_completion import KEY_LOC, OAI_CONFIG_LIST
def test_config_list_from_json():
config_list = oai.config_list_gpt4_gpt35(key_file_path=KEY_LOC)
config_list = autogen.config_list_gpt4_gpt35(key_file_path=KEY_LOC)
json_file = os.path.join(KEY_LOC, "config_list_test.json")
with open(json_file, "w") as f:
json.dump(config_list, f, indent=4)
config_list_1 = oai.config_list_from_json(json_file)
config_list_1 = autogen.config_list_from_json(json_file)
assert config_list == config_list_1
os.environ["config_list_test"] = json.dumps(config_list)
config_list_2 = oai.config_list_from_json("config_list_test")
config_list_2 = autogen.config_list_from_json("config_list_test")
assert config_list == config_list_2
config_list_3 = oai.config_list_from_json(
"OAI_CONFIG_LIST", file_location=KEY_LOC, filter_dict={"model": ["gpt4", "gpt-4-32k"]}
config_list_3 = autogen.config_list_from_json(
OAI_CONFIG_LIST, file_location=KEY_LOC, filter_dict={"model": ["gpt4", "gpt-4-32k"]}
)
assert all(config.get("model") in ["gpt4", "gpt-4-32k"] for config in config_list_3)
del os.environ["config_list_test"]
@@ -24,7 +23,7 @@ def test_config_list_from_json():
def test_config_list_openai_aoai():
config_list = oai.config_list_openai_aoai(key_file_path=KEY_LOC)
config_list = autogen.config_list_openai_aoai(key_file_path=KEY_LOC)
assert all(config.get("api_type") in [None, "open_ai", "azure"] for config in config_list)

View File

@@ -1,11 +1,151 @@
import sys
import os
import pytest
from flaml.autogen.code_utils import UNKNOWN, extract_code, execute_code, infer_lang
from flaml import autogen
from flaml.autogen.code_utils import (
UNKNOWN,
extract_code,
execute_code,
infer_lang,
improve_code,
improve_function,
)
KEY_LOC = "notebook"
OAI_CONFIG_LIST = "OAI_CONFIG_LIST"
here = os.path.abspath(os.path.dirname(__file__))
# def test_find_code():
# try:
# import openai
# except ImportError:
# return
# # need gpt-4 for this task
# config_list = autogen.config_list_from_json(
# OAI_CONFIG_LIST,
# file_location=KEY_LOC,
# filter_dict={
# "model": ["gpt-4", "gpt4", "gpt-4-32k", "gpt-4-32k-0314"],
# },
# )
# # config_list = autogen.config_list_from_json(
# # OAI_CONFIG_LIST,
# # file_location=KEY_LOC,
# # filter_dict={
# # "model": {
# # "gpt-3.5-turbo",
# # "gpt-3.5-turbo-16k",
# # "gpt-3.5-turbo-0301",
# # "chatgpt-35-turbo-0301",
# # "gpt-35-turbo-v0301",
# # },
# # },
# # )
# seed = 42
# messages = [
# {
# "role": "user",
# "content": "Print hello world to a file called hello.txt",
# },
# {
# "role": "user",
# "content": """
# # filename: write_hello.py
# ```
# with open('hello.txt', 'w') as f:
# f.write('Hello, World!')
# print('Hello, World! printed to hello.txt')
# ```
# Please execute the above Python code to print "Hello, World!" to a file called hello.txt and print the success message.
# """,
# },
# ]
# codeblocks, _ = find_code(messages, seed=seed, config_list=config_list)
# assert codeblocks[0][0] == "python", codeblocks
# messages += [
# {
# "role": "user",
# "content": """
# exitcode: 0 (execution succeeded)
# Code output:
# Hello, World! printed to hello.txt
# """,
# },
# {
# "role": "assistant",
# "content": "Great! Can I help you with anything else?",
# },
# ]
# codeblocks, content = find_code(messages, seed=seed, config_list=config_list)
# assert codeblocks[0][0] == "unknown", content
# messages += [
# {
# "role": "user",
# "content": "Save a pandas df with 3 rows and 3 columns to disk.",
# },
# {
# "role": "assistant",
# "content": """
# ```
# # filename: save_df.py
# import pandas as pd
# df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
# df.to_csv('df.csv')
# print('df saved to df.csv')
# ```
# Please execute the above Python code to save a pandas df with 3 rows and 3 columns to disk.
# Before you run the code above, run
# ```
# pip install pandas
# ```
# first to install pandas.
# """,
# },
# ]
# codeblocks, content = find_code(messages, seed=seed, config_list=config_list)
# assert (
# len(codeblocks) == 2
# and (codeblocks[0][0] == "sh"
# and codeblocks[1][0] == "python"
# or codeblocks[0][0] == "python"
# and codeblocks[1][0] == "sh")
# ), content
# messages += [
# {
# "role": "user",
# "content": "The code is unsafe to execute in my environment.",
# },
# {
# "role": "assistant",
# "content": "please run python write_hello.py",
# },
# ]
# # codeblocks, content = find_code(messages, config_list=config_list)
# # assert codeblocks[0][0] != "unknown", content
# # I'm sorry, but I cannot execute code from earlier messages. Please provide the code again if you would like me to execute it.
# messages[-1]["content"] = "please skip pip install pandas if you already have pandas installed"
# codeblocks, content = find_code(messages, seed=seed, config_list=config_list)
# assert codeblocks[0][0] != "sh", content
# messages += [
# {
# "role": "user",
# "content": "The code is still unsafe to execute in my environment.",
# },
# {
# "role": "assistant",
# "content": "Let me try something else. Do you have docker installed?",
# },
# ]
# codeblocks, content = find_code(messages, seed=seed, config_list=config_list)
# assert codeblocks[0][0] == "unknown", content
# print(content)
def test_infer_lang():
assert infer_lang("print('hello world')") == "python"
assert infer_lang("pip install flaml") == "sh"
@@ -59,12 +199,16 @@ def test_execute_code():
import docker
except ImportError as exc:
print(exc)
return
exitcode, msg, image = execute_code("print('hello world')", filename="tmp/codetest.py")
assert exitcode == 0 and msg == b"hello world\n", msg
docker = None
exit_code, msg, image = execute_code("print('hello world')", filename="tmp/codetest.py")
assert exit_code == 0 and msg == "hello world\n", msg
# read a file
print(execute_code("with open('tmp/codetest.py', 'r') as f: a=f.read()"))
# create a file
exit_code, msg, image = execute_code(
"with open('tmp/codetest.py', 'w') as f: f.write('b=1')", work_dir=f"{here}/my_tmp", filename="tmp2/codetest.py"
)
assert exit_code and 'File "tmp2/codetest.py"' in msg, msg
print(execute_code("with open('tmp/codetest.py', 'w') as f: f.write('b=1')", work_dir=f"{here}/my_tmp"))
# execute code in a file
print(execute_code(filename="tmp/codetest.py"))
@@ -72,20 +216,53 @@ def test_execute_code():
# execute code for assertion error
exit_code, msg, image = execute_code("assert 1==2")
assert exit_code, msg
assert 'File ""' in msg
# execute code which takes a long time
exit_code, error, image = execute_code("import time; time.sleep(2)", timeout=1)
assert exit_code and error.decode() == "Timeout"
assert isinstance(image, str)
assert exit_code and error == "Timeout"
assert isinstance(image, str) or docker is None or os.path.exists("/.dockerenv")
def test_execute_code_no_docker():
exit_code, error, image = execute_code("import time; time.sleep(2)", timeout=1, use_docker=False)
if sys.platform != "win32":
assert exit_code and error.decode() == "Timeout"
assert exit_code and error == "Timeout"
assert image is None
def test_improve():
try:
import openai
except ImportError:
return
config_list = autogen.config_list_openai_aoai(KEY_LOC)
improved, _ = improve_function(
"flaml/autogen/math_utils.py",
"solve_problem",
"Solve math problems accurately, by avoiding calculation errors and reduce reasoning errors.",
config_list=config_list,
)
with open(f"{here}/math_utils.py.improved", "w") as f:
f.write(improved)
suggestion, _ = improve_code(
["flaml/autogen/code_utils.py", "flaml/autogen/math_utils.py"],
"leverage generative AI smartly and cost-effectively",
config_list=config_list,
)
print(suggestion)
improvement, cost = improve_code(
["flaml/autogen/code_utils.py", "flaml/autogen/math_utils.py"],
"leverage generative AI smartly and cost-effectively",
suggest_only=False,
config_list=config_list,
)
print(cost)
with open(f"{here}/suggested_improvement.txt", "w") as f:
f.write(improvement)
if __name__ == "__main__":
# test_infer_lang()
# test_extract_code()
test_execute_code()
# test_find_code()

View File

@@ -4,15 +4,14 @@ except ImportError:
openai = None
import pytest
import json
from flaml import oai
from flaml import autogen
from flaml.autogen.math_utils import eval_math_responses
KEY_LOC = "test/autogen"
from test_code import KEY_LOC
@pytest.mark.skipif(openai is None, reason="openai not installed")
def test_eval_math_responses():
config_list = oai.config_list_from_models(
config_list = autogen.config_list_from_models(
KEY_LOC, exclude="aoai", model_list=["gpt-4-0613", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k"]
)
functions = [
@@ -36,7 +35,7 @@ def test_eval_math_responses():
},
},
]
response = oai.ChatCompletion.create(
response = autogen.ChatCompletion.create(
config_list=config_list,
messages=[
{
@@ -47,7 +46,7 @@ def test_eval_math_responses():
functions=functions,
)
print(response)
responses = oai.ChatCompletion.extract_text_or_function_call(response)
responses = autogen.ChatCompletion.extract_text_or_function_call(response)
print(responses[0])
function_call = responses[0]["function_call"]
name, arguments = function_call["name"], json.loads(function_call["arguments"])