mirror of
https://github.com/microsoft/autogen.git
synced 2026-04-20 03:02:16 -04:00
raise error when msg is invalid; fix docstr; improve ResponsiveAgent; update doc and packaging; capture ipython output; find code blocks with llm when regex fails. (#1154)
* autogen.agent -> autogen.agentchat * bug fix in portfolio * notebook * timeout * timeout * infer lang; close #1150 * timeout * message context * context handling * add sender to generate_reply * clean up the receive function * move mathchat to contrib * contrib * last_message * Add OptiGuide: agent and notebook * Optiguide notebook: add figures and URL 1. figures and code points to remote URL 2. simplify the prompt for the interpreter, because all information is already in the chat history. * Update name: Agent -> GenericAgent * Update notebook * Rename: GenericAgent -> ResponsiveAgent * Rebase to autogen.agentchat * OptiGuide: Comment, style, and notebook updates * simplify optiguide * raise error when msg is invalid; fix docstr * allow return None for generate_reply() * update_system_message * test update_system_message * simplify optiguide * simplify optiguide * simplify optiguide * simplify optiguide * move test * add test and fix bug * doc update * doc update * doc update * color * optiguide * prompt * test danger case * packaging * docker * remove path in traceback * capture ipython output * simplify * find code blocks with llm * find code with llm * order * order * fix bug in context handling * print executing msg * print executing msg * test find code * test find code * disable find_code * default_auto_reply * default auto reply * remove optiguide * remove -e --------- Co-authored-by: Beibin Li <beibin79@gmail.com>
This commit is contained in:
@@ -1,10 +1,10 @@
|
||||
import os
|
||||
import sys
|
||||
import pytest
|
||||
from flaml import oai
|
||||
from flaml import autogen
|
||||
from flaml.autogen.agentchat import AssistantAgent, UserProxyAgent
|
||||
|
||||
KEY_LOC = "test/autogen"
|
||||
KEY_LOC = "notebook"
|
||||
OAI_CONFIG_LIST = "OAI_CONFIG_LIST"
|
||||
here = os.path.abspath(os.path.dirname(__file__))
|
||||
|
||||
@@ -20,16 +20,16 @@ def test_ai_user_proxy_agent():
|
||||
return
|
||||
|
||||
conversations = {}
|
||||
oai.ChatCompletion.start_logging(conversations)
|
||||
autogen.ChatCompletion.start_logging(conversations)
|
||||
|
||||
config_list = oai.config_list_from_json(
|
||||
config_list = autogen.config_list_from_json(
|
||||
OAI_CONFIG_LIST,
|
||||
file_location=KEY_LOC,
|
||||
)
|
||||
assistant = AssistantAgent(
|
||||
"assistant",
|
||||
system_message="You are a helpful assistant.",
|
||||
oai_config={
|
||||
llm_config={
|
||||
"request_timeout": 600,
|
||||
"seed": 42,
|
||||
"config_list": config_list,
|
||||
@@ -41,7 +41,7 @@ def test_ai_user_proxy_agent():
|
||||
human_input_mode="NEVER",
|
||||
max_consecutive_auto_reply=2,
|
||||
code_execution_config=False,
|
||||
oai_config={
|
||||
llm_config={
|
||||
"config_list": config_list,
|
||||
},
|
||||
# In the system message the "user" always refers to the other agent.
|
||||
@@ -62,7 +62,7 @@ def test_gpt35(human_input_mode="NEVER", max_consecutive_auto_reply=5):
|
||||
import openai
|
||||
except ImportError:
|
||||
return
|
||||
config_list = oai.config_list_from_json(
|
||||
config_list = autogen.config_list_from_json(
|
||||
OAI_CONFIG_LIST,
|
||||
file_location=KEY_LOC,
|
||||
filter_dict={
|
||||
@@ -75,14 +75,14 @@ def test_gpt35(human_input_mode="NEVER", max_consecutive_auto_reply=5):
|
||||
},
|
||||
},
|
||||
)
|
||||
llm_config = {
|
||||
"seed": 42,
|
||||
"config_list": config_list,
|
||||
"max_tokens": 1024,
|
||||
}
|
||||
assistant = AssistantAgent(
|
||||
"coding_agent",
|
||||
oai_config={
|
||||
# "request_timeout": 600,
|
||||
"seed": 42,
|
||||
"config_list": config_list,
|
||||
"max_tokens": 1024,
|
||||
},
|
||||
llm_config=llm_config,
|
||||
)
|
||||
user = UserProxyAgent(
|
||||
"user",
|
||||
@@ -94,6 +94,8 @@ def test_gpt35(human_input_mode="NEVER", max_consecutive_auto_reply=5):
|
||||
"use_docker": "python:3",
|
||||
"timeout": 60,
|
||||
},
|
||||
llm_config=llm_config,
|
||||
system_message="""Reply TERMINATE to end the conversation.""",
|
||||
)
|
||||
user.initiate_chat(assistant, message="TERMINATE")
|
||||
# should terminate without sending any message
|
||||
@@ -115,16 +117,17 @@ def test_create_execute_script(human_input_mode="NEVER", max_consecutive_auto_re
|
||||
except ImportError:
|
||||
return
|
||||
|
||||
config_list = oai.config_list_from_json(OAI_CONFIG_LIST, file_location=KEY_LOC)
|
||||
config_list = autogen.config_list_from_json(OAI_CONFIG_LIST, file_location=KEY_LOC)
|
||||
conversations = {}
|
||||
oai.ChatCompletion.start_logging(conversations)
|
||||
autogen.ChatCompletion.start_logging(conversations)
|
||||
llm_config = {
|
||||
"request_timeout": 600,
|
||||
"seed": 42,
|
||||
"config_list": config_list,
|
||||
}
|
||||
assistant = AssistantAgent(
|
||||
"assistant",
|
||||
oai_config={
|
||||
"request_timeout": 600,
|
||||
"seed": 42,
|
||||
"config_list": config_list,
|
||||
},
|
||||
llm_config=llm_config,
|
||||
)
|
||||
user = UserProxyAgent(
|
||||
"user",
|
||||
@@ -145,10 +148,10 @@ print('Hello world!')
|
||||
```""",
|
||||
)
|
||||
print(conversations)
|
||||
oai.ChatCompletion.start_logging(compact=False)
|
||||
autogen.ChatCompletion.start_logging(compact=False)
|
||||
user.send("""Execute temp.py""", assistant)
|
||||
print(oai.ChatCompletion.logged_history)
|
||||
oai.ChatCompletion.stop_logging()
|
||||
print(autogen.ChatCompletion.logged_history)
|
||||
autogen.ChatCompletion.stop_logging()
|
||||
|
||||
|
||||
def test_tsp(human_input_mode="NEVER", max_consecutive_auto_reply=10):
|
||||
@@ -157,7 +160,7 @@ def test_tsp(human_input_mode="NEVER", max_consecutive_auto_reply=10):
|
||||
except ImportError:
|
||||
return
|
||||
|
||||
config_list = oai.config_list_from_json(
|
||||
config_list = autogen.config_list_from_json(
|
||||
OAI_CONFIG_LIST,
|
||||
file_location=KEY_LOC,
|
||||
filter_dict={
|
||||
@@ -179,19 +182,17 @@ def test_tsp(human_input_mode="NEVER", max_consecutive_auto_reply=10):
|
||||
def generate_init_message(self, question) -> str:
|
||||
return self._prompt.format(question=question)
|
||||
|
||||
oai.ChatCompletion.start_logging()
|
||||
assistant = AssistantAgent("assistant", oai_config={"temperature": 0, "config_list": config_list})
|
||||
autogen.ChatCompletion.start_logging()
|
||||
assistant = AssistantAgent("assistant", llm_config={"temperature": 0, "config_list": config_list})
|
||||
user = TSPUserProxyAgent(
|
||||
"user",
|
||||
code_execution_config={"work_dir": here},
|
||||
human_input_mode=human_input_mode,
|
||||
max_consecutive_auto_reply=max_consecutive_auto_reply,
|
||||
)
|
||||
# agent.receive(prompt.format(question=hard_questions[0]), user)
|
||||
# agent.receive(prompt.format(question=hard_questions[1]), user)
|
||||
user.initiate_chat(assistant, question=hard_questions[2])
|
||||
print(oai.ChatCompletion.logged_history)
|
||||
oai.ChatCompletion.stop_logging()
|
||||
print(autogen.ChatCompletion.logged_history)
|
||||
autogen.ChatCompletion.stop_logging()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -1,14 +1,12 @@
|
||||
from flaml import oai
|
||||
import pytest
|
||||
import sys
|
||||
from flaml import autogen
|
||||
from flaml.autogen.agentchat.contrib.math_user_proxy_agent import (
|
||||
MathUserProxyAgent,
|
||||
_remove_print,
|
||||
_add_print_to_last_line,
|
||||
)
|
||||
import pytest
|
||||
import sys
|
||||
|
||||
KEY_LOC = "test/autogen"
|
||||
OAI_CONFIG_LIST = "OAI_CONFIG_LIST"
|
||||
from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
@@ -24,9 +22,9 @@ def test_math_user_proxy_agent():
|
||||
from flaml.autogen.agentchat.assistant_agent import AssistantAgent
|
||||
|
||||
conversations = {}
|
||||
oai.ChatCompletion.start_logging(conversations)
|
||||
autogen.ChatCompletion.start_logging(conversations)
|
||||
|
||||
config_list = oai.config_list_from_json(
|
||||
config_list = autogen.config_list_from_json(
|
||||
OAI_CONFIG_LIST,
|
||||
file_location=KEY_LOC,
|
||||
filter_dict={
|
||||
@@ -36,7 +34,7 @@ def test_math_user_proxy_agent():
|
||||
assistant = AssistantAgent(
|
||||
"assistant",
|
||||
system_message="You are a helpful assistant.",
|
||||
oai_config={
|
||||
llm_config={
|
||||
"request_timeout": 600,
|
||||
"seed": 42,
|
||||
"config_list": config_list,
|
||||
@@ -13,16 +13,20 @@ def test_responsive_agent(monkeypatch):
|
||||
monkeypatch.setattr(sys, "stdin", StringIO("TERMINATE\n\n"))
|
||||
dummy_agent_1.receive(
|
||||
{
|
||||
"content": "hello",
|
||||
"content": "hello {name}",
|
||||
"context": {
|
||||
"name": "dummy_agent_2",
|
||||
},
|
||||
},
|
||||
dummy_agent_2,
|
||||
) # receive a dict
|
||||
|
||||
assert "context" in dummy_agent_1.chat_messages["dummy_agent_2"][-2]
|
||||
# receive dict without openai fields to be printed, such as "content", 'function_call'. There should be no error raised.
|
||||
pre_len = len(dummy_agent_1.oai_conversations["dummy_agent_2"])
|
||||
dummy_agent_1.receive({"message": "hello"}, dummy_agent_2)
|
||||
pre_len = len(dummy_agent_1.chat_messages["dummy_agent_2"])
|
||||
with pytest.raises(ValueError):
|
||||
dummy_agent_1.receive({"message": "hello"}, dummy_agent_2)
|
||||
assert pre_len == len(
|
||||
dummy_agent_1.oai_conversations["dummy_agent_2"]
|
||||
dummy_agent_1.chat_messages["dummy_agent_2"]
|
||||
), "When the message is not an valid openai message, it should not be appended to the oai conversation."
|
||||
|
||||
monkeypatch.setattr(sys, "stdin", StringIO("exit"))
|
||||
@@ -36,14 +40,18 @@ def test_responsive_agent(monkeypatch):
|
||||
) # send a dict
|
||||
|
||||
# send dict with no openai fields
|
||||
pre_len = len(dummy_agent_1.oai_conversations["dummy_agent_2"])
|
||||
pre_len = len(dummy_agent_1.chat_messages["dummy_agent_2"])
|
||||
with pytest.raises(ValueError):
|
||||
dummy_agent_1.send({"message": "hello"}, dummy_agent_2)
|
||||
|
||||
assert pre_len == len(
|
||||
dummy_agent_1.oai_conversations["dummy_agent_2"]
|
||||
dummy_agent_1.chat_messages["dummy_agent_2"]
|
||||
), "When the message is not a valid openai message, it should not be appended to the oai conversation."
|
||||
|
||||
# update system message
|
||||
dummy_agent_1.update_system_message("new system message")
|
||||
assert dummy_agent_1._oai_system_message[0]["content"] == "new system message"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_responsive_agent(pytest.monkeypatch)
|
||||
@@ -5,30 +5,28 @@ import pytest
|
||||
from functools import partial
|
||||
import os
|
||||
import json
|
||||
from flaml import oai
|
||||
from flaml import autogen
|
||||
from flaml.autogen.code_utils import (
|
||||
eval_function_completions,
|
||||
generate_assertions,
|
||||
implement,
|
||||
generate_code,
|
||||
improve_function,
|
||||
improve_code,
|
||||
)
|
||||
from flaml.autogen.math_utils import eval_math_responses, solve_problem
|
||||
|
||||
KEY_LOC = "test/autogen"
|
||||
KEY_LOC = "notebook"
|
||||
OAI_CONFIG_LIST = "OAI_CONFIG_LIST"
|
||||
here = os.path.abspath(os.path.dirname(__file__))
|
||||
|
||||
|
||||
def yes_or_no_filter(context, response, **_):
|
||||
return context.get("yes_or_no_choice", False) is False or any(
|
||||
text in ["Yes.", "No."] for text in oai.Completion.extract_text(response)
|
||||
text in ["Yes.", "No."] for text in autogen.Completion.extract_text(response)
|
||||
)
|
||||
|
||||
|
||||
def valid_json_filter(response, **_):
|
||||
for text in oai.Completion.extract_text(response):
|
||||
for text in autogen.Completion.extract_text(response):
|
||||
try:
|
||||
json.loads(text)
|
||||
return True
|
||||
@@ -43,47 +41,47 @@ def test_filter():
|
||||
except ImportError as exc:
|
||||
print(exc)
|
||||
return
|
||||
response = oai.Completion.create(
|
||||
response = autogen.Completion.create(
|
||||
context={"yes_or_no_choice": True},
|
||||
config_list=[{"model": "text-ada-001"}, {"model": "gpt-3.5-turbo"}, {"model": "text-davinci-003"}],
|
||||
prompt="Is 37 a prime number? Please answer 'Yes.' or 'No.'",
|
||||
filter_func=yes_or_no_filter,
|
||||
)
|
||||
assert (
|
||||
oai.Completion.extract_text(response)[0] in ["Yes.", "No."]
|
||||
autogen.Completion.extract_text(response)[0] in ["Yes.", "No."]
|
||||
or not response["pass_filter"]
|
||||
and response["config_id"] == 2
|
||||
)
|
||||
response = oai.Completion.create(
|
||||
response = autogen.Completion.create(
|
||||
context={"yes_or_no_choice": False},
|
||||
config_list=[{"model": "text-ada-001"}, {"model": "gpt-3.5-turbo"}, {"model": "text-davinci-003"}],
|
||||
prompt="Is 37 a prime number?",
|
||||
filter_func=yes_or_no_filter,
|
||||
)
|
||||
assert response["model"] == "text-ada-001"
|
||||
response = oai.Completion.create(
|
||||
response = autogen.Completion.create(
|
||||
config_list=[{"model": "text-ada-001"}, {"model": "gpt-3.5-turbo"}, {"model": "text-davinci-003"}],
|
||||
prompt="How to construct a json request to Bing API to search for 'latest AI news'? Return the JSON request.",
|
||||
filter_func=valid_json_filter,
|
||||
)
|
||||
assert response["config_id"] == 2 or response["pass_filter"], "the response must pass filter unless all fail"
|
||||
assert not response["pass_filter"] or json.loads(oai.Completion.extract_text(response)[0])
|
||||
assert not response["pass_filter"] or json.loads(autogen.Completion.extract_text(response)[0])
|
||||
|
||||
|
||||
def test_chatcompletion():
|
||||
params = oai.ChatCompletion._construct_params(
|
||||
params = autogen.ChatCompletion._construct_params(
|
||||
context=None,
|
||||
config={"model": "unknown"},
|
||||
prompt="hi",
|
||||
)
|
||||
assert "messages" in params
|
||||
params = oai.Completion._construct_params(
|
||||
params = autogen.Completion._construct_params(
|
||||
context=None,
|
||||
config={"model": "unknown"},
|
||||
prompt="hi",
|
||||
)
|
||||
assert "messages" not in params
|
||||
params = oai.Completion._construct_params(
|
||||
params = autogen.Completion._construct_params(
|
||||
context=None,
|
||||
config={"model": "gpt-4"},
|
||||
prompt="hi",
|
||||
@@ -97,46 +95,13 @@ def test_multi_model():
|
||||
except ImportError as exc:
|
||||
print(exc)
|
||||
return
|
||||
response = oai.Completion.create(
|
||||
config_list=oai.config_list_gpt4_gpt35(KEY_LOC),
|
||||
response = autogen.Completion.create(
|
||||
config_list=autogen.config_list_gpt4_gpt35(KEY_LOC),
|
||||
prompt="Hi",
|
||||
)
|
||||
print(response)
|
||||
|
||||
|
||||
def test_improve():
|
||||
try:
|
||||
import openai
|
||||
import diskcache
|
||||
except ImportError as exc:
|
||||
print(exc)
|
||||
return
|
||||
config_list = oai.config_list_openai_aoai(KEY_LOC)
|
||||
improved, _ = improve_function(
|
||||
"flaml/autogen/math_utils.py",
|
||||
"solve_problem",
|
||||
"Solve math problems accurately, by avoiding calculation errors and reduce reasoning errors.",
|
||||
config_list=config_list,
|
||||
)
|
||||
with open(f"{here}/math_utils.py.improved", "w") as f:
|
||||
f.write(improved)
|
||||
suggestion, _ = improve_code(
|
||||
["flaml/autogen/code_utils.py", "flaml/autogen/math_utils.py"],
|
||||
"leverage generative AI smartly and cost-effectively",
|
||||
config_list=config_list,
|
||||
)
|
||||
print(suggestion)
|
||||
improvement, cost = improve_code(
|
||||
["flaml/autogen/code_utils.py", "flaml/autogen/math_utils.py"],
|
||||
"leverage generative AI smartly and cost-effectively",
|
||||
suggest_only=False,
|
||||
config_list=config_list,
|
||||
)
|
||||
print(cost)
|
||||
with open(f"{here}/suggested_improvement.txt", "w") as f:
|
||||
f.write(improvement)
|
||||
|
||||
|
||||
def test_nocontext():
|
||||
try:
|
||||
import openai
|
||||
@@ -144,12 +109,12 @@ def test_nocontext():
|
||||
except ImportError as exc:
|
||||
print(exc)
|
||||
return
|
||||
response = oai.Completion.create(
|
||||
response = autogen.Completion.create(
|
||||
model="text-ada-001", prompt="1+1=", max_tokens=1, use_cache=False, request_timeout=10
|
||||
)
|
||||
print(response)
|
||||
code, _ = generate_code(
|
||||
config_list=oai.config_list_from_json(
|
||||
config_list=autogen.config_list_from_json(
|
||||
OAI_CONFIG_LIST,
|
||||
file_location=KEY_LOC,
|
||||
filter_dict={
|
||||
@@ -175,7 +140,7 @@ def test_nocontext():
|
||||
)
|
||||
print(code)
|
||||
|
||||
solution, cost = solve_problem("1+1=", config_list=oai.config_list_gpt4_gpt35(KEY_LOC))
|
||||
solution, cost = solve_problem("1+1=", config_list=autogen.config_list_gpt4_gpt35(KEY_LOC))
|
||||
print(solution, cost)
|
||||
|
||||
|
||||
@@ -184,7 +149,7 @@ def test_nocontext():
|
||||
reason="do not run on windows",
|
||||
)
|
||||
def test_humaneval(num_samples=1):
|
||||
gpt35_config_list = oai.config_list_from_json(
|
||||
gpt35_config_list = autogen.config_list_from_json(
|
||||
env_or_file="OAI_CONFIG_LIST",
|
||||
filter_dict={
|
||||
"model": {
|
||||
@@ -221,17 +186,17 @@ def test_humaneval(num_samples=1):
|
||||
}
|
||||
for x in range(n_tune_data, len(data))
|
||||
]
|
||||
oai.Completion.clear_cache(cache_path_root="{here}/cache")
|
||||
oai.Completion.set_cache(seed)
|
||||
autogen.Completion.clear_cache(cache_path_root="{here}/cache")
|
||||
autogen.Completion.set_cache(seed)
|
||||
try:
|
||||
import openai
|
||||
import diskcache
|
||||
except ImportError as exc:
|
||||
print(exc)
|
||||
return
|
||||
oai.Completion.clear_cache(400)
|
||||
autogen.Completion.clear_cache(400)
|
||||
# no error should be raised
|
||||
response = oai.Completion.create(
|
||||
response = autogen.Completion.create(
|
||||
context=test_data[0],
|
||||
config_list=[{"model": "gpt-3.5-turbo"}],
|
||||
prompt="",
|
||||
@@ -241,7 +206,7 @@ def test_humaneval(num_samples=1):
|
||||
)
|
||||
# assert response == -1
|
||||
# a minimal tuning example
|
||||
config, _ = oai.Completion.tune(
|
||||
config, _ = autogen.Completion.tune(
|
||||
data=tune_data,
|
||||
metric="success",
|
||||
mode="max",
|
||||
@@ -249,9 +214,9 @@ def test_humaneval(num_samples=1):
|
||||
n=1,
|
||||
prompt="{definition}",
|
||||
)
|
||||
response = oai.Completion.create(context=test_data[0], **config)
|
||||
response = autogen.Completion.create(context=test_data[0], **config)
|
||||
# a minimal tuning example for tuning chat completion models using the Completion class
|
||||
config, _ = oai.Completion.tune(
|
||||
config, _ = autogen.Completion.tune(
|
||||
data=tune_data,
|
||||
metric="succeed_assertions",
|
||||
mode="max",
|
||||
@@ -260,10 +225,10 @@ def test_humaneval(num_samples=1):
|
||||
model="text-davinci-003",
|
||||
prompt="{definition}",
|
||||
)
|
||||
response = oai.Completion.create(context=test_data[0], **config)
|
||||
response = autogen.Completion.create(context=test_data[0], **config)
|
||||
# a minimal tuning example for tuning chat completion models using the ChatCompletion class
|
||||
config_list = oai.config_list_openai_aoai(KEY_LOC)
|
||||
config, _ = oai.ChatCompletion.tune(
|
||||
config_list = autogen.config_list_openai_aoai(KEY_LOC)
|
||||
config, _ = autogen.ChatCompletion.tune(
|
||||
data=tune_data,
|
||||
metric="expected_success",
|
||||
mode="max",
|
||||
@@ -272,7 +237,7 @@ def test_humaneval(num_samples=1):
|
||||
messages=[{"role": "user", "content": "{definition}"}],
|
||||
config_list=config_list,
|
||||
)
|
||||
response = oai.ChatCompletion.create(context=test_data[0], config_list=config_list, **config)
|
||||
response = autogen.ChatCompletion.create(context=test_data[0], config_list=config_list, **config)
|
||||
print(response)
|
||||
from openai.error import RateLimitError
|
||||
|
||||
@@ -289,7 +254,7 @@ def test_humaneval(num_samples=1):
|
||||
assert selected == 0
|
||||
print(eval_function_completions([code], **tune_data[1]))
|
||||
# a more comprehensive tuning example
|
||||
config2, analysis = oai.Completion.tune(
|
||||
config2, analysis = autogen.Completion.tune(
|
||||
data=tune_data,
|
||||
metric="success",
|
||||
mode="max",
|
||||
@@ -310,12 +275,12 @@ def test_humaneval(num_samples=1):
|
||||
print(config2)
|
||||
print(analysis.best_result)
|
||||
print(test_data[0])
|
||||
response = oai.Completion.create(context=test_data[0], **config2)
|
||||
response = autogen.Completion.create(context=test_data[0], **config2)
|
||||
print(response)
|
||||
oai.Completion.data = test_data[:num_samples]
|
||||
result = oai.Completion._eval(analysis.best_config, prune=False, eval_only=True)
|
||||
autogen.Completion.data = test_data[:num_samples]
|
||||
result = autogen.Completion._eval(analysis.best_config, prune=False, eval_only=True)
|
||||
print("result without pruning", result)
|
||||
result = oai.Completion.test(test_data[:num_samples], **config2)
|
||||
result = autogen.Completion.test(test_data[:num_samples], **config2)
|
||||
print(result)
|
||||
try:
|
||||
code, cost, selected = implement(
|
||||
@@ -376,7 +341,7 @@ def test_math(num_samples=-1):
|
||||
% data["problem"]
|
||||
]
|
||||
|
||||
oai.Completion.set_cache(seed)
|
||||
autogen.Completion.set_cache(seed)
|
||||
vanilla_config = {
|
||||
"model": "text-davinci-003",
|
||||
"temperature": 1,
|
||||
@@ -386,8 +351,8 @@ def test_math(num_samples=-1):
|
||||
"stop": "###",
|
||||
}
|
||||
test_data_sample = test_data[0:3]
|
||||
result = oai.Completion.test(test_data_sample, eval_math_responses, **vanilla_config)
|
||||
result = oai.Completion.test(
|
||||
result = autogen.Completion.test(test_data_sample, eval_math_responses, **vanilla_config)
|
||||
result = autogen.Completion.test(
|
||||
test_data_sample,
|
||||
eval_math_responses,
|
||||
agg_method="median",
|
||||
@@ -400,13 +365,13 @@ def test_math(num_samples=-1):
|
||||
def my_average(results):
|
||||
return np.mean(results)
|
||||
|
||||
result = oai.Completion.test(
|
||||
result = autogen.Completion.test(
|
||||
test_data_sample,
|
||||
eval_math_responses,
|
||||
agg_method=my_median,
|
||||
**vanilla_config,
|
||||
)
|
||||
result = oai.Completion.test(
|
||||
result = autogen.Completion.test(
|
||||
test_data_sample,
|
||||
eval_math_responses,
|
||||
agg_method={
|
||||
@@ -420,7 +385,7 @@ def test_math(num_samples=-1):
|
||||
|
||||
print(result)
|
||||
|
||||
config, _ = oai.Completion.tune(
|
||||
config, _ = autogen.Completion.tune(
|
||||
data=tune_data, # the data for tuning
|
||||
metric="expected_success", # the metric to optimize
|
||||
mode="max", # the optimization mode
|
||||
@@ -433,7 +398,7 @@ def test_math(num_samples=-1):
|
||||
stop="###", # the stop sequence
|
||||
)
|
||||
print("tuned config", config)
|
||||
result = oai.Completion.test(test_data_sample, config_list=oai.config_list_openai_aoai(KEY_LOC), **config)
|
||||
result = autogen.Completion.test(test_data_sample, config_list=autogen.config_list_openai_aoai(KEY_LOC), **config)
|
||||
print("result from tuned config:", result)
|
||||
print("empty responses", eval_math_responses([], None))
|
||||
|
||||
@@ -441,7 +406,7 @@ def test_math(num_samples=-1):
|
||||
if __name__ == "__main__":
|
||||
import openai
|
||||
|
||||
config_list = oai.config_list_openai_aoai(KEY_LOC)
|
||||
config_list = autogen.config_list_openai_aoai(KEY_LOC)
|
||||
assert len(config_list) >= 3, config_list
|
||||
openai.api_key = os.environ["OPENAI_API_KEY"]
|
||||
|
||||
|
||||
@@ -1,22 +1,21 @@
|
||||
import json
|
||||
import os
|
||||
from flaml import oai
|
||||
|
||||
KEY_LOC = "test/autogen"
|
||||
from flaml import autogen
|
||||
from test_completion import KEY_LOC, OAI_CONFIG_LIST
|
||||
|
||||
|
||||
def test_config_list_from_json():
|
||||
config_list = oai.config_list_gpt4_gpt35(key_file_path=KEY_LOC)
|
||||
config_list = autogen.config_list_gpt4_gpt35(key_file_path=KEY_LOC)
|
||||
json_file = os.path.join(KEY_LOC, "config_list_test.json")
|
||||
with open(json_file, "w") as f:
|
||||
json.dump(config_list, f, indent=4)
|
||||
config_list_1 = oai.config_list_from_json(json_file)
|
||||
config_list_1 = autogen.config_list_from_json(json_file)
|
||||
assert config_list == config_list_1
|
||||
os.environ["config_list_test"] = json.dumps(config_list)
|
||||
config_list_2 = oai.config_list_from_json("config_list_test")
|
||||
config_list_2 = autogen.config_list_from_json("config_list_test")
|
||||
assert config_list == config_list_2
|
||||
config_list_3 = oai.config_list_from_json(
|
||||
"OAI_CONFIG_LIST", file_location=KEY_LOC, filter_dict={"model": ["gpt4", "gpt-4-32k"]}
|
||||
config_list_3 = autogen.config_list_from_json(
|
||||
OAI_CONFIG_LIST, file_location=KEY_LOC, filter_dict={"model": ["gpt4", "gpt-4-32k"]}
|
||||
)
|
||||
assert all(config.get("model") in ["gpt4", "gpt-4-32k"] for config in config_list_3)
|
||||
del os.environ["config_list_test"]
|
||||
@@ -24,7 +23,7 @@ def test_config_list_from_json():
|
||||
|
||||
|
||||
def test_config_list_openai_aoai():
|
||||
config_list = oai.config_list_openai_aoai(key_file_path=KEY_LOC)
|
||||
config_list = autogen.config_list_openai_aoai(key_file_path=KEY_LOC)
|
||||
assert all(config.get("api_type") in [None, "open_ai", "azure"] for config in config_list)
|
||||
|
||||
|
||||
|
||||
@@ -1,11 +1,151 @@
|
||||
import sys
|
||||
import os
|
||||
import pytest
|
||||
from flaml.autogen.code_utils import UNKNOWN, extract_code, execute_code, infer_lang
|
||||
from flaml import autogen
|
||||
from flaml.autogen.code_utils import (
|
||||
UNKNOWN,
|
||||
extract_code,
|
||||
execute_code,
|
||||
infer_lang,
|
||||
improve_code,
|
||||
improve_function,
|
||||
)
|
||||
|
||||
KEY_LOC = "notebook"
|
||||
OAI_CONFIG_LIST = "OAI_CONFIG_LIST"
|
||||
here = os.path.abspath(os.path.dirname(__file__))
|
||||
|
||||
|
||||
# def test_find_code():
|
||||
# try:
|
||||
# import openai
|
||||
# except ImportError:
|
||||
# return
|
||||
# # need gpt-4 for this task
|
||||
# config_list = autogen.config_list_from_json(
|
||||
# OAI_CONFIG_LIST,
|
||||
# file_location=KEY_LOC,
|
||||
# filter_dict={
|
||||
# "model": ["gpt-4", "gpt4", "gpt-4-32k", "gpt-4-32k-0314"],
|
||||
# },
|
||||
# )
|
||||
# # config_list = autogen.config_list_from_json(
|
||||
# # OAI_CONFIG_LIST,
|
||||
# # file_location=KEY_LOC,
|
||||
# # filter_dict={
|
||||
# # "model": {
|
||||
# # "gpt-3.5-turbo",
|
||||
# # "gpt-3.5-turbo-16k",
|
||||
# # "gpt-3.5-turbo-0301",
|
||||
# # "chatgpt-35-turbo-0301",
|
||||
# # "gpt-35-turbo-v0301",
|
||||
# # },
|
||||
# # },
|
||||
# # )
|
||||
# seed = 42
|
||||
# messages = [
|
||||
# {
|
||||
# "role": "user",
|
||||
# "content": "Print hello world to a file called hello.txt",
|
||||
# },
|
||||
# {
|
||||
# "role": "user",
|
||||
# "content": """
|
||||
# # filename: write_hello.py
|
||||
# ```
|
||||
# with open('hello.txt', 'w') as f:
|
||||
# f.write('Hello, World!')
|
||||
# print('Hello, World! printed to hello.txt')
|
||||
# ```
|
||||
# Please execute the above Python code to print "Hello, World!" to a file called hello.txt and print the success message.
|
||||
# """,
|
||||
# },
|
||||
# ]
|
||||
# codeblocks, _ = find_code(messages, seed=seed, config_list=config_list)
|
||||
# assert codeblocks[0][0] == "python", codeblocks
|
||||
# messages += [
|
||||
# {
|
||||
# "role": "user",
|
||||
# "content": """
|
||||
# exitcode: 0 (execution succeeded)
|
||||
# Code output:
|
||||
# Hello, World! printed to hello.txt
|
||||
# """,
|
||||
# },
|
||||
# {
|
||||
# "role": "assistant",
|
||||
# "content": "Great! Can I help you with anything else?",
|
||||
# },
|
||||
# ]
|
||||
# codeblocks, content = find_code(messages, seed=seed, config_list=config_list)
|
||||
# assert codeblocks[0][0] == "unknown", content
|
||||
# messages += [
|
||||
# {
|
||||
# "role": "user",
|
||||
# "content": "Save a pandas df with 3 rows and 3 columns to disk.",
|
||||
# },
|
||||
# {
|
||||
# "role": "assistant",
|
||||
# "content": """
|
||||
# ```
|
||||
# # filename: save_df.py
|
||||
# import pandas as pd
|
||||
|
||||
# df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
|
||||
# df.to_csv('df.csv')
|
||||
# print('df saved to df.csv')
|
||||
# ```
|
||||
# Please execute the above Python code to save a pandas df with 3 rows and 3 columns to disk.
|
||||
# Before you run the code above, run
|
||||
# ```
|
||||
# pip install pandas
|
||||
# ```
|
||||
# first to install pandas.
|
||||
# """,
|
||||
# },
|
||||
# ]
|
||||
# codeblocks, content = find_code(messages, seed=seed, config_list=config_list)
|
||||
# assert (
|
||||
# len(codeblocks) == 2
|
||||
# and (codeblocks[0][0] == "sh"
|
||||
# and codeblocks[1][0] == "python"
|
||||
# or codeblocks[0][0] == "python"
|
||||
# and codeblocks[1][0] == "sh")
|
||||
# ), content
|
||||
|
||||
# messages += [
|
||||
# {
|
||||
# "role": "user",
|
||||
# "content": "The code is unsafe to execute in my environment.",
|
||||
# },
|
||||
# {
|
||||
# "role": "assistant",
|
||||
# "content": "please run python write_hello.py",
|
||||
# },
|
||||
# ]
|
||||
# # codeblocks, content = find_code(messages, config_list=config_list)
|
||||
# # assert codeblocks[0][0] != "unknown", content
|
||||
# # I'm sorry, but I cannot execute code from earlier messages. Please provide the code again if you would like me to execute it.
|
||||
|
||||
# messages[-1]["content"] = "please skip pip install pandas if you already have pandas installed"
|
||||
# codeblocks, content = find_code(messages, seed=seed, config_list=config_list)
|
||||
# assert codeblocks[0][0] != "sh", content
|
||||
|
||||
# messages += [
|
||||
# {
|
||||
# "role": "user",
|
||||
# "content": "The code is still unsafe to execute in my environment.",
|
||||
# },
|
||||
# {
|
||||
# "role": "assistant",
|
||||
# "content": "Let me try something else. Do you have docker installed?",
|
||||
# },
|
||||
# ]
|
||||
# codeblocks, content = find_code(messages, seed=seed, config_list=config_list)
|
||||
# assert codeblocks[0][0] == "unknown", content
|
||||
# print(content)
|
||||
|
||||
|
||||
def test_infer_lang():
|
||||
assert infer_lang("print('hello world')") == "python"
|
||||
assert infer_lang("pip install flaml") == "sh"
|
||||
@@ -59,12 +199,16 @@ def test_execute_code():
|
||||
import docker
|
||||
except ImportError as exc:
|
||||
print(exc)
|
||||
return
|
||||
exitcode, msg, image = execute_code("print('hello world')", filename="tmp/codetest.py")
|
||||
assert exitcode == 0 and msg == b"hello world\n", msg
|
||||
docker = None
|
||||
exit_code, msg, image = execute_code("print('hello world')", filename="tmp/codetest.py")
|
||||
assert exit_code == 0 and msg == "hello world\n", msg
|
||||
# read a file
|
||||
print(execute_code("with open('tmp/codetest.py', 'r') as f: a=f.read()"))
|
||||
# create a file
|
||||
exit_code, msg, image = execute_code(
|
||||
"with open('tmp/codetest.py', 'w') as f: f.write('b=1')", work_dir=f"{here}/my_tmp", filename="tmp2/codetest.py"
|
||||
)
|
||||
assert exit_code and 'File "tmp2/codetest.py"' in msg, msg
|
||||
print(execute_code("with open('tmp/codetest.py', 'w') as f: f.write('b=1')", work_dir=f"{here}/my_tmp"))
|
||||
# execute code in a file
|
||||
print(execute_code(filename="tmp/codetest.py"))
|
||||
@@ -72,20 +216,53 @@ def test_execute_code():
|
||||
# execute code for assertion error
|
||||
exit_code, msg, image = execute_code("assert 1==2")
|
||||
assert exit_code, msg
|
||||
assert 'File ""' in msg
|
||||
# execute code which takes a long time
|
||||
exit_code, error, image = execute_code("import time; time.sleep(2)", timeout=1)
|
||||
assert exit_code and error.decode() == "Timeout"
|
||||
assert isinstance(image, str)
|
||||
assert exit_code and error == "Timeout"
|
||||
assert isinstance(image, str) or docker is None or os.path.exists("/.dockerenv")
|
||||
|
||||
|
||||
def test_execute_code_no_docker():
|
||||
exit_code, error, image = execute_code("import time; time.sleep(2)", timeout=1, use_docker=False)
|
||||
if sys.platform != "win32":
|
||||
assert exit_code and error.decode() == "Timeout"
|
||||
assert exit_code and error == "Timeout"
|
||||
assert image is None
|
||||
|
||||
|
||||
def test_improve():
|
||||
try:
|
||||
import openai
|
||||
except ImportError:
|
||||
return
|
||||
config_list = autogen.config_list_openai_aoai(KEY_LOC)
|
||||
improved, _ = improve_function(
|
||||
"flaml/autogen/math_utils.py",
|
||||
"solve_problem",
|
||||
"Solve math problems accurately, by avoiding calculation errors and reduce reasoning errors.",
|
||||
config_list=config_list,
|
||||
)
|
||||
with open(f"{here}/math_utils.py.improved", "w") as f:
|
||||
f.write(improved)
|
||||
suggestion, _ = improve_code(
|
||||
["flaml/autogen/code_utils.py", "flaml/autogen/math_utils.py"],
|
||||
"leverage generative AI smartly and cost-effectively",
|
||||
config_list=config_list,
|
||||
)
|
||||
print(suggestion)
|
||||
improvement, cost = improve_code(
|
||||
["flaml/autogen/code_utils.py", "flaml/autogen/math_utils.py"],
|
||||
"leverage generative AI smartly and cost-effectively",
|
||||
suggest_only=False,
|
||||
config_list=config_list,
|
||||
)
|
||||
print(cost)
|
||||
with open(f"{here}/suggested_improvement.txt", "w") as f:
|
||||
f.write(improvement)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# test_infer_lang()
|
||||
# test_extract_code()
|
||||
test_execute_code()
|
||||
# test_find_code()
|
||||
|
||||
@@ -4,15 +4,14 @@ except ImportError:
|
||||
openai = None
|
||||
import pytest
|
||||
import json
|
||||
from flaml import oai
|
||||
from flaml import autogen
|
||||
from flaml.autogen.math_utils import eval_math_responses
|
||||
|
||||
KEY_LOC = "test/autogen"
|
||||
from test_code import KEY_LOC
|
||||
|
||||
|
||||
@pytest.mark.skipif(openai is None, reason="openai not installed")
|
||||
def test_eval_math_responses():
|
||||
config_list = oai.config_list_from_models(
|
||||
config_list = autogen.config_list_from_models(
|
||||
KEY_LOC, exclude="aoai", model_list=["gpt-4-0613", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k"]
|
||||
)
|
||||
functions = [
|
||||
@@ -36,7 +35,7 @@ def test_eval_math_responses():
|
||||
},
|
||||
},
|
||||
]
|
||||
response = oai.ChatCompletion.create(
|
||||
response = autogen.ChatCompletion.create(
|
||||
config_list=config_list,
|
||||
messages=[
|
||||
{
|
||||
@@ -47,7 +46,7 @@ def test_eval_math_responses():
|
||||
functions=functions,
|
||||
)
|
||||
print(response)
|
||||
responses = oai.ChatCompletion.extract_text_or_function_call(response)
|
||||
responses = autogen.ChatCompletion.extract_text_or_function_call(response)
|
||||
print(responses[0])
|
||||
function_call = responses[0]["function_call"]
|
||||
name, arguments = function_call["name"], json.loads(function_call["arguments"])
|
||||
|
||||
Reference in New Issue
Block a user