mirror of
https://github.com/microsoft/autogen.git
synced 2026-04-20 03:02:16 -04:00
Using a more robust "reflection_with_llm" summary method (#1575)
* summary exception
* badrequest error
* test
* skip reason
* error
* address func call in summary
* reflection_with_llm enhancement and tests
* remove old
* update notebook
* update notebook
This commit is contained in:
@@ -108,10 +108,12 @@ def test_agent_usage():
|
||||
)
|
||||
|
||||
math_problem = "$x^3=125$. What is x?"
|
||||
ai_user_proxy.initiate_chat(
|
||||
res = ai_user_proxy.initiate_chat(
|
||||
assistant,
|
||||
message=math_problem,
|
||||
summary_method="reflection_with_llm",
|
||||
)
|
||||
print("Result summary:", res.summary)
|
||||
|
||||
# test print
|
||||
captured_output = io.StringIO()
|
||||
|
||||
@@ -55,11 +55,12 @@ def test_ai_user_proxy_agent():
|
||||
assistant.reset()
|
||||
|
||||
math_problem = "$x^3=125$. What is x?"
|
||||
ai_user_proxy.initiate_chat(
|
||||
res = ai_user_proxy.initiate_chat(
|
||||
assistant,
|
||||
message=math_problem,
|
||||
)
|
||||
print(conversations)
|
||||
print("Result summary:", res.summary)
|
||||
|
||||
|
||||
@pytest.mark.skipif(skip, reason="openai not installed OR requested to skip")
|
||||
@@ -149,7 +150,7 @@ def test_create_execute_script(human_input_mode="NEVER", max_consecutive_auto_re
|
||||
max_consecutive_auto_reply=max_consecutive_auto_reply,
|
||||
is_termination_msg=lambda x: x.get("content", "").rstrip().endswith("TERMINATE"),
|
||||
)
|
||||
user.initiate_chat(
|
||||
res = user.initiate_chat(
|
||||
assistant,
|
||||
message="""Create a temp.py file with the following content:
|
||||
```
|
||||
@@ -157,12 +158,14 @@ print('Hello world!')
|
||||
```""",
|
||||
)
|
||||
print(conversations)
|
||||
print("Result summary:", res.summary)
|
||||
# autogen.ChatCompletion.print_usage_summary()
|
||||
# autogen.ChatCompletion.start_logging(compact=False)
|
||||
user.send("""Execute temp.py""", assistant)
|
||||
res = user.send("""Execute temp.py""", assistant)
|
||||
# print(autogen.ChatCompletion.logged_history)
|
||||
# autogen.ChatCompletion.print_usage_summary()
|
||||
# autogen.ChatCompletion.stop_logging()
|
||||
print("Execution result summary:", res.summary)
|
||||
|
||||
|
||||
@pytest.mark.skipif(skip, reason="openai not installed OR requested to skip")
|
||||
|
||||
@@ -153,14 +153,18 @@ async def test_stream():
|
||||
|
||||
user_proxy.register_reply(autogen.AssistantAgent, add_data_reply, position=2, config={"news_stream": data})
|
||||
|
||||
await user_proxy.a_initiate_chat(
|
||||
assistant,
|
||||
message="""Give me investment suggestion in 3 bullet points.""",
|
||||
chat_res = await user_proxy.a_initiate_chat(
|
||||
assistant, message="""Give me investment suggestion in 3 bullet points.""", summary_method="reflection_with_llm"
|
||||
)
|
||||
|
||||
print("Chat summary:", chat_res.summary)
|
||||
print("Chat cost:", chat_res.cost)
|
||||
|
||||
while not data_task.done() and not data_task.cancelled():
|
||||
reply = await user_proxy.a_generate_reply(sender=assistant)
|
||||
if reply is not None:
|
||||
await user_proxy.a_send(reply, assistant)
|
||||
res = await user_proxy.a_send(reply, assistant)
|
||||
print("Chat summary and cost:", res.summary, res.cost)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -38,6 +38,12 @@ async def test_async_get_human_input():
|
||||
|
||||
await user_proxy.a_initiate_chat(assistant, clear_history=True, message="Hello.")
|
||||
# Test without message
|
||||
await user_proxy.a_initiate_chat(assistant, clear_history=True)
|
||||
res = await user_proxy.a_initiate_chat(assistant, clear_history=True, summary_method="reflection_with_llm")
|
||||
# Assert that custom a_get_human_input was called at least once
|
||||
user_proxy.a_get_human_input.assert_called()
|
||||
print("Result summary:", res.summary)
|
||||
print("Human input:", res.human_input)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(test_async_get_human_input())
|
||||
|
||||
@@ -234,23 +234,27 @@ def test_update_function():
|
||||
},
|
||||
is_remove=False,
|
||||
)
|
||||
user_proxy.initiate_chat(
|
||||
res1 = user_proxy.initiate_chat(
|
||||
assistant,
|
||||
message="What functions do you know about in the context of this conversation? End your response with 'TERMINATE'.",
|
||||
summary_method="reflection_with_llm",
|
||||
)
|
||||
messages1 = assistant.chat_messages[user_proxy][-1]["content"]
|
||||
print(messages1)
|
||||
print("Chat summary and cost", res1.summary, res1.cost)
|
||||
|
||||
assistant.update_function_signature("greet_user", is_remove=True)
|
||||
user_proxy.initiate_chat(
|
||||
res2 = user_proxy.initiate_chat(
|
||||
assistant,
|
||||
message="What functions do you know about in the context of this conversation? End your response with 'TERMINATE'.",
|
||||
summary_method="reflection_with_llm",
|
||||
)
|
||||
messages2 = assistant.chat_messages[user_proxy][-1]["content"]
|
||||
print(messages2)
|
||||
# The model should know about the function in the context of the conversation
|
||||
assert "greet_user" in messages1
|
||||
assert "greet_user" not in messages2
|
||||
print("Chat summary and cost", res2.summary, res2.cost)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -95,10 +95,14 @@ async def test_function_call_groupchat(key, value, sync):
|
||||
manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config_no_function)
|
||||
|
||||
if sync:
|
||||
observer.initiate_chat(manager, message="Let's start the game!")
|
||||
res = observer.initiate_chat(manager, message="Let's start the game!", summary_method="reflection_with_llm")
|
||||
else:
|
||||
await observer.a_initiate_chat(manager, message="Let's start the game!")
|
||||
res = await observer.a_initiate_chat(
|
||||
manager, message="Let's start the game!", summary_method="reflection_with_llm"
|
||||
)
|
||||
assert func.call_count >= 1, "The function get_random_number should be called at least once."
|
||||
print("Chat summary:", res.summary)
|
||||
print("Chat cost:", res.cost)
|
||||
|
||||
|
||||
def test_no_function_map():
|
||||
|
||||
@@ -606,12 +606,15 @@ def test_clear_agents_history():
|
||||
|
||||
# testing pure "clear history" statement
|
||||
with mock.patch.object(builtins, "input", lambda _: "clear history. How you doing?"):
|
||||
agent1.initiate_chat(group_chat_manager, message="hello")
|
||||
res = agent1.initiate_chat(group_chat_manager, message="hello", summary_method="last_msg")
|
||||
agent1_history = list(agent1._oai_messages.values())[0]
|
||||
agent2_history = list(agent2._oai_messages.values())[0]
|
||||
assert agent1_history == [{"content": "How you doing?", "name": "sam", "role": "user"}]
|
||||
assert agent2_history == [{"content": "How you doing?", "name": "sam", "role": "user"}]
|
||||
assert groupchat.messages == [{"content": "How you doing?", "name": "sam", "role": "user"}]
|
||||
print("Chat summary", res.summary)
|
||||
print("Chat cost", res.cost)
|
||||
print("Chat history", res.chat_history)
|
||||
|
||||
# testing clear history for defined agent
|
||||
with mock.patch.object(builtins, "input", lambda _: "clear history bob. How you doing?"):
|
||||
|
||||
@@ -34,9 +34,14 @@ def test_get_human_input():
|
||||
|
||||
user_proxy.register_reply([autogen.Agent, None], autogen.ConversableAgent.a_check_termination_and_human_reply)
|
||||
|
||||
user_proxy.initiate_chat(assistant, clear_history=True, message="Hello.")
|
||||
res = user_proxy.initiate_chat(assistant, clear_history=True, message="Hello.")
|
||||
print("Result summary:", res.summary)
|
||||
print("Human input:", res.human_input)
|
||||
|
||||
# Test without supplying messages parameter
|
||||
user_proxy.initiate_chat(assistant, clear_history=True)
|
||||
res = user_proxy.initiate_chat(assistant, clear_history=True)
|
||||
print("Result summary:", res.summary)
|
||||
print("Human input:", res.human_input)
|
||||
|
||||
# Assert that the custom get_human_input was called at least once
|
||||
user_proxy.get_human_input.assert_called()
|
||||
|
||||
@@ -55,8 +55,10 @@ def test_math_user_proxy_agent():
|
||||
# message=mathproxyagent.generate_init_message(math_problem),
|
||||
# sender=mathproxyagent,
|
||||
# )
|
||||
mathproxyagent.initiate_chat(assistant, problem=math_problem)
|
||||
res = mathproxyagent.initiate_chat(assistant, problem=math_problem)
|
||||
print(conversations)
|
||||
print("Chat summary:", res.summary)
|
||||
print("Chat history:", res.chat_history)
|
||||
|
||||
|
||||
def test_add_remove_print():
|
||||
|
||||
@@ -165,23 +165,29 @@ def test_update_tool():
|
||||
},
|
||||
is_remove=False,
|
||||
)
|
||||
user_proxy.initiate_chat(
|
||||
res = user_proxy.initiate_chat(
|
||||
assistant,
|
||||
message="What functions do you know about in the context of this conversation? End your response with 'TERMINATE'.",
|
||||
)
|
||||
messages1 = assistant.chat_messages[user_proxy][-1]["content"]
|
||||
print(messages1)
|
||||
print("Message:", messages1)
|
||||
print("Summary:", res.summary)
|
||||
assert (
|
||||
messages1.replace("TERMINATE", "") == res.summary
|
||||
), "Message (removing TERMINATE) and summary should be the same"
|
||||
|
||||
assistant.update_tool_signature("greet_user", is_remove=True)
|
||||
user_proxy.initiate_chat(
|
||||
res = user_proxy.initiate_chat(
|
||||
assistant,
|
||||
message="What functions do you know about in the context of this conversation? End your response with 'TERMINATE'.",
|
||||
summary_method="reflection_with_llm",
|
||||
)
|
||||
messages2 = assistant.chat_messages[user_proxy][-1]["content"]
|
||||
print(messages2)
|
||||
print("Message2:", messages2)
|
||||
# The model should know about the function in the context of the conversation
|
||||
assert "greet_user" in messages1
|
||||
assert "greet_user" not in messages2
|
||||
print("Summary2:", res.summary)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not TOOL_ENABLED, reason="openai>=1.1.0 not installed")
|
||||
@@ -366,7 +372,7 @@ async def test_async_multi_tool_call():
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# test_update_tool()
|
||||
test_update_tool()
|
||||
# test_eval_math_responses()
|
||||
# test_multi_tool_call()
|
||||
test_eval_math_responses_api_style_function()
|
||||
# test_eval_math_responses_api_style_function()
|
||||
|
||||
Reference in New Issue
Block a user