From 3f63db32b97296a3cf6c90fbf19c4ce11de32244 Mon Sep 17 00:00:00 2001
From: Mark Sze <66362098+marklysze@users.noreply.github.com>
Date: Mon, 1 Apr 2024 09:54:17 +1100
Subject: [PATCH] Added ability to specify 'role' field for select speaker
 messages for Group Chats (Replaces PR #2167) (#2199)

* Re-commit of code from PR (#2167) addressing #1861, due to wrong basing

* Update website/docs/topics/non-openai-models/best-tips-for-nonopenai-models.md

Co-authored-by: Eric Zhu <ekzhu@users.noreply.github.com>

* Removed unnecessary notebook images

* Update conversation-patterns.ipynb

Updated to include note about being applicable when auto.

* Updated to include checks that the role is not blank/None. Added tests.

* Changed try-except to use pytest

---------

Co-authored-by: Eric Zhu <ekzhu@users.noreply.github.com>
---
 autogen/agentchat/groupchat.py                | 12 +++-
 test/agentchat/test_groupchat.py              | 68 ++++++++++++++++++-
 .../best-tips-for-nonopenai-models.md         | 59 ++++++++++++++++
 .../non-openai-models/cloud-mistralai.ipynb   | 12 ++--
 .../docs/tutorial/conversation-patterns.ipynb | 23 +++++++
 5 files changed, 165 insertions(+), 9 deletions(-)
 create mode 100644 website/docs/topics/non-openai-models/best-tips-for-nonopenai-models.md

diff --git a/autogen/agentchat/groupchat.py b/autogen/agentchat/groupchat.py
index 6c0ecec90f..53beebf57f 100644
--- a/autogen/agentchat/groupchat.py
+++ b/autogen/agentchat/groupchat.py
@@ -61,6 +61,7 @@ class GroupChat:
         "clear history" phrase in user prompt. This is experimental feature.
         See description of GroupChatManager.clear_agents_history function for more info.
     - send_introductions: send a round of introductions at the start of the group chat, so agents know who they can speak to (default: False)
+    - role_for_select_speaker_messages: sets the role name for speaker selection when in 'auto' mode, typically 'user' or 'system'. (default: 'system')
     """
 
     agents: List[Agent]
@@ -74,6 +75,7 @@ class GroupChat:
     speaker_transitions_type: Literal["allowed", "disallowed", None] = None
     enable_clear_history: Optional[bool] = False
     send_introductions: bool = False
+    role_for_select_speaker_messages: Optional[str] = "system"
 
     _VALID_SPEAKER_SELECTION_METHODS = ["auto", "manual", "random", "round_robin"]
     _VALID_SPEAKER_TRANSITIONS_TYPE = ["allowed", "disallowed", None]
@@ -162,6 +164,9 @@ class GroupChat:
             agents=self.agents,
         )
 
+        if self.role_for_select_speaker_messages is None or len(self.role_for_select_speaker_messages) == 0:
+            raise ValueError("role_for_select_speaker_messages cannot be empty or None.")
+
     @property
     def agent_names(self) -> List[str]:
         """Return the names of the agents in the group chat."""
@@ -411,7 +416,7 @@ Then select the next role from {[agent.name for agent in agents]} to play. Only
             selected_agent = self.next_agent(last_speaker, graph_eligible_agents)
         elif speaker_selection_method.lower() == "random":
             selected_agent = self.random_select_speaker(graph_eligible_agents)
-        else:
+        else:  # auto
             selected_agent = None
             select_speaker_messages = self.messages.copy()
             # If last message is a tool call or function call, blank the call so the api doesn't throw
@@ -420,7 +425,10 @@ Then select the next role from {[agent.name for agent in agents]} to play. Only
             if select_speaker_messages[-1].get("tool_calls", False):
                 select_speaker_messages[-1] = dict(select_speaker_messages[-1], tool_calls=None)
             select_speaker_messages = select_speaker_messages + [
-                {"role": "system", "content": self.select_speaker_prompt(graph_eligible_agents)}
+                {
+                    "role": self.role_for_select_speaker_messages,
+                    "content": self.select_speaker_prompt(graph_eligible_agents),
+                }
             ]
         return selected_agent, graph_eligible_agents, select_speaker_messages
 
diff --git a/test/agentchat/test_groupchat.py b/test/agentchat/test_groupchat.py
index 7117582f2e..94d241585b 100755
--- a/test/agentchat/test_groupchat.py
+++ b/test/agentchat/test_groupchat.py
@@ -1176,6 +1176,71 @@ def test_custom_speaker_selection_overrides_transition_graph():
     assert "teamA_executor" in speakers
 
 
+def test_role_for_select_speaker_messages():
+    agent1 = autogen.ConversableAgent(
+        "alice",
+        max_consecutive_auto_reply=10,
+        human_input_mode="NEVER",
+        llm_config=False,
+        default_auto_reply="This is alice speaking.",
+    )
+    agent2 = autogen.ConversableAgent(
+        "bob",
+        max_consecutive_auto_reply=10,
+        human_input_mode="NEVER",
+        llm_config=False,
+        default_auto_reply="This is bob speaking.",
+    )
+
+    groupchat = autogen.GroupChat(
+        agents=[agent1, agent2],
+        messages=[{"role": "user", "content": "Let's have a chat!"}],
+        max_round=3,
+    )
+
+    # In 'auto' mode the third return value is a copy of the chat messages plus the select-speaker prompt
+    selected_agent, agents, messages = groupchat._prepare_and_select_agents(agent1)
+
+    # Default role is "system"; expect 2 messages: the initial chat message + the select-speaker prompt
+    assert len(messages) == 2
+    assert messages[-1]["role"] == "system"
+
+    # Changing the attribute on an existing GroupChat to "user" is reflected in the next prompt
+    groupchat.role_for_select_speaker_messages = "user"
+    selected_agent, agents, messages = groupchat._prepare_and_select_agents(agent1)
+
+    assert len(messages) == 2
+    assert messages[-1]["role"] == "user"
+
+    # An arbitrary, non-standard role string is passed through unchanged
+    groupchat.role_for_select_speaker_messages = "SockS"
+    selected_agent, agents, messages = groupchat._prepare_and_select_agents(agent1)
+
+    assert len(messages) == 2
+    assert messages[-1]["role"] == "SockS"
+
+    # Empty string and None must both be rejected when the GroupChat is constructed
+
+    # Empty string raises ValueError
+    with pytest.raises(ValueError) as e:
+        groupchat = autogen.GroupChat(
+            agents=[agent1, agent2],
+            messages=[{"role": "user", "content": "Let's have a chat!"}],
+            max_round=3,
+            role_for_select_speaker_messages="",
+        )
+    assert "role_for_select_speaker_messages cannot be empty or None." in str(e.value)
+    # None raises ValueError
+    with pytest.raises(ValueError) as e:
+        groupchat = autogen.GroupChat(
+            agents=[agent1, agent2],
+            messages=[{"role": "user", "content": "Let's have a chat!"}],
+            max_round=3,
+            role_for_select_speaker_messages=None,
+        )
+    assert "role_for_select_speaker_messages cannot be empty or None." in str(e.value)
+
+
 if __name__ == "__main__":
     # test_func_call_groupchat()
     # test_broadcast()
@@ -1190,5 +1255,6 @@ if __name__ == "__main__":
     # test_invalid_allow_repeat_speaker()
     # test_graceful_exit_before_max_round()
     # test_clear_agents_history()
-    test_custom_speaker_selection_overrides_transition_graph()
+    # test_custom_speaker_selection_overrides_transition_graph()
+    test_role_for_select_speaker_messages()
     # pass
diff --git a/website/docs/topics/non-openai-models/best-tips-for-nonopenai-models.md b/website/docs/topics/non-openai-models/best-tips-for-nonopenai-models.md
new file mode 100644
index 0000000000..0b3a325ff0
--- /dev/null
+++ b/website/docs/topics/non-openai-models/best-tips-for-nonopenai-models.md
@@ -0,0 +1,59 @@
+# Tips for Non-OpenAI Models
+
+Here are some tips for using non-OpenAI Models with AutoGen.
+
+## Finding the right model
+Every model will perform differently across the operations within your AutoGen
+setup, such as speaker selection, coding, function calling, content creation,
+etc. On the whole, larger models (13B+) are better at following directions
+and provide more cohesive responses.
+
+Content creation can be performed by most models.
+
+Fine-tuned models can be great for very specific tasks, such as function calling
+and coding.
+
+Specific tasks, such as speaker selection in a Group Chat scenario, that require
+very accurate outputs can be a challenge with most open source/weight models. The
+use of chain-of-thought and/or few-shot prompting can help guide the LLM to provide
+the output in the format you want.
+
+## Validating your program
+Testing your AutoGen setup against a very large LLM, such as OpenAI's ChatGPT or
+Anthropic's Claude 3, can help validate your agent setup and configuration.
+
+Once a setup is performing as you want, you can replace the models for your agents
+with non-OpenAI models and iteratively tweak system messages, prompts, and model
+selection.
+
+## Chat template
+AutoGen utilises a set of chat messages for the conversation between AutoGen/user
+and LLMs. Each chat message has a role attribute that is typically `user`,
+`assistant`, or `system`.
+
+A chat template is applied during inference and some chat templates implement rules about
+what roles can be used in specific sequences of messages.
+
+For example, when using Mistral AI's API the last chat message must have a role of `user`.
+In a Group Chat scenario the message used to select the next speaker will have a role of
+`system` by default and the API will throw an exception for this step. To overcome this, the
+GroupChat's constructor has a parameter called `role_for_select_speaker_messages` that can
+be used to change the role name to `user`.
+
+```python
+groupchat = autogen.GroupChat(
+    agents=[user_proxy, coder, pm],
+    messages=[],
+    max_round=12,
+    # Role for select speaker message will be set to 'user' instead of 'system'
+    role_for_select_speaker_messages='user',
+)
+```
+
+If the chat template associated with a model you want to use doesn't support the role
+sequence and names used in AutoGen, you can modify the chat template. See an example of
+this on our [vLLM page](/docs/topics/non-openai-models/local-vllm#chat-template).
+
+## Discord
+Join AutoGen's [#alt-models](https://discord.com/channels/1153072414184452236/1201369716057440287)
+channel on their Discord and discuss non-OpenAI models and configurations.
diff --git a/website/docs/topics/non-openai-models/cloud-mistralai.ipynb b/website/docs/topics/non-openai-models/cloud-mistralai.ipynb
index 9c18d6d040..883cd46db6 100644
--- a/website/docs/topics/non-openai-models/cloud-mistralai.ipynb
+++ b/website/docs/topics/non-openai-models/cloud-mistralai.ipynb
@@ -31,7 +31,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Now you can set up the Mistral model you want to use."
+    "Now you can set up the Mistral model you want to use. See the list of [models here](https://docs.mistral.ai/platform/endpoints/)."
    ]
   },
   {
@@ -59,7 +59,7 @@
    "source": [
     "## Two-Agent Coding Example\n",
     "\n",
-    "In this example, we run a two-agent chat to count how many prime numbers between 1 and 10000 using coding."
+    "In this example, we run a two-agent chat to count the number of prime numbers between 1 and 10,000 using coding."
    ]
   },
   {
@@ -182,7 +182,7 @@
    "source": [
     "## Tool Call Example\n",
     "\n",
-    "In this example, instead of writing code, we will have two agent playing chess against each other using tool to make moves.\n",
+    "In this example, instead of writing code, we will have two agents playing chess against each other using tool calling to make moves.\n",
     "\n",
     "First install the `chess` package by running the following command:"
    ]
@@ -200,7 +200,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Write function for making a move."
+    "Write the function for making a move."
    ]
   },
   {
@@ -269,7 +269,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Register tools for the agents. See [tutorial chapter on tool use](/docs/tutorial/tool-use) \n",
+    "Register tools for the agents. See the [tutorial chapter on tool use](/docs/tutorial/tool-use) \n",
     "for more information."
    ]
   },
@@ -303,7 +303,7 @@
     "Register nested chats for the player agents.\n",
     "Nested chats allows each player agent to chat with the board proxy agent\n",
     "to make a move, before communicating with the other player agent.\n",
-    "See [nested chats tutorial chapter](/docs/tutorial/conversation-patterns#nested-chats)\n",
+    "See the [nested chats tutorial chapter](/docs/tutorial/conversation-patterns#nested-chats)\n",
     "for more information."
    ]
   },
diff --git a/website/docs/tutorial/conversation-patterns.ipynb b/website/docs/tutorial/conversation-patterns.ipynb
index a7d486e9f7..d032175e05 100644
--- a/website/docs/tutorial/conversation-patterns.ipynb
+++ b/website/docs/tutorial/conversation-patterns.ipynb
@@ -1183,6 +1183,29 @@
    ]
   },
   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "### Changing the select speaker role name\n",
+     "\n",
+     "As part of the Group chat process, when the `speaker_selection_method` is set to 'auto' (the default value),\n",
+     "a select speaker message is sent to the LLM to determine the next speaker.\n",
+     "\n",
+     "Each message in the chat sequence has a `role` attribute that is typically `user`,\n",
+     "`assistant`, or `system`. The select speaker message is the last in the chat\n",
+     "sequence when used and, by default, has a role of `system`.\n",
+     "\n",
+     "When using some models, such as Mistral through Mistral AI's API, the role on\n",
+     "the last message in the chat sequence has to be `user`.\n",
+     "\n",
+     "To change the default behaviour, AutoGen provides a way to set the value of the\n",
+     "select speaker message's role to any non-empty string value by setting the\n",
+     "`role_for_select_speaker_messages` parameter in the GroupChat's constructor. The\n",
+     "default value is `system` and by setting it to `user` you can accommodate the\n",
+     "last message role requirement of Mistral AI's API."
+    ]
+   },
+   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [