mirror of
https://github.com/microsoft/autogen.git
synced 2026-04-20 03:02:16 -04:00
added litellm+ollama cookbook (#509)
* added litellm+ollama cookbook * fix annotations * fix type * fix for ruff * fix for ruff * resolve comments * add to index.rst * reverting index.rst * fix to ignore type --------- Co-authored-by: prankur <prankurrusia@microsoft.com> Co-authored-by: Eric Zhu <ekzhu@users.noreply.github.com>
This commit is contained in:
@@ -0,0 +1,257 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Local LLMs with LiteLLM & Ollama\n",
|
||||
"\n",
|
||||
"In this notebook we'll create two agents, Joe and Cathy who like to tell jokes to each other. The agents will use locally running LLMs.\n",
|
||||
"\n",
|
||||
"Follow the guide at https://microsoft.github.io/autogen/docs/topics/non-openai-models/local-litellm-ollama/ to understand how to install LiteLLM and Ollama.\n",
|
||||
"\n",
|
||||
"We encourage going through the link, but if you're in a hurry and using Linux, run these: \n",
|
||||
" \n",
|
||||
"```\n",
|
||||
"curl -fsSL https://ollama.com/install.sh | sh\n",
|
||||
"\n",
|
||||
"ollama pull llama3:instruct\n",
|
||||
"\n",
|
||||
"pip install 'litellm[proxy]'\n",
|
||||
"litellm --model ollama/llama3:instruct\n",
|
||||
"``` \n",
|
||||
"\n",
|
||||
"This will run the proxy server and it will be available at 'http://0.0.0.0:4000/'."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To get started, let's import some classes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from dataclasses import dataclass\n",
|
||||
"from typing import List\n",
|
||||
"\n",
|
||||
"from autogen_core.base import MessageContext\n",
|
||||
"from autogen_core.components import DefaultTopicId, RoutedAgent, message_handler\n",
|
||||
"from autogen_core.components.code_executor import CodeExecutor, extract_markdown_code_blocks\n",
|
||||
"from autogen_core.components.models import (\n",
|
||||
" AssistantMessage,\n",
|
||||
" ChatCompletionClient,\n",
|
||||
" LLMMessage,\n",
|
||||
" SystemMessage,\n",
|
||||
" UserMessage,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Set up our local LLM model client."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from autogen_core.components.models import OpenAIChatCompletionClient\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_model_client() -> OpenAIChatCompletionClient:\n",
"    \"\"\"Return a chat-completion client pointed at the local LiteLLM proxy.\n",
"\n",
"    The proxy exposes an OpenAI-compatible endpoint, so the stock\n",
"    OpenAIChatCompletionClient works unchanged; the API key is unused locally.\n",
"    \"\"\"\n",
"    local_proxy_url = \"http://0.0.0.0:4000\"\n",
"    return OpenAIChatCompletionClient(\n",
"        model=\"gpt-4o\",  # Placeholder: an OpenAI model name is required for now.\n",
"        api_key=\"NotRequiredSinceWeAreLocal\",\n",
"        base_url=local_proxy_url,\n",
"    )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Define a simple message class"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@dataclass\n",
|
||||
"class Message:\n",
|
||||
"    \"\"\"Plain-text payload exchanged between the agents.\"\"\"\n",
"    content: str"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, the Agent.\n",
|
||||
"\n",
|
||||
"We define the role of the Agent using the `SystemMessage` and set up a condition for termination."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from autogen_core.components.model_context import BufferedChatCompletionContext\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class Assistant(RoutedAgent):\n",
"    \"\"\"A comedian agent that swaps jokes with its partner until a stop phrase appears.\"\"\"\n",
"\n",
"    def __init__(self, name: str, model_client: ChatCompletionClient) -> None:\n",
"        super().__init__(\"An assistant agent.\")\n",
"        self._model_client = model_client\n",
"        self.name = name\n",
"        self.count = 0  # Messages handled so far; bounds the conversation length.\n",
"        self._system_messages = [\n",
"            SystemMessage(\n",
"                content=f\"Your name is {name} and you are a part of a duo of comedians.\"\n",
"                \"You laugh when you find the joke funny, else reply 'I need to go now'.\",\n",
"            )\n",
"        ]\n",
"        # Keep only the last few turns so the local model's prompt stays small.\n",
"        self._model_context = BufferedChatCompletionContext(buffer_size=5)\n",
"\n",
"    @message_handler\n",
"    async def handle_message(self, message: Message, ctx: MessageContext) -> None:\n",
"        self.count += 1\n",
"        await self._model_context.add_message(UserMessage(content=message.content, source=\"user\"))\n",
"\n",
"        print(f\"\\n{self.name}: {message.content}\")\n",
"\n",
"        # Check termination BEFORE calling the model: previously the LLM request\n",
"        # was issued first and its result discarded on the terminating turn,\n",
"        # wasting one completion per conversation.\n",
"        if \"i need to go\" in message.content.lower() or self.count > 2:\n",
"            return\n",
"\n",
"        result = await self._model_client.create(self._system_messages + await self._model_context.get_messages())\n",
"        await self._model_context.add_message(AssistantMessage(content=result.content, source=\"assistant\"))  # type: ignore\n",
"        await self.publish_message(Message(content=result.content), DefaultTopicId())  # type: ignore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Set up the agents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from autogen_core.application import SingleThreadedAgentRuntime\n",
|
||||
"from autogen_core.base import AgentId\n",
|
||||
"from autogen_core.components import DefaultSubscription\n",
|
||||
"\n",
|
||||
"# Create the in-process runtime that will host both agents.\n",
"runtime = SingleThreadedAgentRuntime()\n",
|
||||
"\n",
|
||||
"# Register the two comedians. Each gets a DefaultSubscription, so each receives\n",
"# the messages the other publishes to the default topic.\n",
"cathy = await runtime.register(\n",
|
||||
"    \"cathy\",\n",
|
||||
"    lambda: Assistant(name=\"Cathy\", model_client=get_model_client()),\n",
|
||||
"    subscriptions=lambda: [DefaultSubscription()],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"joe = await runtime.register(\n",
|
||||
"    \"joe\",\n",
|
||||
"    lambda: Assistant(name=\"Joe\", model_client=get_model_client()),\n",
|
||||
"    subscriptions=lambda: [DefaultSubscription()],\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's run everything!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/tmp/ipykernel_1417357/2124203426.py:22: UserWarning: Resolved model mismatch: gpt-4o-2024-05-13 != ollama/llama3.1:8b. Model mapping may be incorrect.\n",
|
||||
" result = await self._model_client.create(self._system_messages + await self._model_context.get_messages())\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Joe: Joe, tell me a joke.\n",
|
||||
"\n",
|
||||
"Cathy: Here's one:\n",
|
||||
"\n",
|
||||
"Why couldn't the bicycle stand up by itself?\n",
|
||||
"\n",
|
||||
"(waiting for your reaction...)\n",
|
||||
"\n",
|
||||
"Joe: *laughs* It's because it was two-tired! Ahahaha! That's a good one! I love it!\n",
|
||||
"\n",
|
||||
"Cathy: *roars with laughter* HAHAHAHA! Oh man, that's a classic! I'm glad you liked it! The setup is perfect and the punchline is just... *chuckles* Two-tired! I mean, come on! That's genius! We should definitely add that one to our act!\n",
|
||||
"\n",
|
||||
"Joe: I need to go now.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"runtime.start()\n",
|
||||
"# Seed the conversation: Cathy asks Joe for a joke; the agents then exchange\n",
"# messages via the default topic until one stops publishing.\n",
"await runtime.send_message(\n",
|
||||
"    Message(\"Joe, tell me a joke.\"),\n",
|
||||
"    recipient=AgentId(joe, \"default\"),\n",
|
||||
"    sender=AgentId(cathy, \"default\"),\n",
|
||||
")\n",
|
||||
"# Wait until no messages remain in flight, then stop the runtime.\n",
"await runtime.stop_when_idle()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "pyautogen",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.14"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
Reference in New Issue
Block a user