From eb4a2618800ff8c03e996ee3ce0147d8a757e9bb Mon Sep 17 00:00:00 2001
From: Robert Brennan
Date: Tue, 26 Mar 2024 00:10:23 -0400
Subject: [PATCH] Create generic LLM client using LiteLLM (#114)

* add generic llm client
* fix lint errors
* fix lint issues
* a potential suggestion for llm wrapper to keep all the function signatures for ide
* use completion partial
* fix resp
* remove unused args
* add back truncation logic
* fix add_event
* fix merge issues
* more merge issues fixed
* fix codeact agent
* remove dead code
* remove import
* unused imports
* fix ruff
* update requirements
* mypy fixes
* more lint fixes
* fix browser errors
* fix up observation conversion
* fix format of error
* change max iter default back to 100
* fix kill action
* fix docker cleanup
* add RUN_AS_DEVIN flag
* fix condense
* revert some files
* unused imports

---------

Co-authored-by: Xingyao Wang
Co-authored-by: Robert Brennan
---
 agenthub/codeact_agent/__init__.py           |  13 +--
 agenthub/langchains_agent/__init__.py        |  66 ++++------
 agenthub/langchains_agent/utils/monologue.py |  15 +--
 .../utils/{llm.py => prompts.py}             | 104 ++++++++++--------
 opendevin/agent.py                           |  11 +-
 opendevin/llm/llm.py                         |  21 ++++
 opendevin/main.py                            |   7 +-
 opendevin/server/session.py                  |   8 +-
 8 files changed, 126 insertions(+), 119 deletions(-)
 rename agenthub/langchains_agent/utils/{llm.py => prompts.py} (76%)
 create mode 100644 opendevin/llm/llm.py

diff --git a/agenthub/codeact_agent/__init__.py b/agenthub/codeact_agent/__init__.py
index 90ee2bf40d..aa5a2e4d0e 100644
--- a/agenthub/codeact_agent/__init__.py
+++ b/agenthub/codeact_agent/__init__.py
@@ -1,6 +1,5 @@
 import os
 import re
-from litellm import completion
 from termcolor import colored
 from typing import List, Mapping
@@ -17,6 +16,7 @@ from opendevin.observation import (
     AgentMessageObservation,
 )
+from opendevin.llm.llm import LLM

 assert (
     "OPENAI_API_KEY" in os.environ
@@ -62,7 +62,7 @@ def parse_response(response) -> str:
 class CodeActAgent(Agent):
     def __init__(
         self,
-        model_name: str
+        llm: LLM,
     ) -> None:
         """
         Initializes a new instance of the CodeActAgent class.
@@ -71,7 +71,7 @@ class CodeActAgent(Agent):
         - instruction (str): The instruction for the agent to execute.
         - max_steps (int): The maximum number of steps to run the agent.
""" - super().__init__(model_name) + super().__init__(llm) self.messages: List[Mapping[str, str]] = [] self.instruction: str = "" @@ -83,13 +83,10 @@ class CodeActAgent(Agent): {"role": "user", "content": self.instruction}, ] print(colored("===USER:===\n" + self.instruction, "green")) - updated_info = state.updated_info - if updated_info: for prev_action, obs in updated_info: assert isinstance(prev_action, (CmdRunAction, AgentEchoAction)), "Expecting CmdRunAction or AgentEchoAction for Action" - if isinstance(obs, AgentMessageObservation): # warning message from itself self.messages.append({"role": "user", "content": obs.content}) print(colored("===USER:===\n" + obs.content, "green")) @@ -100,10 +97,8 @@ class CodeActAgent(Agent): print(colored("===ENV OBSERVATION:===\n" + content, "blue")) else: raise NotImplementedError(f"Unknown observation type: {obs.__class__}") - - response = completion( + response = self.llm.completion( messages=self.messages, - model=self.model_name, stop=[""], temperature=0.0, seed=42, diff --git a/agenthub/langchains_agent/__init__.py b/agenthub/langchains_agent/__init__.py index 5546eaf33e..825e4ee43a 100644 --- a/agenthub/langchains_agent/__init__.py +++ b/agenthub/langchains_agent/__init__.py @@ -1,9 +1,14 @@ -from typing import List, Dict, Type +from typing import List -import agenthub.langchains_agent.utils.llm as llm +from opendevin.llm.llm import LLM from opendevin.agent import Agent +from opendevin.state import State +from opendevin.action import Action +import agenthub.langchains_agent.utils.prompts as prompts +from agenthub.langchains_agent.utils.monologue import Monologue +from agenthub.langchains_agent.utils.memory import LongTermMemory + from opendevin.action import ( - Action, CmdRunAction, CmdKillAction, BrowseURLAction, @@ -14,15 +19,12 @@ from opendevin.action import ( AgentFinishAction, ) from opendevin.observation import ( - Observation, CmdOutputObservation, - BrowserOutputObservation, ) -from opendevin.state import State -from agenthub.langchains_agent.utils.monologue import Monologue -from agenthub.langchains_agent.utils.memory import LongTermMemory +MAX_MONOLOGUE_LENGTH = 20000 +MAX_OUTPUT_LENGTH = 5000 INITIAL_THOUGHTS = [ "I exist!", @@ -66,26 +68,12 @@ INITIAL_THOUGHTS = [ MAX_OUTPUT_LENGTH = 5000 MAX_MONOLOGUE_LENGTH = 20000 - -ACTION_TYPE_TO_CLASS: Dict[str, Type[Action]] = { - "run": CmdRunAction, - "kill": CmdKillAction, - "browse": BrowseURLAction, - "read": FileReadAction, - "write": FileWriteAction, - "recall": AgentRecallAction, - "think": AgentThinkAction, - "finish": AgentFinishAction, -} - -CLASS_TO_ACTION_TYPE: Dict[Type[Action], str] = {v: k for k, v in ACTION_TYPE_TO_CLASS.items()} - class LangchainsAgent(Agent): _initialized = False - def __init__(self, model_name: str): - super().__init__(model_name) - self.monologue = Monologue(self.model_name) + def __init__(self, llm: LLM): + super().__init__(llm) + self.monologue = Monologue() self.memory = LongTermMemory() def _add_event(self, event: dict): @@ -95,7 +83,7 @@ class LangchainsAgent(Agent): self.monologue.add_event(event) self.memory.add_event(event) if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH: - self.monologue.condense() + self.monologue.condense(self.llm) def _initialize(self): if self._initialized: @@ -103,6 +91,8 @@ class LangchainsAgent(Agent): if self.instruction is None or self.instruction == "": raise ValueError("Instruction must be provided") + self.monologue = Monologue() + self.memory = LongTermMemory() next_is_output = False for thought in 
@@ -128,7 +118,7 @@ class LangchainsAgent(Agent):
             else:
                 d = {"action": "think", "args": {"thought": thought}}
-        self._add_event(d)
+            self._add_event(d)
         self._initialized = True

     def step(self, state: State) -> Action:
@@ -143,14 +133,8 @@ class LangchainsAgent(Agent):
                 d = {"action": "error", "args": {"output": obs.content}}
             else:
                 d = {"action": "output", "args": {"output": obs.content}}
-        # elif isinstance(obs, UserMessageObservation):
-        #     d = {"action": "output", "args": {"output": obs.message}}
-        # elif isinstance(obs, AgentMessageObservation):
-        #     d = {"action": "output", "args": {"output": obs.message}}
-        elif isinstance(obs, (BrowserOutputObservation, Observation)):
-            d = {"action": "output", "args": {"output": obs.content}}
         else:
-            raise NotImplementedError(f"Unknown observation type: {obs}")
+            d = {"action": "output", "args": {"output": obs.content}}

         self._add_event(d)
@@ -175,18 +159,16 @@ class LangchainsAgent(Agent):
             self._add_event(d)

         state.updated_info = []
-
-        action_dict = llm.request_action(
+
+        prompt = prompts.get_request_action_prompt(
             self.instruction,
             self.monologue.get_thoughts(),
-            self.model_name,
             state.background_commands_obs,
         )
-        if action_dict is None:
-            action_dict = {"action": "think", "args": {"thought": "..."}}
-
-        # Translate action_dict to Action
-        action = ACTION_TYPE_TO_CLASS[action_dict["action"]](**action_dict["args"])
+        messages = [{"content": prompt, "role": "user"}]
+        resp = self.llm.completion(messages=messages)
+        action_resp = resp['choices'][0]['message']['content']
+        action = prompts.parse_action_response(action_resp)
         self.latest_action = action
         return action

diff --git a/agenthub/langchains_agent/utils/monologue.py b/agenthub/langchains_agent/utils/monologue.py
index 99e0de6b67..74b2f8e97b 100644
--- a/agenthub/langchains_agent/utils/monologue.py
+++ b/agenthub/langchains_agent/utils/monologue.py
@@ -1,11 +1,10 @@
 import agenthub.langchains_agent.utils.json as json
-import agenthub.langchains_agent.utils.llm as llm
+import agenthub.langchains_agent.utils.prompts as prompts

 class Monologue:
-    def __init__(self, model_name):
+    def __init__(self):
         self.thoughts = []
-        self.model_name = model_name

     def add_event(self, t: dict):
         if not isinstance(t, dict):
@@ -24,13 +23,11 @@ class Monologue:
             print(f"Error serializing thought: {e}")
         return total_length

-    def condense(self):
+    def condense(self, llm):
         try:
-            new_thoughts = llm.summarize_monologue(self.thoughts, self.model_name)
-            # Ensure new_thoughts is not empty or significantly malformed before assigning
-            if not new_thoughts or len(new_thoughts) > len(self.thoughts):
-                raise ValueError("Condensing resulted in invalid state.")
-            self.thoughts = new_thoughts
+            prompt = prompts.get_summarize_monologue_prompt(self.thoughts)
+            resp = llm.completion(messages=[{'content': prompt, 'role': 'user'}])
+            self.thoughts = prompts.parse_summary_response(resp['choices'][0]['message']['content'])
         except Exception as e:
             # Consider logging the error here instead of or in addition to raising an exception
             raise RuntimeError(f"Error condensing thoughts: {e}")

diff --git a/agenthub/langchains_agent/utils/llm.py b/agenthub/langchains_agent/utils/prompts.py
similarity index 76%
rename from agenthub/langchains_agent/utils/llm.py
rename to agenthub/langchains_agent/utils/prompts.py
index d3885ad67f..0520cc6254 100644
--- a/agenthub/langchains_agent/utils/llm.py
+++ b/agenthub/langchains_agent/utils/prompts.py
@@ -1,23 +1,44 @@
 import os
-from . import json
+from typing import List, Dict, Type
+
+from langchain_core.pydantic_v1 import BaseModel
+from langchain.prompts import PromptTemplate
+from langchain_core.output_parsers import JsonOutputParser

 if os.getenv("DEBUG"):
     from langchain.globals import set_debug
     set_debug(True)
-
-from typing import List
-from langchain_core.pydantic_v1 import BaseModel
+from . import json
+from opendevin.action import (
+    Action,
+    CmdRunAction,
+    CmdKillAction,
+    BrowseURLAction,
+    FileReadAction,
+    FileWriteAction,
+    AgentRecallAction,
+    AgentThinkAction,
+    AgentFinishAction,
+)
 from opendevin.observation import (
     CmdOutputObservation,
 )
-from langchain.chains import LLMChain
-from langchain.prompts import PromptTemplate
-from langchain_core.output_parsers import JsonOutputParser
-from langchain_openai import ChatOpenAI
+
+ACTION_TYPE_TO_CLASS: Dict[str, Type[Action]] = {
+    "run": CmdRunAction,
+    "kill": CmdKillAction,
+    "browse": BrowseURLAction,
+    "read": FileReadAction,
+    "write": FileWriteAction,
+    "recall": AgentRecallAction,
+    "think": AgentThinkAction,
+    "finish": AgentFinishAction,
+}
+CLASS_TO_ACTION_TYPE: Dict[Type[Action], str] = {v: k for k, v in ACTION_TYPE_TO_CLASS.items()}

 ACTION_PROMPT = """
 You're a thoughtful robot. Your main task is to {task}.
@@ -103,36 +124,16 @@ class NewMonologue(BaseModel):
     new_monologue: List[_ActionDict]

-def get_chain(template, model_name):
-    assert (
-        "OPENAI_API_KEY" in os.environ
-    ), "Please set the OPENAI_API_KEY environment variable to use langchains_agent."
-    llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"), model_name=model_name)  # type: ignore
-    prompt = PromptTemplate.from_template(template)
-    llm_chain = LLMChain(prompt=prompt, llm=llm)
-    return llm_chain
+def get_summarize_monologue_prompt(thoughts):
+    prompt = PromptTemplate.from_template(MONOLOGUE_SUMMARY_PROMPT)
+    return prompt.format(monologue=json.dumps({'old_monologue': thoughts}))

-
-def summarize_monologue(thoughts: List[dict], model_name):
-    llm_chain = get_chain(MONOLOGUE_SUMMARY_PROMPT, model_name)
-    parser = JsonOutputParser(pydantic_object=NewMonologue)
-    resp = llm_chain.invoke({"monologue": json.dumps({"old_monologue": thoughts})})
-
-    if os.getenv("DEBUG"):
-        print("resp", resp)
-    parsed = parser.parse(resp["text"])
-    return parsed["new_monologue"]
-
-
-def request_action(
-    task,
-    thoughts: List[dict],
-    model_name: str,
-    background_commands_obs: List[CmdOutputObservation] = [],
+def get_request_action_prompt(
+    task: str,
+    thoughts: List[dict],
+    background_commands_obs: List[CmdOutputObservation] = [],
 ):
-    llm_chain = get_chain(ACTION_PROMPT, model_name)
-    parser = JsonOutputParser(pydantic_object=_ActionDict)
-    hint = ""
+    hint = ''
     if len(thoughts) > 0:
         latest_thought = thoughts[-1]
         if latest_thought["action"] == 'think':
@@ -149,17 +150,24 @@
     for command_obs in background_commands_obs:
         bg_commands_message += f"\n`{command_obs.command_id}`: {command_obs.command}"
     bg_commands_message += "\nYou can end any process by sending a `kill` action with the numerical `id` above."
-    latest_thought = thoughts[-1]
-    resp = llm_chain.invoke(
-        {
-            "monologue": json.dumps(thoughts),
-            "hint": hint,
-            "task": task,
-            "background_commands": bg_commands_message,
-        }
+
+    prompt = PromptTemplate.from_template(ACTION_PROMPT)
+    return prompt.format(
+        task=task,
+        monologue=json.dumps(thoughts),
+        background_commands=bg_commands_message,
+        hint=hint,
     )
-    if os.getenv("DEBUG"):
-        print("resp", resp)
-    parsed = parser.parse(resp["text"])
-    return parsed
+
+def parse_action_response(response: str) -> Action:
+    parser = JsonOutputParser(pydantic_object=_ActionDict)
+    action_dict = parser.parse(response)
+    action = ACTION_TYPE_TO_CLASS[action_dict["action"]](**action_dict["args"])
+    return action
+
+def parse_summary_response(response: str) -> List[dict]:
+    parser = JsonOutputParser(pydantic_object=NewMonologue)
+    parsed = parser.parse(response)
+    thoughts = parsed['new_monologue']
+    return thoughts

diff --git a/opendevin/agent.py b/opendevin/agent.py
index 09ede60cc7..916db4dc05 100644
--- a/opendevin/agent.py
+++ b/opendevin/agent.py
@@ -4,7 +4,7 @@ from typing import List, Dict, Type, TYPE_CHECKING
 if TYPE_CHECKING:
     from opendevin.action import Action
     from opendevin.state import State
-
+from opendevin.llm.llm import LLM

 class Agent(ABC):
     """
@@ -19,9 +19,12 @@ class Agent(ABC):

     _registry: Dict[str, Type["Agent"]] = {}

-    def __init__(self, model_name: str):
-        self.model_name = model_name
-        self.instruction: str = ""  # need to be set before step
+    def __init__(
+        self,
+        llm: LLM,
+    ):
+        self.instruction = ""
+        self.llm = llm
         self._complete = False

     @property
diff --git a/opendevin/llm/llm.py b/opendevin/llm/llm.py
new file mode 100644
index 0000000000..040f28b69e
--- /dev/null
+++ b/opendevin/llm/llm.py
@@ -0,0 +1,21 @@
+from litellm import completion as litellm_completion
+from functools import partial
+import os
+
+DEFAULT_MODEL = os.getenv("LLM_MODEL", "gpt-4-0125-preview")
+DEFAULT_API_KEY = os.getenv("LLM_API_KEY")
+
+class LLM:
+    def __init__(self, model=DEFAULT_MODEL, api_key=DEFAULT_API_KEY):
+        self.model = model if model else DEFAULT_MODEL
+        self.api_key = api_key if api_key else DEFAULT_API_KEY
+
+        self._completion = partial(litellm_completion, model=self.model, api_key=self.api_key)
+
+
+    @property
+    def completion(self):
+        """
+        Decorator for the litellm completion function.
+        """
+        return self._completion
diff --git a/opendevin/main.py b/opendevin/main.py
index dfca536400..7cdb5682b9 100644
--- a/opendevin/main.py
+++ b/opendevin/main.py
@@ -6,6 +6,7 @@ from typing import Type
 import agenthub  # noqa F401 (we import this to get the agents registered)
 from opendevin.agent import Agent
 from opendevin.controller import AgentController
+from opendevin.llm.llm import LLM

 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Run an agent with a specific task")
@@ -40,15 +41,15 @@ if __name__ == "__main__":
     parser.add_argument(
         "-i",
         "--max-iterations",
-        default=10,
+        default=100,
         type=int,
         help="The maximum number of iterations to run the agent",
     )
     args = parser.parse_args()

     print(f"Running agent {args.agent_cls} (model: {args.model_name}, directory: {args.directory}) with task: \"{args.task}\"")
-
+    llm = LLM(args.model_name)
     AgentCls: Type[Agent] = Agent.get_cls(args.agent_cls)
-    agent = AgentCls(model_name=args.model_name)
+    agent = AgentCls(llm=llm)
     controller = AgentController(agent, workdir=args.directory, max_iterations=args.max_iterations)
     asyncio.run(controller.start_loop(args.task))

diff --git a/opendevin/server/session.py b/opendevin/server/session.py
index dc6675dbc9..49465b4ec0 100644
--- a/opendevin/server/session.py
+++ b/opendevin/server/session.py
@@ -6,6 +6,7 @@ from fastapi import WebSocketDisconnect

 from opendevin.agent import Agent
 from opendevin.controller import AgentController
+from opendevin.llm.llm import LLM
 from opendevin.action import (
     Action,
@@ -121,15 +122,14 @@ class Session:
         model = "gpt-4-0125-preview"
         if start_event and "model" in start_event.args:
             model = start_event.args["model"]
-
         if not os.path.exists(directory):
             print(f"Workspace directory {directory} does not exist. Creating it...")
             os.makedirs(directory)
             directory = os.path.relpath(directory, os.getcwd())
-
+        llm = LLM(model)
         AgentCls = Agent.get_cls(agent_cls)
-        self.agent = AgentCls(model_name=model)
-        self.controller = AgentController(self.agent, directory, callbacks=[self.on_agent_event])
+        self.agent = AgentCls(llm)
+        self.controller = AgentController(self.agent, workdir=directory, callbacks=[self.on_agent_event])
         await self.send({"action": "initialize", "message": "Control loop started."})

     async def start_task(self, start_event):
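-- 
Reviewer note, not part of the patch: the sketch below shows how the new
opendevin/llm/llm.py wrapper is constructed and called. Only the LLM class,
its completion property, and the LLM_MODEL / LLM_API_KEY environment
variables come from the diff above; the demo prompt and the __main__
harness are illustrative assumptions.

# sketch_llm_usage.py -- minimal usage sketch of the wrapper added above
import os
from functools import partial

from litellm import completion as litellm_completion

DEFAULT_MODEL = os.getenv("LLM_MODEL", "gpt-4-0125-preview")
DEFAULT_API_KEY = os.getenv("LLM_API_KEY")

class LLM:
    def __init__(self, model=DEFAULT_MODEL, api_key=DEFAULT_API_KEY):
        # Fall back to the env-var defaults when None is passed explicitly.
        self.model = model if model else DEFAULT_MODEL
        self.api_key = api_key if api_key else DEFAULT_API_KEY
        # Bind model and key once; call sites then pass only messages and
        # sampling arguments such as stop, temperature, and seed.
        self._completion = partial(litellm_completion, model=self.model, api_key=self.api_key)

    @property
    def completion(self):
        return self._completion

if __name__ == "__main__":
    llm = LLM()  # any LiteLLM-routable model string also works here
    resp = llm.completion(messages=[{"role": "user", "content": "Say hello in one word."}])
    # LiteLLM returns an OpenAI-style response; both agents index it this way.
    print(resp["choices"][0]["message"]["content"])

Because LiteLLM dispatches on the model string, swapping providers needs no
agent changes; that is the point of routing every agent through this one
class instead of calling litellm.completion (or ChatOpenAI) directly.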
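A second sketch, also not part of the patch: the prompt -> completion ->
parse round trip that replaces the old llm.request_action() helper in the
langchains agent. The task string and the seed thought are made-up
examples, and the prompts helpers are assumed to behave as defined in this
patch.

# sketch_action_round_trip.py -- illustrative only
import agenthub.langchains_agent.utils.prompts as prompts
from opendevin.llm.llm import LLM

llm = LLM()
# Monologue entries are plain dicts, as Monologue.add_event() enforces.
thoughts = [{"action": "think", "args": {"thought": "I should look around the workspace."}}]

# 1. Render the action prompt from the task, the monologue, and any
#    background commands (none running in this example).
prompt = prompts.get_request_action_prompt("fix the failing unit test", thoughts, [])

# 2. One generic chat completion through the LiteLLM wrapper.
resp = llm.completion(messages=[{"content": prompt, "role": "user"}])
action_resp = resp["choices"][0]["message"]["content"]

# 3. Parse the model's JSON reply back into a typed Action object.
action = prompts.parse_action_response(action_resp)
print(action)  # e.g. a CmdRunAction ready for the controller to execute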