From eb4a2618800ff8c03e996ee3ce0147d8a757e9bb Mon Sep 17 00:00:00 2001
From: Robert Brennan
Date: Tue, 26 Mar 2024 00:10:23 -0400
Subject: [PATCH] Create generic LLM client using LiteLLM (#114)

* add generic llm client
* fix lint errors
* fix lint issues
* a potential suggestion for llm wrapper to keep all the function signatures for ide
* use completion partial
* fix resp
* remove unused args
* add back truncation logic
* fix add_event
* fix merge issues
* more merge issues fixed
* fix codeact agent
* remove dead code
* remove import
* unused imports
* fix ruff
* update requirements
* mypy fixes
* more lint fixes
* fix browser errors
* fix up observation conversion
* fix format of error
* change max iter default back to 100
* fix kill action
* fix docker cleanup
* add RUN_AS_DEVIN flag
* fix condense
* revert some files
* unused imports

---------

Co-authored-by: Xingyao Wang
Co-authored-by: Robert Brennan
---
 agenthub/codeact_agent/__init__.py           |  13 +--
 agenthub/langchains_agent/__init__.py        |  66 ++++------
 agenthub/langchains_agent/utils/monologue.py |  15 +--
 .../utils/{llm.py => prompts.py}             | 104 ++++++++++--------
 opendevin/agent.py                           |  11 +-
 opendevin/llm/llm.py                         |  21 ++++
 opendevin/main.py                            |   7 +-
 opendevin/server/session.py                  |   8 +-
 8 files changed, 126 insertions(+), 119 deletions(-)
 rename agenthub/langchains_agent/utils/{llm.py => prompts.py} (76%)
 create mode 100644 opendevin/llm/llm.py

diff --git a/agenthub/codeact_agent/__init__.py b/agenthub/codeact_agent/__init__.py
index 90ee2bf40d..aa5a2e4d0e 100644
--- a/agenthub/codeact_agent/__init__.py
+++ b/agenthub/codeact_agent/__init__.py
@@ -1,6 +1,5 @@
 import os
 import re
-from litellm import completion
 from termcolor import colored
 from typing import List, Mapping
@@ -17,6 +16,7 @@ from opendevin.observation import (
     AgentMessageObservation,
 )
+from opendevin.llm.llm import LLM

 assert (
     "OPENAI_API_KEY" in os.environ
@@ -62,7 +62,7 @@ def parse_response(response) -> str:
 class CodeActAgent(Agent):
     def __init__(
         self,
-        model_name: str
+        llm: LLM,
     ) -> None:
         """
         Initializes a new instance of the CodeActAgent class.
@@ -71,7 +71,7 @@ class CodeActAgent(Agent):
         - instruction (str): The instruction for the agent to execute.
         - max_steps (int): The maximum number of steps to run the agent.
""" - super().__init__(model_name) + super().__init__(llm) self.messages: List[Mapping[str, str]] = [] self.instruction: str = "" @@ -83,13 +83,10 @@ class CodeActAgent(Agent): {"role": "user", "content": self.instruction}, ] print(colored("===USER:===\n" + self.instruction, "green")) - updated_info = state.updated_info - if updated_info: for prev_action, obs in updated_info: assert isinstance(prev_action, (CmdRunAction, AgentEchoAction)), "Expecting CmdRunAction or AgentEchoAction for Action" - if isinstance(obs, AgentMessageObservation): # warning message from itself self.messages.append({"role": "user", "content": obs.content}) print(colored("===USER:===\n" + obs.content, "green")) @@ -100,10 +97,8 @@ class CodeActAgent(Agent): print(colored("===ENV OBSERVATION:===\n" + content, "blue")) else: raise NotImplementedError(f"Unknown observation type: {obs.__class__}") - - response = completion( + response = self.llm.completion( messages=self.messages, - model=self.model_name, stop=[""], temperature=0.0, seed=42, diff --git a/agenthub/langchains_agent/__init__.py b/agenthub/langchains_agent/__init__.py index 5546eaf33e..825e4ee43a 100644 --- a/agenthub/langchains_agent/__init__.py +++ b/agenthub/langchains_agent/__init__.py @@ -1,9 +1,14 @@ -from typing import List, Dict, Type +from typing import List -import agenthub.langchains_agent.utils.llm as llm +from opendevin.llm.llm import LLM from opendevin.agent import Agent +from opendevin.state import State +from opendevin.action import Action +import agenthub.langchains_agent.utils.prompts as prompts +from agenthub.langchains_agent.utils.monologue import Monologue +from agenthub.langchains_agent.utils.memory import LongTermMemory + from opendevin.action import ( - Action, CmdRunAction, CmdKillAction, BrowseURLAction, @@ -14,15 +19,12 @@ from opendevin.action import ( AgentFinishAction, ) from opendevin.observation import ( - Observation, CmdOutputObservation, - BrowserOutputObservation, ) -from opendevin.state import State -from agenthub.langchains_agent.utils.monologue import Monologue -from agenthub.langchains_agent.utils.memory import LongTermMemory +MAX_MONOLOGUE_LENGTH = 20000 +MAX_OUTPUT_LENGTH = 5000 INITIAL_THOUGHTS = [ "I exist!", @@ -66,26 +68,12 @@ INITIAL_THOUGHTS = [ MAX_OUTPUT_LENGTH = 5000 MAX_MONOLOGUE_LENGTH = 20000 - -ACTION_TYPE_TO_CLASS: Dict[str, Type[Action]] = { - "run": CmdRunAction, - "kill": CmdKillAction, - "browse": BrowseURLAction, - "read": FileReadAction, - "write": FileWriteAction, - "recall": AgentRecallAction, - "think": AgentThinkAction, - "finish": AgentFinishAction, -} - -CLASS_TO_ACTION_TYPE: Dict[Type[Action], str] = {v: k for k, v in ACTION_TYPE_TO_CLASS.items()} - class LangchainsAgent(Agent): _initialized = False - def __init__(self, model_name: str): - super().__init__(model_name) - self.monologue = Monologue(self.model_name) + def __init__(self, llm: LLM): + super().__init__(llm) + self.monologue = Monologue() self.memory = LongTermMemory() def _add_event(self, event: dict): @@ -95,7 +83,7 @@ class LangchainsAgent(Agent): self.monologue.add_event(event) self.memory.add_event(event) if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH: - self.monologue.condense() + self.monologue.condense(self.llm) def _initialize(self): if self._initialized: @@ -103,6 +91,8 @@ class LangchainsAgent(Agent): if self.instruction is None or self.instruction == "": raise ValueError("Instruction must be provided") + self.monologue = Monologue() + self.memory = LongTermMemory() next_is_output = False for thought in 
@@ -128,7 +118,7 @@ class LangchainsAgent(Agent):
             else:
                 d = {"action": "think", "args": {"thought": thought}}
-        self._add_event(d)
+            self._add_event(d)
         self._initialized = True

     def step(self, state: State) -> Action:
@@ -143,14 +133,8 @@ class LangchainsAgent(Agent):
                 d = {"action": "error", "args": {"output": obs.content}}
             else:
                 d = {"action": "output", "args": {"output": obs.content}}
-        # elif isinstance(obs, UserMessageObservation):
-        #     d = {"action": "output", "args": {"output": obs.message}}
-        # elif isinstance(obs, AgentMessageObservation):
-        #     d = {"action": "output", "args": {"output": obs.message}}
-        elif isinstance(obs, (BrowserOutputObservation, Observation)):
-            d = {"action": "output", "args": {"output": obs.content}}
         else:
-            raise NotImplementedError(f"Unknown observation type: {obs}")
+            d = {"action": "output", "args": {"output": obs.content}}

         self._add_event(d)
@@ -175,18 +159,16 @@ class LangchainsAgent(Agent):
             self._add_event(d)

         state.updated_info = []
-
-        action_dict = llm.request_action(
+
+        prompt = prompts.get_request_action_prompt(
             self.instruction,
             self.monologue.get_thoughts(),
-            self.model_name,
             state.background_commands_obs,
         )
-        if action_dict is None:
-            action_dict = {"action": "think", "args": {"thought": "..."}}
-
-        # Translate action_dict to Action
-        action = ACTION_TYPE_TO_CLASS[action_dict["action"]](**action_dict["args"])
+        messages = [{"content": prompt, "role": "user"}]
+        resp = self.llm.completion(messages=messages)
+        action_resp = resp['choices'][0]['message']['content']
+        action = prompts.parse_action_response(action_resp)
         self.latest_action = action
         return action

diff --git a/agenthub/langchains_agent/utils/monologue.py b/agenthub/langchains_agent/utils/monologue.py
index 99e0de6b67..74b2f8e97b 100644
--- a/agenthub/langchains_agent/utils/monologue.py
+++ b/agenthub/langchains_agent/utils/monologue.py
@@ -1,11 +1,10 @@
 import agenthub.langchains_agent.utils.json as json
-import agenthub.langchains_agent.utils.llm as llm
+import agenthub.langchains_agent.utils.prompts as prompts

 class Monologue:
-    def __init__(self, model_name):
+    def __init__(self):
         self.thoughts = []
-        self.model_name = model_name

     def add_event(self, t: dict):
         if not isinstance(t, dict):
@@ -24,13 +23,11 @@ class Monologue:
             print(f"Error serializing thought: {e}")
         return total_length

-    def condense(self):
+    def condense(self, llm):
         try:
-            new_thoughts = llm.summarize_monologue(self.thoughts, self.model_name)
-            # Ensure new_thoughts is not empty or significantly malformed before assigning
-            if not new_thoughts or len(new_thoughts) > len(self.thoughts):
-                raise ValueError("Condensing resulted in invalid state.")
-            self.thoughts = new_thoughts
+            prompt = prompts.get_summarize_monologue_prompt(self.thoughts)
+            resp = llm.completion(messages=[{'content': prompt, 'role': 'user'}])
+            self.thoughts = prompts.parse_summary_response(resp['choices'][0]['message']['content'])
         except Exception as e:
             # Consider logging the error here instead of or in addition to raising an exception
             raise RuntimeError(f"Error condensing thoughts: {e}")

diff --git a/agenthub/langchains_agent/utils/llm.py b/agenthub/langchains_agent/utils/prompts.py
similarity index 76%
rename from agenthub/langchains_agent/utils/llm.py
rename to agenthub/langchains_agent/utils/prompts.py
index d3885ad67f..0520cc6254 100644
--- a/agenthub/langchains_agent/utils/llm.py
+++ b/agenthub/langchains_agent/utils/prompts.py
@@ -1,23 +1,44 @@
 import os
-from . import json
+from typing import List, Dict, Type
+
+from langchain_core.pydantic_v1 import BaseModel
+from langchain.prompts import PromptTemplate
+from langchain_core.output_parsers import JsonOutputParser

 if os.getenv("DEBUG"):
     from langchain.globals import set_debug
     set_debug(True)
-
-from typing import List
-from langchain_core.pydantic_v1 import BaseModel
+from . import json
+from opendevin.action import (
+    Action,
+    CmdRunAction,
+    CmdKillAction,
+    BrowseURLAction,
+    FileReadAction,
+    FileWriteAction,
+    AgentRecallAction,
+    AgentThinkAction,
+    AgentFinishAction,
+)
 from opendevin.observation import (
     CmdOutputObservation,
 )
-from langchain.chains import LLMChain
-from langchain.prompts import PromptTemplate
-from langchain_core.output_parsers import JsonOutputParser
-from langchain_openai import ChatOpenAI
+
+ACTION_TYPE_TO_CLASS: Dict[str, Type[Action]] = {
+    "run": CmdRunAction,
+    "kill": CmdKillAction,
+    "browse": BrowseURLAction,
+    "read": FileReadAction,
+    "write": FileWriteAction,
+    "recall": AgentRecallAction,
+    "think": AgentThinkAction,
+    "finish": AgentFinishAction,
+}
+CLASS_TO_ACTION_TYPE: Dict[Type[Action], str] = {v: k for k, v in ACTION_TYPE_TO_CLASS.items()}

 ACTION_PROMPT = """
 You're a thoughtful robot. Your main task is to {task}.
@@ -103,36 +124,16 @@ class NewMonologue(BaseModel):
     new_monologue: List[_ActionDict]

-def get_chain(template, model_name):
-    assert (
-        "OPENAI_API_KEY" in os.environ
-    ), "Please set the OPENAI_API_KEY environment variable to use langchains_agent."
-    llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"), model_name=model_name)  # type: ignore
-    prompt = PromptTemplate.from_template(template)
-    llm_chain = LLMChain(prompt=prompt, llm=llm)
-    return llm_chain
+def get_summarize_monologue_prompt(thoughts):
+    prompt = PromptTemplate.from_template(MONOLOGUE_SUMMARY_PROMPT)
+    return prompt.format(monologue=json.dumps({'old_monologue': thoughts}))

-
-def summarize_monologue(thoughts: List[dict], model_name):
-    llm_chain = get_chain(MONOLOGUE_SUMMARY_PROMPT, model_name)
-    parser = JsonOutputParser(pydantic_object=NewMonologue)
-    resp = llm_chain.invoke({"monologue": json.dumps({"old_monologue": thoughts})})
-
-    if os.getenv("DEBUG"):
-        print("resp", resp)
-    parsed = parser.parse(resp["text"])
-    return parsed["new_monologue"]
-
-
-def request_action(
-    task,
-    thoughts: List[dict],
-    model_name: str,
-    background_commands_obs: List[CmdOutputObservation] = [],
+def get_request_action_prompt(
+    task: str,
+    thoughts: List[dict],
+    background_commands_obs: List[CmdOutputObservation] = [],
 ):
-    llm_chain = get_chain(ACTION_PROMPT, model_name)
-    parser = JsonOutputParser(pydantic_object=_ActionDict)
-    hint = ""
+    hint = ''
     if len(thoughts) > 0:
         latest_thought = thoughts[-1]
         if latest_thought["action"] == 'think':
@@ -149,17 +150,24 @@
     for command_obs in background_commands_obs:
         bg_commands_message += f"\n`{command_obs.command_id}`: {command_obs.command}"
     bg_commands_message += "\nYou can end any process by sending a `kill` action with the numerical `id` above."
-    latest_thought = thoughts[-1]
-    resp = llm_chain.invoke(
-        {
-            "monologue": json.dumps(thoughts),
-            "hint": hint,
-            "task": task,
-            "background_commands": bg_commands_message,
-        }
+
+    prompt = PromptTemplate.from_template(ACTION_PROMPT)
+    return prompt.format(
+        task=task,
+        monologue=json.dumps(thoughts),
+        background_commands=bg_commands_message,
+        hint=hint,
     )
-    if os.getenv("DEBUG"):
-        print("resp", resp)
-    parsed = parser.parse(resp["text"])
-    return parsed
+
+def parse_action_response(response: str) -> Action:
+    parser = JsonOutputParser(pydantic_object=_ActionDict)
+    action_dict = parser.parse(response)
+    action = ACTION_TYPE_TO_CLASS[action_dict["action"]](**action_dict["args"])
+    return action
+
+def parse_summary_response(response: str) -> List[dict]:
+    parser = JsonOutputParser(pydantic_object=NewMonologue)
+    parsed = parser.parse(response)
+    thoughts = parsed['new_monologue']
+    return thoughts

diff --git a/opendevin/agent.py b/opendevin/agent.py
index 09ede60cc7..916db4dc05 100644
--- a/opendevin/agent.py
+++ b/opendevin/agent.py
@@ -4,7 +4,7 @@ from typing import List, Dict, Type, TYPE_CHECKING
 if TYPE_CHECKING:
     from opendevin.action import Action
     from opendevin.state import State
-
+from opendevin.llm.llm import LLM

 class Agent(ABC):
     """
@@ -19,9 +19,12 @@ class Agent(ABC):

     _registry: Dict[str, Type["Agent"]] = {}

-    def __init__(self, model_name: str):
-        self.model_name = model_name
-        self.instruction: str = ""  # need to be set before step
+    def __init__(
+        self,
+        llm: LLM,
+    ):
+        self.instruction = ""
+        self.llm = llm
         self._complete = False

     @property
diff --git a/opendevin/llm/llm.py b/opendevin/llm/llm.py
new file mode 100644
index 0000000000..040f28b69e
--- /dev/null
+++ b/opendevin/llm/llm.py
@@ -0,0 +1,21 @@
+from litellm import completion as litellm_completion
+from functools import partial
+import os
+
+DEFAULT_MODEL = os.getenv("LLM_MODEL", "gpt-4-0125-preview")
+DEFAULT_API_KEY = os.getenv("LLM_API_KEY")
+
+class LLM:
+    def __init__(self, model=DEFAULT_MODEL, api_key=DEFAULT_API_KEY):
+        self.model = model if model else DEFAULT_MODEL
+        self.api_key = api_key if api_key else DEFAULT_API_KEY
+
+        self._completion = partial(litellm_completion, model=self.model, api_key=self.api_key)
+
+
+    @property
+    def completion(self):
+        """
+        Decorator for the litellm completion function.
+        """
+        return self._completion
diff --git a/opendevin/main.py b/opendevin/main.py
index dfca536400..7cdb5682b9 100644
--- a/opendevin/main.py
+++ b/opendevin/main.py
@@ -6,6 +6,7 @@ from typing import Type
 import agenthub  # noqa F401 (we import this to get the agents registered)
 from opendevin.agent import Agent
 from opendevin.controller import AgentController
+from opendevin.llm.llm import LLM

 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Run an agent with a specific task")
@@ -40,15 +41,15 @@ if __name__ == "__main__":
     parser.add_argument(
         "-i",
         "--max-iterations",
-        default=10,
+        default=100,
         type=int,
         help="The maximum number of iterations to run the agent",
     )
     args = parser.parse_args()

     print(f"Running agent {args.agent_cls} (model: {args.model_name}, directory: {args.directory}) with task: \"{args.task}\"")
-
+    llm = LLM(args.model_name)
     AgentCls: Type[Agent] = Agent.get_cls(args.agent_cls)
-    agent = AgentCls(model_name=args.model_name)
+    agent = AgentCls(llm=llm)
     controller = AgentController(agent, workdir=args.directory, max_iterations=args.max_iterations)
     asyncio.run(controller.start_loop(args.task))

diff --git a/opendevin/server/session.py b/opendevin/server/session.py
index dc6675dbc9..49465b4ec0 100644
--- a/opendevin/server/session.py
+++ b/opendevin/server/session.py
@@ -6,6 +6,7 @@ from fastapi import WebSocketDisconnect

 from opendevin.agent import Agent
 from opendevin.controller import AgentController
+from opendevin.llm.llm import LLM
 from opendevin.action import (
     Action,
@@ -121,15 +122,14 @@ class Session:
         model = "gpt-4-0125-preview"
         if start_event and "model" in start_event.args:
             model = start_event.args["model"]
-
         if not os.path.exists(directory):
             print(f"Workspace directory {directory} does not exist. Creating it...")
             os.makedirs(directory)
             directory = os.path.relpath(directory, os.getcwd())
-
+        llm = LLM(model)
         AgentCls = Agent.get_cls(agent_cls)
-        self.agent = AgentCls(model_name=model)
-        self.controller = AgentController(self.agent, directory, callbacks=[self.on_agent_event])
+        self.agent = AgentCls(llm)
+        self.controller = AgentController(self.agent, workdir=directory, callbacks=[self.on_agent_event])
         await self.send({"action": "initialize", "message": "Control loop started."})

     async def start_task(self, start_event):
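-- 
Reviewer note, not part of the patch: the sketch below shows how the new
opendevin/llm/llm.py wrapper is constructed and called. Only the LLM class,
its completion property, and the LLM_MODEL / LLM_API_KEY environment
variables come from the diff above; the demo prompt and the __main__
harness are illustrative assumptions.

# sketch_llm_usage.py -- minimal usage sketch of the wrapper added above
import os
from functools import partial

from litellm import completion as litellm_completion

DEFAULT_MODEL = os.getenv("LLM_MODEL", "gpt-4-0125-preview")
DEFAULT_API_KEY = os.getenv("LLM_API_KEY")

class LLM:
    def __init__(self, model=DEFAULT_MODEL, api_key=DEFAULT_API_KEY):
        # Fall back to the env-var defaults when None is passed explicitly.
        self.model = model if model else DEFAULT_MODEL
        self.api_key = api_key if api_key else DEFAULT_API_KEY
        # Bind model and key once; call sites then pass only messages and
        # sampling arguments such as stop, temperature, and seed.
        self._completion = partial(litellm_completion, model=self.model, api_key=self.api_key)

    @property
    def completion(self):
        return self._completion

if __name__ == "__main__":
    llm = LLM()  # any LiteLLM-routable model string also works here
    resp = llm.completion(messages=[{"role": "user", "content": "Say hello in one word."}])
    # LiteLLM returns an OpenAI-style response; both agents index it this way.
    print(resp["choices"][0]["message"]["content"])

Because LiteLLM dispatches on the model string, swapping providers needs no
agent changes; that is the point of routing every agent through this one
class instead of calling litellm.completion (or ChatOpenAI) directly.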
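A second sketch, also not part of the patch: the prompt -> completion ->
parse round trip that replaces the old llm.request_action() helper in the
langchains agent. The task string and the seed thought are made-up
examples, and the prompts helpers are assumed to behave as defined in this
patch.

# sketch_action_round_trip.py -- illustrative only
import agenthub.langchains_agent.utils.prompts as prompts
from opendevin.llm.llm import LLM

llm = LLM()
# Monologue entries are plain dicts, as Monologue.add_event() enforces.
thoughts = [{"action": "think", "args": {"thought": "I should look around the workspace."}}]

# 1. Render the action prompt from the task, the monologue, and any
#    background commands (none running in this example).
prompt = prompts.get_request_action_prompt("fix the failing unit test", thoughts, [])

# 2. One generic chat completion through the LiteLLM wrapper.
resp = llm.completion(messages=[{"content": prompt, "role": "user"}])
action_resp = resp["choices"][0]["message"]["content"]

# 3. Parse the model's JSON reply back into a typed Action object.
action = prompts.parse_action_response(action_resp)
print(action)  # e.g. a CmdRunAction ready for the controller to execute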