From bccb8297b83f3efc739e267288794a50b0b080b7 Mon Sep 17 00:00:00 2001
From: Jiayi Pan
Date: Fri, 3 May 2024 15:15:39 -0400
Subject: [PATCH] feat: ability to configure temperature and top-p sampling
 for llm generation (#1556)

Co-authored-by: Jim Su
---
 opendevin/core/config.py        | 2 ++
 opendevin/core/schema/config.py | 2 ++
 opendevin/llm/llm.py            | 6 ++++++
 3 files changed, 10 insertions(+)

diff --git a/opendevin/core/config.py b/opendevin/core/config.py
index 819fcdee07..5a4bb8ea69 100644
--- a/opendevin/core/config.py
+++ b/opendevin/core/config.py
@@ -42,6 +42,8 @@ DEFAULT_CONFIG: dict = {
     ConfigType.AGENT_MEMORY_ENABLED: False,
     ConfigType.LLM_TIMEOUT: None,
     ConfigType.LLM_MAX_RETURN_TOKENS: None,
+    ConfigType.LLM_TEMPERATURE: None,
+    ConfigType.LLM_TOP_P: None,
     # GPT-4 pricing is $10 per 1M input tokens. Since tokenization happens on LLM side,
     # we cannot easily count number of tokens, but we can count characters.
     # Assuming 5 characters per token, 5 million is a reasonable default limit.
diff --git a/opendevin/core/schema/config.py b/opendevin/core/schema/config.py
index 6570ec847a..ccde824df1 100644
--- a/opendevin/core/schema/config.py
+++ b/opendevin/core/schema/config.py
@@ -2,6 +2,8 @@ from enum import Enum


 class ConfigType(str, Enum):
+    LLM_TOP_P = 'LLM_TOP_P'
+    LLM_TEMPERATURE = 'LLM_TEMPERATURE'
     LLM_MAX_RETURN_TOKENS = 'LLM_MAX_RETURN_TOKENS'
     LLM_TIMEOUT = 'LLM_TIMEOUT'
     LLM_API_KEY = 'LLM_API_KEY'
diff --git a/opendevin/llm/llm.py b/opendevin/llm/llm.py
index 3f75ecde3e..f9ce38dc19 100644
--- a/opendevin/llm/llm.py
+++ b/opendevin/llm/llm.py
@@ -27,6 +27,8 @@ LLM_RETRY_MIN_WAIT = config.get(ConfigType.LLM_RETRY_MIN_WAIT)
 LLM_RETRY_MAX_WAIT = config.get(ConfigType.LLM_RETRY_MAX_WAIT)
 LLM_TIMEOUT = config.get(ConfigType.LLM_TIMEOUT)
 LLM_MAX_RETURN_TOKENS = config.get(ConfigType.LLM_MAX_RETURN_TOKENS)
+LLM_TEMPERATURE = config.get(ConfigType.LLM_TEMPERATURE)
+LLM_TOP_P = config.get(ConfigType.LLM_TOP_P)


 class LLM:
@@ -45,6 +47,8 @@ class LLM:
         retry_max_wait=LLM_RETRY_MAX_WAIT,
         llm_timeout=LLM_TIMEOUT,
         llm_max_return_tokens=LLM_MAX_RETURN_TOKENS,
+        llm_temperature=LLM_TEMPERATURE,
+        llm_top_p=LLM_TOP_P,
     ):
         """
         Args:
@@ -80,6 +84,8 @@ class LLM:
             api_version=self.api_version,
             max_tokens=self.llm_max_return_tokens,
             timeout=self.llm_timeout,
+            temperature=llm_temperature,
+            top_p=llm_top_p,
         )

         completion_unwrapped = self._completion
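
Usage note (not part of the patch): a minimal sketch of how the new sampling
knobs can be exercised once this commit is applied. It assumes the wrapped
litellm completion is exposed as llm.completion(...), as elsewhere in the repo
at the time; the concrete values below are illustrative only.

    # Hypothetical usage sketch, not included in this commit.
    from opendevin.llm.llm import LLM

    # llm_temperature and llm_top_p flow straight through to litellm's
    # completion call: temperature=0.0 makes generation (near-)deterministic,
    # while top_p=0.9 samples only from the smallest token set whose
    # cumulative probability reaches 0.9.
    llm = LLM(
        llm_temperature=0.0,  # forwarded as temperature=
        llm_top_p=0.9,        # forwarded as top_p=
    )
    response = llm.completion(messages=[{'role': 'user', 'content': 'Hello'}])

Either value left as None (the new config default) is simply passed through,
so providers fall back to their own defaults unless the user opts in.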