chat completion check (#1024)

* chat completion check

* add test

* doc

* timeout

* bump version to 1.2.4
Chi Wang authored on 2023-05-09 13:39:46 -07:00, committed by GitHub
parent 51c8768bcf, commit 59e882e5cc
6 changed files with 41 additions and 15 deletions

flaml/autogen/code_utils.py

@@ -46,7 +46,7 @@ _IMPROVE_FUNCTION_CONFIG = {
 The current implementation of the function is as follows:
 {file_string}""",
     "model": DEFAULT_MODEL,
-    "request_timeout": 300,
+    "request_timeout": 600,
 }

flaml/autogen/oai/completion.py

@@ -171,11 +171,7 @@ class Completion(openai_Completion):
         Try cache first. If not found, call the openai api. If the api call fails, retry after retry_time.
         """
         config = config.copy()
-        openai.api_key = config.pop("api_key", openai.api_key)
-        openai.api_base = config.pop("api_base", openai.api_base)
         openai.api_key_path = config.pop("api_key_path", openai.api_key_path)
-        openai.api_type = config.pop("api_type", openai.api_type)
-        openai.api_version = config.pop("api_version", openai.api_version)
         key = get_key(config)
         if use_cache:
             response = cls._cache.get(key, None)
@@ -183,7 +179,11 @@ class Completion(openai_Completion):
                 # print("using cached response")
                 cls._book_keeping(config, response)
                 return response
-        openai_completion = openai.ChatCompletion if config["model"] in cls.chat_models else openai.Completion
+        openai_completion = (
+            openai.ChatCompletion
+            if config["model"] in cls.chat_models or issubclass(cls, ChatCompletion)
+            else openai.Completion
+        )
         start_time = time.time()
         request_timeout = cls.request_timeout
         while True:
@@ -227,7 +227,7 @@ class Completion(openai_Completion):
                 request_timeout = min(request_timeout, time_left)
                 sleep(cls.retry_time)
             except InvalidRequestError:
-                if "azure" == openai.api_type and "model" in config:
+                if "azure" == config.get("api_type", openai.api_type) and "model" in config:
                     # azure api uses "engine" instead of "model"
                     config["engine"] = config.pop("model").replace("gpt-3.5-turbo", "gpt-35-turbo")
                 else:
@@ -291,7 +291,7 @@ class Completion(openai_Completion):
     @classmethod
     def _get_prompt_messages_from_config(cls, model, config):
         prompt, messages = None, None
-        if model in cls.chat_models:
+        if model in cls.chat_models or issubclass(cls, ChatCompletion):
             # either "prompt" should be in config (for being compatible with non-chat models)
             # or "messages" should be in config (for tuning chat models only)
             prompt = config.get("prompt")
@@ -778,7 +778,7 @@ class Completion(openai_Completion):
         messages = config.get("messages") if messages is None else messages
         # either "prompt" should be in config (for being compatible with non-chat models)
         # or "messages" should be in config (for tuning chat models only)
-        if prompt is None and model in cls.chat_models:
+        if prompt is None and (model in cls.chat_models or issubclass(cls, ChatCompletion)):
             if messages is None:
                 raise ValueError("Either prompt or messages should be in config for chat models.")
         if prompt is None:
@@ -793,7 +793,7 @@ class Completion(openai_Completion):
                 if data_instance
                 else messages
             )
-        elif model in cls.chat_models:
+        elif model in cls.chat_models or issubclass(cls, ChatCompletion):
             # convert prompt to messages
             params["messages"] = [
                 {
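Taken together, the `issubclass(cls, ChatCompletion)` checks above mean that calls made through the `ChatCompletion` subclass always get chat-style handling, even for model names not listed in `chat_models`. A minimal sketch of the resulting behavior (the model name is a placeholder; it mirrors the new test added later in this commit):

```python
from flaml import oai

# Through the ChatCompletion subclass, an unregistered model name still gets
# chat-style parameter construction: `prompt` is converted to `messages`.
params = oai.ChatCompletion._construct_params(
    data_instance=None, config={"model": "my-local-chat-model"}, prompt="hi"
)
assert "messages" in params

# Through the base Completion class, an unknown model is treated as non-chat.
params = oai.Completion._construct_params(
    data_instance=None, config={"model": "my-local-chat-model"}, prompt="hi"
)
assert "messages" not in params
```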

flaml/version.py

@@ -1 +1 @@
-__version__ = "1.2.3"
+__version__ = "1.2.4"

test/openai/test_completion.py

@@ -18,6 +18,27 @@ from flaml.autogen.code_utils import (
 from flaml.autogen.math_utils import eval_math_responses, solve_problem
 
 
+def test_chatcompletion():
+    params = oai.ChatCompletion._construct_params(
+        data_instance=None,
+        config={"model": "unknown"},
+        prompt="hi",
+    )
+    assert "messages" in params
+    params = oai.Completion._construct_params(
+        data_instance=None,
+        config={"model": "unknown"},
+        prompt="hi",
+    )
+    assert "messages" not in params
+    params = oai.Completion._construct_params(
+        data_instance=None,
+        config={"model": "gpt-4"},
+        prompt="hi",
+    )
+    assert "messages" in params
+
+
 def test_multi_model():
     try:
         import openai
@@ -389,9 +410,10 @@ if __name__ == "__main__":
     openai.api_key = os.environ["OPENAI_API_KEY"] = open("test/openai/key.txt").read().strip()
     os.environ["AZURE_OPENAI_API_KEY"] = open("test/openai/key_azure.txt").read().strip()
     os.environ["AZURE_OPENAI_API_BASE"] = open("test/openai/base_azure.txt").read().strip()
+    test_chatcompletion()
     # test_multi_model()
     # test_execute_code()
-    test_improve()
+    # test_improve()
     # test_nocontext()
     # test_humaneval(1)
     # test_math(1)

test/openai/test_notebook.py

@@ -20,7 +20,7 @@ def run_notebook(input_nb, output_nb="executed_openai_notebook.ipynb", save=False):
     file_path = os.path.join(here, os.pardir, os.pardir, "notebook", input_nb)
     with open(file_path) as f:
         nb = nbformat.read(f, as_version=4)
-    ep = ExecutePreprocessor(timeout=3600, kernel_name="python3")
+    ep = ExecutePreprocessor(timeout=4800, kernel_name="python3")
     ep.preprocess(nb, {"metadata": {"path": here}})
     output_file_name = "executed_openai_notebook_output.txt"

website/docs/Use-Cases/Auto-Generation.md

@@ -107,10 +107,13 @@ There are a number of benefits of using `flaml.oai.Completion.create` to perform inference.
 ### API unification
-`flaml.oai.Completion.create` is compatible with both `openai.Completion.create` and `openai.ChatCompletion.create`, and both OpenAI API and Azure OpenAI API. So models such as "text-davinci-003", "gpt-3.5-turbo" and "gpt-4" can share a common API. When only tuning the chat-based models, `flaml.oai.ChatCompletion` can be used.
+`flaml.oai.Completion.create` is compatible with both `openai.Completion.create` and `openai.ChatCompletion.create`, and both OpenAI API and Azure OpenAI API. So models such as "text-davinci-003", "gpt-3.5-turbo" and "gpt-4" can share a common API.
+When chat models are used and `prompt` is given as the input to `flaml.oai.Completion.create`, the prompt will be automatically converted into `messages` to fit the chat completion API requirement. One advantage is that one can experiment with both chat and non-chat models for the same prompt in a unified API.
+For local LLMs, one can spin up an endpoint using a package like [simple_ai_server](https://github.com/lhenault/simpleAI), and then use the same API to send a request.
+When only working with the chat-based models, `flaml.oai.ChatCompletion` can be used. It also does automatic conversion from prompt to messages, if prompt is provided instead of messages.
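As an illustration of the unified API described above (a sketch; the model names are examples, and the local endpoint URL is a placeholder):

```python
from flaml import oai

# The same call shape works for a chat model and a non-chat model; a `prompt`
# sent to a chat model is converted to `messages` automatically.
response = oai.Completion.create(prompt="What is 2 + 2?", model="gpt-3.5-turbo")
response = oai.Completion.create(prompt="What is 2 + 2?", model="text-davinci-003")

# Same API against a locally served model (placeholder endpoint):
response = oai.Completion.create(
    prompt="What is 2 + 2?",
    model="llama-7B",
    api_base="http://127.0.0.1:8080/v1",
    api_type="open_ai",
)
```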
 
 ### Caching
 
 API call results are cached locally and reused when the same request is issued. This is useful when repeating or continuing experiments for reproducibility and cost saving. It still allows controlled randomness by setting the "seed", using [`set_cache`](../reference/autogen/oai/completion#set_cache) or specifying in `create()`.
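For example (a minimal sketch; `set_cache` is documented in the reference linked above):

```python
from flaml import oai

# Responses are cached per seed; changing the seed forces fresh sampling
# while keeping the new results cached for reuse.
oai.Completion.set_cache(seed=44)
response = oai.Completion.create(prompt="Tell me a joke.", model="gpt-3.5-turbo")
```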
@@ -149,7 +152,8 @@ response = oai.Completion.create(
 )
 ```
 
-It will try querying Azure OpenAI gpt-4, OpenAI gpt-3.5-turbo, and llama-7B one by one, until a valid result is returned. This can speed up the development process where the rate limit is a bottleneck.
+It will try querying Azure OpenAI gpt-4, OpenAI gpt-3.5-turbo, and a locally hosted llama-7B one by one, ignoring AuthenticationError, RateLimitError and Timeout,
+until a valid result is returned. This can speed up the development process where the rate limit is a bottleneck. An error will be raised if the last choice fails. So make sure the last choice in the list has the best availability.
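The retry-and-fallback behavior described in the added text can be pictured as follows (illustrative only, not the library's implementation; it assumes the pre-1.0 `openai` package, where these error classes live in `openai.error`):

```python
import openai
from openai.error import AuthenticationError, RateLimitError, Timeout

def create_with_fallback(config_list, **base_config):
    """Try each config in order; ignore availability errors except on the last choice."""
    for i, config in enumerate(config_list):
        try:
            return openai.ChatCompletion.create(**{**base_config, **config})
        except (AuthenticationError, RateLimitError, Timeout):
            if i == len(config_list) - 1:
                raise  # the last choice also failed: surface the error
```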
 
 ### Templating