chat completion check (#1024)

* chat completion check

* add test

* doc

* timeout

* bump version to 1.2.4
Chi Wang authored on 2023-05-09 13:39:46 -07:00, committed by GitHub
parent 51c8768bcf, commit 59e882e5cc
6 changed files with 41 additions and 15 deletions

flaml/autogen/code_utils.py

@@ -46,7 +46,7 @@ _IMPROVE_FUNCTION_CONFIG = {
 The current implementation of the function is as follows:
 {file_string}""",
     "model": DEFAULT_MODEL,
-    "request_timeout": 300,
+    "request_timeout": 600,
 }

flaml/autogen/oai/completion.py

@@ -171,11 +171,7 @@ class Completion(openai_Completion):
         Try cache first. If not found, call the openai api. If the api call fails, retry after retry_time.
         """
         config = config.copy()
-        openai.api_key = config.pop("api_key", openai.api_key)
-        openai.api_base = config.pop("api_base", openai.api_base)
         openai.api_key_path = config.pop("api_key_path", openai.api_key_path)
-        openai.api_type = config.pop("api_type", openai.api_type)
-        openai.api_version = config.pop("api_version", openai.api_version)
         key = get_key(config)
         if use_cache:
             response = cls._cache.get(key, None)
@@ -183,7 +179,11 @@ class Completion(openai_Completion):
                 # print("using cached response")
                 cls._book_keeping(config, response)
                 return response
-        openai_completion = openai.ChatCompletion if config["model"] in cls.chat_models else openai.Completion
+        openai_completion = (
+            openai.ChatCompletion
+            if config["model"] in cls.chat_models or issubclass(cls, ChatCompletion)
+            else openai.Completion
+        )
         start_time = time.time()
         request_timeout = cls.request_timeout
         while True:
@@ -227,7 +227,7 @@ class Completion(openai_Completion):
                 request_timeout = min(request_timeout, time_left)
                 sleep(cls.retry_time)
             except InvalidRequestError:
-                if "azure" == openai.api_type and "model" in config:
+                if "azure" == config.get("api_type", openai.api_type) and "model" in config:
                     # azure api uses "engine" instead of "model"
                     config["engine"] = config.pop("model").replace("gpt-3.5-turbo", "gpt-35-turbo")
                 else:
@@ -291,7 +291,7 @@ class Completion(openai_Completion):
     @classmethod
     def _get_prompt_messages_from_config(cls, model, config):
         prompt, messages = None, None
-        if model in cls.chat_models:
+        if model in cls.chat_models or issubclass(cls, ChatCompletion):
             # either "prompt" should be in config (for being compatible with non-chat models)
             # or "messages" should be in config (for tuning chat models only)
             prompt = config.get("prompt")
@@ -778,7 +778,7 @@ class Completion(openai_Completion):
         messages = config.get("messages") if messages is None else messages
         # either "prompt" should be in config (for being compatible with non-chat models)
         # or "messages" should be in config (for tuning chat models only)
-        if prompt is None and model in cls.chat_models:
+        if prompt is None and (model in cls.chat_models or issubclass(cls, ChatCompletion)):
             if messages is None:
                 raise ValueError("Either prompt or messages should be in config for chat models.")
         if prompt is None:
@@ -793,7 +793,7 @@ class Completion(openai_Completion):
                 if data_instance
                 else messages
             )
-        elif model in cls.chat_models:
+        elif model in cls.chat_models or issubclass(cls, ChatCompletion):
             # convert prompt to messages
             params["messages"] = [
                 {
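Taken together, the `issubclass(cls, ChatCompletion)` checks above mean that calls made through the `ChatCompletion` subclass always get chat-style handling, even for model names not listed in `chat_models`. A minimal sketch of the resulting behavior (the model name is a placeholder; it mirrors the new test added later in this commit):

```python
from flaml import oai

# Through the ChatCompletion subclass, an unregistered model name still gets
# chat-style parameter construction: `prompt` is converted to `messages`.
params = oai.ChatCompletion._construct_params(
    data_instance=None, config={"model": "my-local-chat-model"}, prompt="hi"
)
assert "messages" in params

# Through the base Completion class, an unknown model is treated as non-chat.
params = oai.Completion._construct_params(
    data_instance=None, config={"model": "my-local-chat-model"}, prompt="hi"
)
assert "messages" not in params
```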

flaml/version.py

@@ -1 +1 @@
-__version__ = "1.2.3"
+__version__ = "1.2.4"

test/openai/test_completion.py

@@ -18,6 +18,27 @@ from flaml.autogen.code_utils import (
 from flaml.autogen.math_utils import eval_math_responses, solve_problem
 
 
+def test_chatcompletion():
+    params = oai.ChatCompletion._construct_params(
+        data_instance=None,
+        config={"model": "unknown"},
+        prompt="hi",
+    )
+    assert "messages" in params
+    params = oai.Completion._construct_params(
+        data_instance=None,
+        config={"model": "unknown"},
+        prompt="hi",
+    )
+    assert "messages" not in params
+    params = oai.Completion._construct_params(
+        data_instance=None,
+        config={"model": "gpt-4"},
+        prompt="hi",
+    )
+    assert "messages" in params
+
+
 def test_multi_model():
     try:
         import openai
@@ -389,9 +410,10 @@ if __name__ == "__main__":
     openai.api_key = os.environ["OPENAI_API_KEY"] = open("test/openai/key.txt").read().strip()
     os.environ["AZURE_OPENAI_API_KEY"] = open("test/openai/key_azure.txt").read().strip()
     os.environ["AZURE_OPENAI_API_BASE"] = open("test/openai/base_azure.txt").read().strip()
+    test_chatcompletion()
     # test_multi_model()
     # test_execute_code()
-    test_improve()
+    # test_improve()
     # test_nocontext()
     # test_humaneval(1)
     # test_math(1)

test/openai/test_notebook.py

@@ -20,7 +20,7 @@ def run_notebook(input_nb, output_nb="executed_openai_notebook.ipynb", save=False):
     file_path = os.path.join(here, os.pardir, os.pardir, "notebook", input_nb)
     with open(file_path) as f:
         nb = nbformat.read(f, as_version=4)
-    ep = ExecutePreprocessor(timeout=3600, kernel_name="python3")
+    ep = ExecutePreprocessor(timeout=4800, kernel_name="python3")
     ep.preprocess(nb, {"metadata": {"path": here}})
     output_file_name = "executed_openai_notebook_output.txt"

website/docs/Use-Cases/Auto-Generation.md

@@ -107,10 +107,13 @@ There are a number of benefits of using `flaml.oai.Completion.create` to perform inference.
 ### API unification
-`flaml.oai.Completion.create` is compatible with both `openai.Completion.create` and `openai.ChatCompletion.create`, and both OpenAI API and Azure OpenAI API. So models such as "text-davinci-003", "gpt-3.5-turbo" and "gpt-4" can share a common API. When only tuning the chat-based models, `flaml.oai.ChatCompletion` can be used.
+`flaml.oai.Completion.create` is compatible with both `openai.Completion.create` and `openai.ChatCompletion.create`, and both OpenAI API and Azure OpenAI API. So models such as "text-davinci-003", "gpt-3.5-turbo" and "gpt-4" can share a common API.
+When chat models are used and `prompt` is given as the input to `flaml.oai.Completion.create`, the prompt will be automatically converted into `messages` to fit the chat completion API requirement. One advantage is that one can experiment with both chat and non-chat models for the same prompt in a unified API.
+For local LLMs, one can spin up an endpoint using a package like [simple_ai_server](https://github.com/lhenault/simpleAI), and then use the same API to send a request.
+When only working with the chat-based models, `flaml.oai.ChatCompletion` can be used. It also does automatic conversion from prompt to messages, if prompt is provided instead of messages.
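As an illustration of the unified API described above (a sketch; the model names are examples, and the local endpoint URL is a placeholder):

```python
from flaml import oai

# The same call shape works for a chat model and a non-chat model; a `prompt`
# sent to a chat model is converted to `messages` automatically.
response = oai.Completion.create(prompt="What is 2 + 2?", model="gpt-3.5-turbo")
response = oai.Completion.create(prompt="What is 2 + 2?", model="text-davinci-003")

# Same API against a locally served model (placeholder endpoint):
response = oai.Completion.create(
    prompt="What is 2 + 2?",
    model="llama-7B",
    api_base="http://127.0.0.1:8080/v1",
    api_type="open_ai",
)
```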
 
 ### Caching
 
 API call results are cached locally and reused when the same request is issued. This is useful when repeating or continuing experiments for reproducibility and cost saving. It still allows controlled randomness by setting the "seed", using [`set_cache`](../reference/autogen/oai/completion#set_cache) or specifying in `create()`.
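For example (a minimal sketch; `set_cache` is documented in the reference linked above):

```python
from flaml import oai

# Responses are cached per seed; changing the seed forces fresh sampling
# while keeping the new results cached for reuse.
oai.Completion.set_cache(seed=44)
response = oai.Completion.create(prompt="Tell me a joke.", model="gpt-3.5-turbo")
```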
@@ -149,7 +152,8 @@ response = oai.Completion.create(
 )
 ```
 
-It will try querying Azure OpenAI gpt-4, OpenAI gpt-3.5-turbo, and llama-7B one by one, until a valid result is returned. This can speed up the development process where the rate limit is a bottleneck.
+It will try querying Azure OpenAI gpt-4, OpenAI gpt-3.5-turbo, and a locally hosted llama-7B one by one, ignoring AuthenticationError, RateLimitError and Timeout,
+until a valid result is returned. This can speed up the development process where the rate limit is a bottleneck. An error will be raised if the last choice fails. So make sure the last choice in the list has the best availability.
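The retry-and-fallback behavior described in the added text can be pictured as follows (illustrative only, not the library's implementation; it assumes the pre-1.0 `openai` package, where these error classes live in `openai.error`):

```python
import openai
from openai.error import AuthenticationError, RateLimitError, Timeout

def create_with_fallback(config_list, **base_config):
    """Try each config in order; ignore availability errors except on the last choice."""
    for i, config in enumerate(config_list):
        try:
            return openai.ChatCompletion.create(**{**base_config, **config})
        except (AuthenticationError, RateLimitError, Timeout):
            if i == len(config_list) - 1:
                raise  # the last choice also failed: surface the error
```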
 
 ### Templating