seed -> cache_seed (#600)

Chi Wang
2023-11-08 15:39:02 -08:00
committed by GitHub
parent 2a96e4d9d2
commit dfcbea9777
29 changed files with 87 additions and 79 deletions

View File

@@ -120,7 +120,6 @@ class GroupChatManager(ConversableAgent):
max_consecutive_auto_reply: Optional[int] = sys.maxsize,
human_input_mode: Optional[str] = "NEVER",
system_message: Optional[str] = "Group chat manager.",
# seed: Optional[int] = 4,
**kwargs,
):
super().__init__(
@@ -136,8 +135,6 @@ class GroupChatManager(ConversableAgent):
# Allow async chat if initiated using a_initiate_chat
self.register_reply(Agent, GroupChatManager.a_run_chat, config=groupchat, reset_config=GroupChat.reset)
# self._random = random.Random(seed)
def run_chat(
self,
messages: Optional[List[Dict]] = None,

View File

@@ -509,11 +509,11 @@ def eval_function_completions(
_FUNC_COMPLETION_PROMPT = "# Python 3{definition}"
_FUNC_COMPLETION_STOP = ["\nclass", "\ndef", "\nif", "\nprint"]
_IMPLEMENT_CONFIGS = [
{"model": FAST_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "temperature": 0, "seed": 0},
{"model": FAST_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "stop": _FUNC_COMPLETION_STOP, "n": 7, "seed": 0},
{"model": DEFAULT_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "temperature": 0, "seed": 1},
{"model": DEFAULT_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "stop": _FUNC_COMPLETION_STOP, "n": 2, "seed": 2},
{"model": DEFAULT_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "stop": _FUNC_COMPLETION_STOP, "n": 1, "seed": 2},
{"model": FAST_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "temperature": 0, "cache_seed": 0},
{"model": FAST_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "stop": _FUNC_COMPLETION_STOP, "n": 7, "cache_seed": 0},
{"model": DEFAULT_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "temperature": 0, "cache_seed": 1},
{"model": DEFAULT_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "stop": _FUNC_COMPLETION_STOP, "n": 2, "cache_seed": 2},
{"model": DEFAULT_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "stop": _FUNC_COMPLETION_STOP, "n": 1, "cache_seed": 2},
]
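Each config above now carries its own `cache_seed`, so attempts with different models or sampling settings land in separate disk-cache buckets. A hypothetical driver sketch (not part of this commit; it assumes the legacy `Completion` templating API and module scope for `_IMPLEMENT_CONFIGS`):

```python
# Hypothetical sketch: configs are tried in order, falling through to the
# next one on failure. The {definition} placeholder in the prompt template
# is filled from `context`; each config's cache_seed selects its own
# disk-cache bucket.
import autogen

definition = 'def add(a: int, b: int) -> int:\n    """Return a + b."""\n'
response = autogen.Completion.create(
    context={"definition": definition},
    config_list=_IMPLEMENT_CONFIGS,
)
print(autogen.Completion.extract_text(response))
```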

View File

@@ -31,7 +31,7 @@ class OpenAIWrapper:
"""A wrapper class for openai client."""
cache_path_root: str = ".cache"
extra_kwargs = {"seed", "filter_func", "allow_format_str_template", "context", "api_version"}
extra_kwargs = {"cache_seed", "filter_func", "allow_format_str_template", "context", "api_version"}
openai_kwargs = set(inspect.getfullargspec(OpenAI.__init__).kwonlyargs)
def __init__(self, *, config_list: List[Dict] = None, **base_config):
@@ -191,8 +191,8 @@ class OpenAIWrapper:
The actual prompt will be:
"Complete the following sentence: Today I feel".
More examples can be found at [templating](/docs/Use-Cases/enhanced_inference#templating).
- `seed` (int | None) for the cache. Default to 41.
An integer seed is useful when implementing "controlled randomness" for the completion.
- `cache_seed` (int | None) for the cache. Defaults to 41.
An integer cache_seed is useful when implementing "controlled randomness" for the completion.
None for no caching.
- filter_func (Callable | None): A function that takes in the context and the response
and returns a boolean to indicate whether the response is valid. E.g.,
@@ -219,12 +219,12 @@ class OpenAIWrapper:
self._process_for_azure(create_config, extra_kwargs, "extra")
# construct the create params
params = self._construct_create_params(create_config, extra_kwargs)
# get the seed, filter_func and context
seed = extra_kwargs.get("seed", 41)
# get the cache_seed, filter_func and context
cache_seed = extra_kwargs.get("cache_seed", 41)
filter_func = extra_kwargs.get("filter_func")
context = extra_kwargs.get("context")
with diskcache.Cache(f"{self.cache_path_root}/{seed}") as cache:
if seed is not None:
with diskcache.Cache(f"{self.cache_path_root}/{cache_seed}") as cache:
if cache_seed is not None:
# Try to get the response from cache
key = get_key(params)
response = cache.get(key, None)
@@ -245,7 +245,7 @@ class OpenAIWrapper:
if i == last:
raise
else:
if seed is not None:
if cache_seed is not None:
# Cache the response
cache.set(key, response)
return response
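In the rewritten `create` above, `cache_seed` names an on-disk bucket, a key is derived from the full request params, and the API is only called on a cache miss. A standalone sketch of that behavior (assumes `diskcache` is installed; the key function is a stand-in for autogen's `get_key`):

```python
import diskcache

def cached_call(params: dict, cache_seed, call_api):
    """Mimic the caching flow of OpenAIWrapper.create (sketch, not the real code)."""
    if cache_seed is None:
        return call_api(**params)  # caching disabled: always call the API
    with diskcache.Cache(f".cache/{cache_seed}") as cache:
        key = str(sorted(params.items()))  # stand-in for autogen's get_key(params)
        response = cache.get(key, None)
        if response is None:
            response = call_api(**params)  # cache miss: make the real call
            cache.set(key, response)       # store for identical future requests
        return response
```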

View File

@@ -109,8 +109,8 @@ class Completion(openai_Completion):
"prompt": "{prompt}",
}
seed = 41
cache_path = f".cache/{seed}"
cache_seed = 41
cache_path = f".cache/{cache_seed}"
# retry after this many seconds
retry_wait_time = 10
# fail a request after hitting RateLimitError for this many seconds
@@ -134,7 +134,7 @@ class Completion(openai_Completion):
cache_path (str, Optional): The root path for the cache.
The complete cache path will be {cache_path}/{seed}.
"""
cls.seed = seed
cls.cache_seed = seed
cls.cache_path = f"{cache_path_root}/{seed}"
@classmethod
@@ -145,7 +145,7 @@ class Completion(openai_Completion):
seed (int, Optional): The integer identifier for the pseudo seed.
If omitted, all caches under cache_path_root will be cleared.
cache_path (str, Optional): The root path for the cache.
The complete cache path will be {cache_path}/{seed}.
The complete cache path will be {cache_path}/{cache_seed}.
"""
if seed is None:
shutil.rmtree(cache_path_root, ignore_errors=True)
@@ -773,7 +773,7 @@ class Completion(openai_Completion):
Besides the parameters for the openai API call, it can also contain:
- `max_retry_period` (int): the total time (in seconds) allowed for retrying failed requests.
- `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request.
- `seed` (int) for the cache. This is useful when implementing "controlled randomness" for the completion.
- `cache_seed` (int) for the cache. This is useful when implementing "controlled randomness" for the completion.
Returns:
Responses from OpenAI API, with additional fields.
@@ -831,11 +831,11 @@ class Completion(openai_Completion):
return cls._get_response(
params, raise_on_ratelimit_or_timeout=raise_on_ratelimit_or_timeout, use_cache=False
)
seed = cls.seed
if "seed" in params:
cls.set_cache(params.pop("seed"))
cache_seed = cls.cache_seed
if "cache_seed" in params:
cls.set_cache(params.pop("cache_seed"))
with diskcache.Cache(cls.cache_path) as cls._cache:
cls.set_cache(seed)
cls.set_cache(cache_seed)
return cls._get_response(params, raise_on_ratelimit_or_timeout=raise_on_ratelimit_or_timeout)
@classmethod
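The legacy `Completion` API manages its cache bucket through class methods; note that `set_cache` and `clear_cache` keep the parameter name `seed` even though the class attribute is now `cache_seed`. A hedged usage sketch:

```python
# Sketch of cache management with the legacy Completion API.
import autogen

autogen.Completion.set_cache(seed=123, cache_path_root=".cache")  # bucket .cache/123
# ... Completion.create(...) calls made here are cached under .cache/123 ...
autogen.Completion.clear_cache(seed=123, cache_path_root=".cache")  # drop only this bucket
```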

View File

@@ -1 +1 @@
__version__ = "0.2.0b3"
__version__ = "0.2.0b4"

View File

@@ -185,7 +185,7 @@
" system_message=\"You are a helpful assistant.\",\n",
" llm_config={\n",
" \"timeout\": 600,\n",
" \"seed\": 42,\n",
" \"cache_seed\": 42,\n",
" \"config_list\": config_list,\n",
" },\n",
")\n",

View File

@@ -45,7 +45,7 @@
},
"outputs": [],
"source": [
"# %pip install pyautogen~=0.2.0b2"
"# %pip install pyautogen~=0.2.0b4"
]
},
{
@@ -330,7 +330,7 @@
"assistant = autogen.AssistantAgent(\n",
" name=\"assistant\",\n",
" llm_config={\n",
" \"seed\": 42, # seed for caching and reproducibility\n",
" \"cache_seed\": 42, # seed for caching and reproducibility\n",
" \"config_list\": config_list, # a list of OpenAI API configurations\n",
" \"temperature\": 0, # temperature for sampling\n",
" }, # configuration for autogen's enhanced inference API which is compatible with OpenAI API\n",
@@ -806,7 +806,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.17"
"version": "3.11.4"
},
"vscode": {
"interpreter": {

View File

@@ -35,7 +35,7 @@
"outputs": [],
"source": [
"%%capture --no-stderr\n",
"# %pip install \"pyautogen~=0.2.0b2\"\n",
"# %pip install \"pyautogen~=0.2.0b4\"\n",
"%pip install chess -U"
]
},
@@ -292,13 +292,13 @@
" color=\"black\",\n",
" board_agent=board_agent,\n",
" max_turns=max_turn,\n",
" llm_config={\"temperature\": 0.5, \"seed\": 1, \"config_list\": config_list_gpt4},\n",
" llm_config={\"temperature\": 0.5, \"cache_seed\": 1, \"config_list\": config_list_gpt4},\n",
")\n",
"player_white = ChessPlayerAgent(\n",
" color=\"white\",\n",
" board_agent=board_agent,\n",
" max_turns=max_turn,\n",
" llm_config={\"temperature\": 0.5, \"seed\": 2, \"config_list\": config_list_gpt4},\n",
" llm_config={\"temperature\": 0.5, \"cache_seed\": 2, \"config_list\": config_list_gpt4},\n",
")"
]
},

View File

@@ -35,7 +35,7 @@
"outputs": [],
"source": [
"%%capture --no-stderr\n",
"# %pip install pyautogen~=0.2.0b2"
"# %pip install pyautogen~=0.2.0b4"
]
},
{
@@ -124,7 +124,7 @@
"metadata": {},
"outputs": [],
"source": [
"llm_config = {\"config_list\": config_list_gpt4, \"seed\": 42}\n",
"llm_config = {\"config_list\": config_list_gpt4, \"cache_seed\": 42}\n",
"user_proxy = autogen.UserProxyAgent(\n",
" name=\"User_proxy\",\n",
" system_message=\"A human admin.\",\n",

View File

@@ -33,7 +33,7 @@
"outputs": [],
"source": [
"%%capture --no-stderr\n",
"# %pip install pyautogen~=0.2.0b2"
"# %pip install pyautogen~=0.2.0b4"
]
},
{
@@ -111,7 +111,7 @@
"outputs": [],
"source": [
"gpt4_config = {\n",
" \"seed\": 42, # change the seed for different trials\n",
" \"cache_seed\": 42, # change the cache_seed for different trials\n",
" \"temperature\": 0,\n",
" \"config_list\": config_list_gpt4,\n",
" \"timeout\": 120,\n",

View File

@@ -33,7 +33,7 @@
"outputs": [],
"source": [
"%%capture --no-stderr\n",
"# %pip install pyautogen~=0.2.0b2"
"# %pip install pyautogen~=0.2.0b4"
]
},
{
@@ -132,7 +132,7 @@
"metadata": {},
"outputs": [],
"source": [
"llm_config = {\"config_list\": config_list_gpt4, \"seed\": 42}\n",
"llm_config = {\"config_list\": config_list_gpt4, \"cache_seed\": 42}\n",
"user_proxy = autogen.UserProxyAgent(\n",
" name=\"User_proxy\",\n",
" system_message=\"A human admin.\",\n",

View File

@@ -50,7 +50,7 @@
"outputs": [],
"source": [
"%%capture --no-stderr\n",
"# %pip install pyautogen~=0.1.0"
"# %pip install pyautogen~=0.2.0b4"
]
},
{
@@ -163,7 +163,7 @@
"metadata": {},
"outputs": [],
"source": [
"llm_config = {\"config_list\": config_list_gpt4, \"seed\": 42}"
"llm_config = {\"config_list\": config_list_gpt4, \"cache_seed\": 42}"
]
},
{
@@ -359,7 +359,7 @@
"\n",
"\n",
"# Create the manager\n",
"llm_config = {\"config_list\": config_list_gpt4, \"seed\": 42, \"use_cache\":False} # use_cache is False because we want to observe if there is any communication pattern difference if we reran the group chat.\n",
"llm_config = {\"config_list\": config_list_gpt4, \"cache_seed\": None} # cache_seed is None because we want to observe if there is any communication pattern difference if we reran the group chat.\n",
"manager = autogen.GroupChatManager(groupchat=group_chat, llm_config=llm_config)\n",
"\n",
"\n",

View File

@@ -45,7 +45,7 @@
},
"outputs": [],
"source": [
"# %pip install pyautogen~=0.2.0b2"
"# %pip install pyautogen~=0.2.0b4"
]
},
{
@@ -123,7 +123,7 @@
"assistant = autogen.AssistantAgent(\n",
" name=\"assistant\",\n",
" llm_config={\n",
" \"seed\": 41,\n",
" \"cache_seed\": 41,\n",
" \"config_list\": config_list,\n",
" }\n",
")\n",

View File

@@ -15,7 +15,7 @@
"source": [
"### Before everything starts, install AutoGen with the `lmm` option\n",
"```bash\n",
"pip install pyautogen[lmm]\n",
"pip install \"pyautogen[lmm]~=0.2.0b4\"\n",
"```"
]
},
@@ -85,7 +85,7 @@
" },\n",
")\n",
"\n",
"gpt4_llm_config = {\"config_list\": config_list_gpt4, \"seed\": 42}"
"gpt4_llm_config = {\"config_list\": config_list_gpt4, \"cache_seed\": 42}"
]
},
{
@@ -699,7 +699,7 @@
"# },\n",
"# )\n",
"\n",
"# gpt35_llm_config = {\"config_list\": config_list_gpt35, \"seed\": 42}\n",
"# gpt35_llm_config = {\"config_list\": config_list_gpt35, \"cache_seed\": 42}\n",
"\n",
"\n",
"creator = FigureCreator(\n",

View File

@@ -5,9 +5,9 @@
"id": "2c75da30",
"metadata": {},
"source": [
"# Agent Chat with Multimodal Models\n",
"# Agent Chat with Multimodal Models: LLaVA\n",
"\n",
"We use **LLaVA** as an example for the multimodal feature. More information about LLaVA can be found in their [GitHub page](https://github.com/haotian-liu/LLaVA)\n",
"This notebook uses **LLaVA** as an example for the multimodal feature. More information about LLaVA can be found in their [GitHub page](https://github.com/haotian-liu/LLaVA)\n",
"\n",
"\n",
"This notebook contains the following information and examples:\n",
@@ -26,7 +26,7 @@
"source": [
"### Before everything starts, install AutoGen with the `lmm` option\n",
"```bash\n",
"pip install pyautogen[lmm]\n",
"pip install \"pyautogen[lmm]~=0.2.0b4\"\n",
"```"
]
},
@@ -848,7 +848,7 @@
" },\n",
")\n",
"\n",
"gpt4_llm_config = {\"config_list\": config_list_gpt4, \"seed\": 42}\n",
"gpt4_llm_config = {\"config_list\": config_list_gpt4, \"cache_seed\": 42}\n",
"\n",
"# config_list_gpt35 = autogen.config_list_from_json(\n",
"# \"OAI_CONFIG_LIST\",\n",
@@ -857,7 +857,7 @@
"# },\n",
"# )\n",
"\n",
"# gpt35_llm_config = {\"config_list\": config_list_gpt35, \"seed\": 42}\n",
"# gpt35_llm_config = {\"config_list\": config_list_gpt35, \"cache_seed\": 42}\n",
"\n",
"\n",
"creator = FigureCreator(\n",

View File

@@ -45,7 +45,7 @@
},
"outputs": [],
"source": [
"# %pip install pyautogen~=0.2.0b2 docker"
"# %pip install pyautogen~=0.2.0b4 docker"
]
},
{
@@ -156,7 +156,7 @@
" llm_config={\n",
" \"temperature\": 0,\n",
" \"timeout\": 600,\n",
" \"seed\": 42,\n",
" \"cache_seed\": 42,\n",
" \"config_list\": config_list,\n",
" \"functions\": [\n",
" {\n",

View File

@@ -45,7 +45,7 @@
},
"outputs": [],
"source": [
"# %pip install pyautogen~=0.2.0b2"
"# %pip install pyautogen~=0.2.0b4"
]
},
{
@@ -209,7 +209,7 @@
" name=\"assistant\",\n",
" llm_config={\n",
" \"timeout\": 600,\n",
" \"seed\": 41,\n",
" \"cache_seed\": 41,\n",
" \"config_list\": config_list,\n",
" \"temperature\": 0,\n",
" },\n",

View File

@@ -44,7 +44,7 @@
},
"outputs": [],
"source": [
"# %pip install pyautogen~=0.2.0b2"
"# %pip install pyautogen~=0.2.0b4"
]
},
{
@@ -161,7 +161,7 @@
" system_message=\"You are a helpful assistant. Reply TERMINATE when the task is done.\",\n",
" llm_config={\n",
" \"timeout\": 600,\n",
" \"seed\": 42,\n",
" \"cache_seed\": 42,\n",
" \"config_list\": config_list,\n",
" \"temperature\": 0,\n",
" \"functions\": [\n",

View File

@@ -49,7 +49,7 @@
},
"outputs": [],
"source": [
"# %pip install pyautogen~=0.2.0b2 docker"
"# %pip install pyautogen~=0.2.0b4 docker"
]
},
{
@@ -79,7 +79,7 @@
"\n",
"llm_config={\n",
" \"timeout\": 600,\n",
" \"seed\": 42,\n",
" \"cache_seed\": 42,\n",
" \"config_list\": config_list,\n",
" \"temperature\": 0,\n",
"}"

View File

@@ -39,7 +39,7 @@
" name=\"assistant\",\n",
" llm_config={\n",
" \"timeout\": 600,\n",
" \"seed\": 42,\n",
" \"cache_seed\": 42,\n",
" \"config_list\": config_list,\n",
" \"temperature\": 0,\n",
" },\n",
@@ -60,7 +60,7 @@
"metadata": {},
"outputs": [],
"source": [
"# ! pip install pyautogen"
"# %pip install \"pyautogen~=0.2.0b4\""
]
},
{
@@ -227,7 +227,7 @@
" name=\"3.5-assistant\",\n",
" llm_config={\n",
" \"timeout\": 600,\n",
" \"seed\": 42,\n",
" \"cache_seed\": 42,\n",
" \"config_list\": cheap_config_list,\n",
" \"temperature\": 0,\n",
" },\n",
@@ -238,7 +238,7 @@
" name=\"4-assistant\",\n",
" llm_config={\n",
" \"timeout\": 600,\n",
" \"seed\": 42,\n",
" \"cache_seed\": 42,\n",
" \"config_list\": costly_config_list,\n",
" \"temperature\": 0,\n",
" },\n",

View File

@@ -31,7 +31,7 @@ def test_ai_user_proxy_agent():
system_message="You are a helpful assistant.",
llm_config={
"timeout": 600,
"seed": 42,
"cache_seed": 42,
"config_list": config_list,
},
)
@@ -78,7 +78,7 @@ def test_gpt35(human_input_mode="NEVER", max_consecutive_auto_reply=5):
},
)
llm_config = {
"seed": 42,
"cache_seed": 42,
"config_list": config_list,
"max_tokens": 1024,
}
@@ -125,7 +125,7 @@ def test_create_execute_script(human_input_mode="NEVER", max_consecutive_auto_re
# autogen.ChatCompletion.start_logging(conversations)
llm_config = {
"timeout": 600,
"seed": 42,
"cache_seed": 42,
"config_list": config_list,
}
assistant = AssistantAgent(

View File

@@ -71,7 +71,7 @@ async def test_stream():
name="assistant",
llm_config={
"timeout": 600,
"seed": 41,
"cache_seed": 41,
"config_list": config_list,
"temperature": 0,
},

View File

@@ -16,7 +16,7 @@ async def test_async_get_human_input():
assistant = autogen.AssistantAgent(
name="assistant",
max_consecutive_auto_reply=2,
llm_config={"timeout": 600, "seed": 41, "config_list": config_list, "temperature": 0},
llm_config={"timeout": 600, "cache_seed": 41, "config_list": config_list, "temperature": 0},
)
user_proxy = autogen.UserProxyAgent(name="user", human_input_mode="ALWAYS", code_execution_config=False)

View File

@@ -30,7 +30,7 @@ def test_function_call_groupchat():
)
llm_config = {
"config_list": config_list_gpt4,
"seed": 42,
"cache_seed": 42,
"functions": [
{
"name": "get_random_number",

View File

@@ -38,7 +38,7 @@ def test_math_user_proxy_agent():
system_message="You are a helpful assistant.",
llm_config={
"timeout": 600,
"seed": 42,
"cache_seed": 42,
"config_list": config_list,
},
)

View File

@@ -25,7 +25,7 @@ skill_verbosity = 3 # 0 for basic info, 1 to add memory operations, 2 for analy
assert_on_error = False # GPT-4 nearly always succeeds on these unit tests, but GPT-3.5 is a bit less reliable.
recall_threshold = 1.5 # Higher numbers allow more (but less relevant) memos to be recalled.
seed = None
cache_seed = None
# If int, LLM calls with a cached response will be skipped and the response pulled from the cache. None exposes LLM non-determinism.
# Specify the model to use by uncommenting one of the following lines.
@@ -43,7 +43,7 @@ def create_teachable_agent(reset_db=False, verbosity=0):
config_list = config_list_from_json(env_or_file=OAI_CONFIG_LIST, filter_dict=filter_dict, file_location=KEY_LOC)
teachable_agent = TeachableAgent(
name="teachableagent",
llm_config={"config_list": config_list, "timeout": 120, "seed": seed},
llm_config={"config_list": config_list, "timeout": 120, "cache_seed": cache_seed},
teach_config={
"verbosity": verbosity,
"reset_db": reset_db,

View File

@@ -18,7 +18,11 @@ def test_aoai_chat_completion():
filter_dict={"api_type": ["azure"], "model": ["gpt-3.5-turbo"]},
)
client = OpenAIWrapper(config_list=config_list)
response = client.create(messages=[{"role": "user", "content": "2+2="}])
# for config in config_list:
# print(config)
# client = OpenAIWrapper(**config)
# response = client.create(messages=[{"role": "user", "content": "2+2="}], cache_seed=None)
response = client.create(messages=[{"role": "user", "content": "2+2="}], cache_seed=None)
print(response)
print(client.extract_text_or_function_call(response))

View File

@@ -65,7 +65,10 @@ print(client.extract_text_or_function_call(response))
```
- Inference parameter tuning and inference logging features are currently unavailable in `OpenAIWrapper`. Logging will be added in a future release.
Inference parameter tuning can be done via [`flaml.tune`](https://microsoft.github.io/FLAML/docs/Use-Cases/Tune-User-Defined-Function).
- `use_cache` is removed as a kwarg in `OpenAIWrapper.create()` for being automatically decided by `seed`: int | None.
- `seed` in autogen has been renamed to `cache_seed` to accommodate the newly added `seed` param in the openai chat completion api. `use_cache` is removed as a kwarg in `OpenAIWrapper.create()` because caching is now decided by `cache_seed`: int | None. The difference between autogen's `cache_seed` and openai's `seed` (see the sketch below) is that:
* autogen uses a local disk cache to guarantee that exactly the same output is produced for the same input; when the cache is hit, no openai api call is made.
* openai's `seed` is best-effort deterministic sampling with no guarantee of determinism. When using openai's `seed` with `cache_seed` set to None, an openai api call is made even for the same input, and there is no guarantee of getting exactly the same output.
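A hedged sketch of using the two knobs together (assumes `config_list` has already been loaded, e.g. via `autogen.config_list_from_json`):

```python
from autogen import OpenAIWrapper

client = OpenAIWrapper(config_list=config_list)
response = client.create(
    messages=[{"role": "user", "content": "2+2="}],
    seed=7,           # openai's param: best-effort server-side determinism
    cache_seed=None,  # autogen's param: None disables the local disk cache
)
```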
### Optional Dependencies
- #### docker

View File

@@ -137,19 +137,23 @@ For local LLMs, one can spin up an endpoint using a package like [FastChat](http
## Caching
API call results are cached locally and reused when the same request is issued. This is useful when repeating or continuing experiments for reproducibility and cost saving. It still allows controlled randomness by setting the "seed" specified in `OpenAIWrapper.create()` or the constructor of `OpenAIWrapper`.
API call results are cached locally and reused when the same request is issued. This is useful when repeating or continuing experiments for reproducibility and cost saving. It still allows controlled randomness via the `cache_seed` specified in `OpenAIWrapper.create()` or in the constructor of `OpenAIWrapper`.
```python
client = OpenAIWrapper(seed=...)
client = OpenAIWrapper(cache_seed=...)
client.create(...)
```
```python
client = OpenAIWrapper()
client.create(seed=..., ...)
client.create(cache_seed=..., ...)
```
Caching is enabled by default with seed 41. To disable it please set `seed` to None.
Caching is enabled by default with cache_seed 41. To disable it, set `cache_seed` to None.
_NOTE_. openai v1.1 introduces a new param `seed`. The difference between autogen's `cache_seed` and openai's `seed` (illustrated in the sketch after this list) is that:
* autogen uses a local disk cache to guarantee that exactly the same output is produced for the same input; when the cache is hit, no openai api call is made.
* openai's `seed` is best-effort deterministic sampling with no guarantee of determinism. When using openai's `seed` with `cache_seed` set to None, an openai api call is made even for the same input, and there is no guarantee of getting exactly the same output.
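A minimal sketch of the cache-hit guarantee (assumes `config_list` is defined): with the same `cache_seed`, repeating an identical request is served from disk, byte-for-byte identical, with no second openai api call.

```python
from autogen import OpenAIWrapper

client = OpenAIWrapper(cache_seed=42, config_list=config_list)
first = client.create(messages=[{"role": "user", "content": "Tell me a joke."}])
second = client.create(messages=[{"role": "user", "content": "Tell me a joke."}])
# the second call is answered from .cache/42, so the outputs match exactly
assert first.choices[0].message.content == second.choices[0].message.content
```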
## Error handling