Improving logging in oai.completion to show token_count (#179)

* update

* update doc

---------

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu>
Author: Yiran Wu
Date: 2023-10-12 10:31:52 -04:00
Committed by: GitHub
Parent: 6b14bd6609
Commit: b61aeb6cce
3 changed files with 62 additions and 0 deletions


@@ -9,6 +9,7 @@ from flaml import tune, BlendSearch
from flaml.tune.space import is_constant
from flaml.automl.logger import logger_formatter
from .openai_utils import get_key
from collections import defaultdict
try:
    import openai
@@ -157,6 +158,7 @@ class Completion(openai_Completion):
            value = {
                "created_at": [],
                "cost": [],
                "token_count": [],
            }
            if "messages" in config:
                messages = config["messages"]
@@ -168,6 +170,14 @@ class Completion(openai_Completion):
                key = get_key([config["prompt"]] + [choice.get("text") for choice in response["choices"]])
            value["created_at"].append(cls._count_create)
            value["cost"].append(response["cost"])
            value["token_count"].append(
                {
                    "model": response["model"],
                    "prompt_tokens": response["usage"]["prompt_tokens"],
                    "completion_tokens": response["usage"].get("completion_tokens", 0),
                    "total_tokens": response["usage"]["total_tokens"],
                }
            )
            cls._history_dict[key] = value
            cls._count_create += 1
            return
@@ -1067,6 +1077,44 @@ class Completion(openai_Completion):
"""Return the book keeping dictionary."""
return cls._history_dict
@classmethod
def print_usage_summary(cls) -> Dict:
"""Return the usage summary."""
if cls._history_dict is None:
print("No usage summary available.", flush=True)
token_count_summary = defaultdict(lambda: {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0})
if not cls._history_compact:
source = cls._history_dict.values()
total_cost = sum(msg_pair["response"]["cost"] for msg_pair in source)
else:
# source = cls._history_dict["token_count"]
# total_cost = sum(cls._history_dict['cost'])
total_cost = sum(sum(value_list["cost"]) for value_list in cls._history_dict.values())
source = (
token_data for value_list in cls._history_dict.values() for token_data in value_list["token_count"]
)
for entry in source:
if not cls._history_compact:
model = entry["response"]["model"]
token_data = entry["response"]["usage"]
else:
model = entry["model"]
token_data = entry
token_count_summary[model]["prompt_tokens"] += token_data["prompt_tokens"]
token_count_summary[model]["completion_tokens"] += token_data["completion_tokens"]
token_count_summary[model]["total_tokens"] += token_data["total_tokens"]
print(f"Total cost: {total_cost}", flush=True)
for model, counts in token_count_summary.items():
print(
f"Token count summary for model {model}: prompt_tokens: {counts['prompt_tokens']}, completion_tokens: {counts['completion_tokens']}, total_tokens: {counts['total_tokens']}",
flush=True,
)
    @classmethod
    def start_logging(
        cls, history_dict: Optional[Dict] = None, compact: Optional[bool] = True, reset_counter: Optional[bool] = True


@@ -148,9 +148,11 @@ print('Hello world!')
```""",
)
print(conversations)
autogen.ChatCompletion.print_usage_summary()
autogen.ChatCompletion.start_logging(compact=False)
user.send("""Execute temp.py""", assistant)
print(autogen.ChatCompletion.logged_history)
autogen.ChatCompletion.print_usage_summary()
autogen.ChatCompletion.stop_logging()


@@ -260,6 +260,10 @@ The API calls made after this will be automatically logged. They can be retrieve
```python
autogen.ChatCompletion.logged_history
```
A function is provided to print a usage summary (the total cost and the token counts used by each model):
```python
autogen.ChatCompletion.print_usage_summary()
```
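For context, here is a minimal sketch of the overall workflow. The prompt and model below are illustrative, not part of this change, and an OpenAI API key is assumed to be configured in the environment:
```python
import autogen

# Start logging (compact mode by default), make a call, then print the summary.
autogen.ChatCompletion.start_logging()
autogen.ChatCompletion.create(
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    model="gpt-3.5-turbo",  # illustrative model choice, not part of this change
)
autogen.ChatCompletion.print_usage_summary()  # total cost and per-model token counts
autogen.ChatCompletion.stop_logging()
```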
To stop logging, use
```python
autogen.ChatCompletion.stop_logging()
@@ -366,5 +370,13 @@ Set `compact=False` in `start_logging()` to switch.
},
}
```
* Example output of the usage summary:
```
Total cost: <cost>
Token count summary for model <model>: prompt_tokens: <count 1>, completion_tokens: <count 2>, total_tokens: <count 3>
```
Note that the individual API call history repeats information from the conversation; for a long conversation the degree of redundancy is high. The compact history is more space-efficient, while the individual API call history preserves more detail.
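To make the compact format concrete, the sketch below shows the rough shape of one `logged_history` entry after this change and the per-model aggregation that `print_usage_summary()` performs over its `token_count` list. All numbers are placeholders, not real output:
```python
from collections import defaultdict

# Shape of one compact logged_history entry; every value here is a placeholder.
entry = {
    "created_at": [0, 1],
    "cost": [0.0015, 0.0021],
    "token_count": [
        {"model": "gpt-3.5-turbo", "prompt_tokens": 25, "completion_tokens": 40, "total_tokens": 65},
        {"model": "gpt-3.5-turbo", "prompt_tokens": 70, "completion_tokens": 35, "total_tokens": 105},
    ],
}

# Per-model aggregation, mirroring what print_usage_summary does in compact mode.
summary = defaultdict(lambda: {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0})
for record in entry["token_count"]:
    for field in ("prompt_tokens", "completion_tokens", "total_tokens"):
        summary[record["model"]][field] += record[field]

print(f"Total cost: {sum(entry['cost'])}")
print(dict(summary))
```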