Mirror of https://github.com/microsoft/autogen.git
Improving logging in oai.completion to show token_count (#179)
* update

* update doc

---------

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu>
@@ -9,6 +9,7 @@ from flaml import tune, BlendSearch
from flaml.tune.space import is_constant
from flaml.automl.logger import logger_formatter
from .openai_utils import get_key
from collections import defaultdict

try:
    import openai
@@ -157,6 +158,7 @@ class Completion(openai_Completion):
            value = {
                "created_at": [],
                "cost": [],
                "token_count": [],
            }
            if "messages" in config:
                messages = config["messages"]
@@ -168,6 +170,14 @@ class Completion(openai_Completion):
                key = get_key([config["prompt"]] + [choice.get("text") for choice in response["choices"]])
            value["created_at"].append(cls._count_create)
            value["cost"].append(response["cost"])
            value["token_count"].append(
                {
                    "model": response["model"],
                    "prompt_tokens": response["usage"]["prompt_tokens"],
                    "completion_tokens": response["usage"].get("completion_tokens", 0),
                    "total_tokens": response["usage"]["total_tokens"],
                }
            )
            cls._history_dict[key] = value
            cls._count_create += 1
            return
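For orientation, here is a minimal, self-contained sketch of the compact history entry this hunk builds. The `response` payload is a hand-made stand-in for an OpenAI API response (not real library output); the field names mirror those in the diff above:

```python
# Illustrative stand-in for an OpenAI API response; values are made up.
response = {
    "model": "gpt-4",
    "cost": 0.0042,
    "usage": {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150},
}

# Mirror of the bookkeeping in the hunk above.
value = {"created_at": [], "cost": [], "token_count": []}
value["created_at"].append(0)  # stand-in for cls._count_create
value["cost"].append(response["cost"])
value["token_count"].append(
    {
        "model": response["model"],
        "prompt_tokens": response["usage"]["prompt_tokens"],
        # .get() because some responses omit completion_tokens
        "completion_tokens": response["usage"].get("completion_tokens", 0),
        "total_tokens": response["usage"]["total_tokens"],
    }
)
print(value)
```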
@@ -1067,6 +1077,44 @@ class Completion(openai_Completion):
        """Return the book keeping dictionary."""
        return cls._history_dict

    @classmethod
    def print_usage_summary(cls) -> None:
        """Print the usage summary: total cost and per-model token counts."""
        if cls._history_dict is None:
            print("No usage summary available.", flush=True)
            return  # nothing has been logged yet

        token_count_summary = defaultdict(lambda: {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0})

        if not cls._history_compact:
            # Individual mode: one entry per API call, usage nested in the response.
            source = cls._history_dict.values()
            total_cost = sum(msg_pair["response"]["cost"] for msg_pair in source)
        else:
            # Compact mode: per-key lists; flatten the token_count entries across keys.
            total_cost = sum(sum(value_list["cost"]) for value_list in cls._history_dict.values())
            source = (
                token_data for value_list in cls._history_dict.values() for token_data in value_list["token_count"]
            )

        for entry in source:
            if not cls._history_compact:
                model = entry["response"]["model"]
                token_data = entry["response"]["usage"]
            else:
                model = entry["model"]
                token_data = entry

            token_count_summary[model]["prompt_tokens"] += token_data["prompt_tokens"]
            token_count_summary[model]["completion_tokens"] += token_data["completion_tokens"]
            token_count_summary[model]["total_tokens"] += token_data["total_tokens"]

        print(f"Total cost: {total_cost}", flush=True)
        for model, counts in token_count_summary.items():
            print(
                f"Token count summary for model {model}: "
                f"prompt_tokens: {counts['prompt_tokens']}, "
                f"completion_tokens: {counts['completion_tokens']}, "
                f"total_tokens: {counts['total_tokens']}",
                flush=True,
            )

    @classmethod
    def start_logging(
        cls, history_dict: Optional[Dict] = None, compact: Optional[bool] = True, reset_counter: Optional[bool] = True
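The aggregation in `print_usage_summary` is easy to verify in isolation. A minimal sketch, assuming a compact-mode history; the `history` dict below is a hypothetical stand-in for `cls._history_dict`:

```python
from collections import defaultdict

# Hypothetical compact-mode history: conversation key -> per-call lists.
history = {
    "key1": {
        "cost": [0.004, 0.002],
        "token_count": [
            {"model": "gpt-4", "prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150},
            {"model": "gpt-4", "prompt_tokens": 60, "completion_tokens": 20, "total_tokens": 80},
        ],
    },
    "key2": {
        "cost": [0.001],
        "token_count": [
            {"model": "gpt-3.5-turbo", "prompt_tokens": 30, "completion_tokens": 10, "total_tokens": 40},
        ],
    },
}

total_cost = sum(sum(v["cost"]) for v in history.values())
summary = defaultdict(lambda: {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0})
for entry in (t for v in history.values() for t in v["token_count"]):
    for field in ("prompt_tokens", "completion_tokens", "total_tokens"):
        summary[entry["model"]][field] += entry[field]

print(f"Total cost: {total_cost}")  # Total cost: 0.007 (up to float rounding)
print(dict(summary))                # per-model token totals
```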
@@ -148,9 +148,11 @@ print('Hello world!')
```""",
    )
    print(conversations)
    autogen.ChatCompletion.print_usage_summary()
    autogen.ChatCompletion.start_logging(compact=False)
    user.send("""Execute temp.py""", assistant)
    print(autogen.ChatCompletion.logged_history)
    autogen.ChatCompletion.print_usage_summary()
    autogen.ChatCompletion.stop_logging()
@@ -260,6 +260,10 @@ The API calls made after this will be automatically logged. They can be retrieved with:
```python
autogen.ChatCompletion.logged_history
```
There is a function that can be used to print the usage summary (total cost, and token usage for each model):
```python
autogen.ChatCompletion.print_usage_summary()
```
To stop logging, use
```python
autogen.ChatCompletion.stop_logging()
```
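Putting the documented calls together, a typical session might look like the sketch below. The `config_list` contents and the prompt are placeholders, and a configured OpenAI key is assumed:

```python
import autogen

autogen.ChatCompletion.start_logging()  # compact logging is the default

# Placeholder config; substitute your own model/endpoint details.
response = autogen.ChatCompletion.create(
    config_list=[{"model": "gpt-3.5-turbo"}],
    messages=[{"role": "user", "content": "1+1="}],
)

print(autogen.ChatCompletion.logged_history)  # the compact history dict
autogen.ChatCompletion.print_usage_summary()  # total cost + per-model token counts
autogen.ChatCompletion.stop_logging()
```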
@@ -366,5 +370,13 @@ Set `compact=False` in `start_logging()` to switch.
    },
}
```

* Example output of the usage summary:
```
Total cost: <cost>
Token count summary for model <model>: prompt_tokens: <count 1>, completion_tokens: <count 2>, total_tokens: <count 3>
```
The individual API call history contains redundant conversation content, and for a long conversation the degree of redundancy is high. The compact history is more space-efficient, while the individual API call history preserves more detail.
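To make that trade-off concrete, here is a hedged sketch of the two history shapes; the keys and payloads are illustrative, not literal library output:

```python
# Compact history (default): one entry per conversation key, with
# per-call lists for created_at/cost/token_count instead of full payloads.
compact_entry = {
    "created_at": [0, 1],
    "cost": [0.004, 0.002],
    "token_count": [
        {"model": "gpt-4", "prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150},
        {"model": "gpt-4", "prompt_tokens": 160, "completion_tokens": 20, "total_tokens": 180},
    ],
}

# Individual history (compact=False): one entry per API call, carrying the
# full request and response, so later calls repeat earlier turns verbatim.
individual_entry = {
    "request": {"messages": [{"role": "user", "content": "..."}]},
    "response": {
        "model": "gpt-4",
        "cost": 0.004,
        "usage": {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150},
    },
}
```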