Mirror of https://github.com/microsoft/autogen.git
Improving logging in oai.completion to show token_count (#179)
* update

* update doc

---------

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu>
@@ -9,6 +9,7 @@ from flaml import tune, BlendSearch
from flaml.tune.space import is_constant
from flaml.automl.logger import logger_formatter
from .openai_utils import get_key
from collections import defaultdict

try:
    import openai
@@ -157,6 +158,7 @@ class Completion(openai_Completion):
            value = {
                "created_at": [],
                "cost": [],
                "token_count": [],
            }
            if "messages" in config:
                messages = config["messages"]
@@ -168,6 +170,14 @@ class Completion(openai_Completion):
                key = get_key([config["prompt"]] + [choice.get("text") for choice in response["choices"]])
            value["created_at"].append(cls._count_create)
            value["cost"].append(response["cost"])
            value["token_count"].append(
                {
                    "model": response["model"],
                    "prompt_tokens": response["usage"]["prompt_tokens"],
                    "completion_tokens": response["usage"].get("completion_tokens", 0),
                    "total_tokens": response["usage"]["total_tokens"],
                }
            )
            cls._history_dict[key] = value
            cls._count_create += 1
            return
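For orientation, here is a minimal, self-contained sketch of the compact history entry this hunk builds. The `response` payload is a hand-made stand-in for an OpenAI API response (not real library output); the field names mirror those in the diff above:

```python
# Illustrative stand-in for an OpenAI API response; values are made up.
response = {
    "model": "gpt-4",
    "cost": 0.0042,
    "usage": {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150},
}

# Mirror of the bookkeeping in the hunk above.
value = {"created_at": [], "cost": [], "token_count": []}
value["created_at"].append(0)  # stand-in for cls._count_create
value["cost"].append(response["cost"])
value["token_count"].append(
    {
        "model": response["model"],
        "prompt_tokens": response["usage"]["prompt_tokens"],
        # .get() because some responses omit completion_tokens
        "completion_tokens": response["usage"].get("completion_tokens", 0),
        "total_tokens": response["usage"]["total_tokens"],
    }
)
print(value)
```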
@@ -1067,6 +1077,44 @@ class Completion(openai_Completion):
        """Return the book keeping dictionary."""
        return cls._history_dict

    @classmethod
    def print_usage_summary(cls) -> None:
        """Print the usage summary: total cost and per-model token counts."""
        if cls._history_dict is None:
            print("No usage summary available.", flush=True)
            return  # nothing has been logged yet

        token_count_summary = defaultdict(lambda: {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0})

        if not cls._history_compact:
            # Individual mode: one entry per API call, usage nested in the response.
            source = cls._history_dict.values()
            total_cost = sum(msg_pair["response"]["cost"] for msg_pair in source)
        else:
            # Compact mode: per-key lists; flatten the token_count entries across keys.
            total_cost = sum(sum(value_list["cost"]) for value_list in cls._history_dict.values())
            source = (
                token_data for value_list in cls._history_dict.values() for token_data in value_list["token_count"]
            )

        for entry in source:
            if not cls._history_compact:
                model = entry["response"]["model"]
                token_data = entry["response"]["usage"]
            else:
                model = entry["model"]
                token_data = entry

            token_count_summary[model]["prompt_tokens"] += token_data["prompt_tokens"]
            token_count_summary[model]["completion_tokens"] += token_data["completion_tokens"]
            token_count_summary[model]["total_tokens"] += token_data["total_tokens"]

        print(f"Total cost: {total_cost}", flush=True)
        for model, counts in token_count_summary.items():
            print(
                f"Token count summary for model {model}: "
                f"prompt_tokens: {counts['prompt_tokens']}, "
                f"completion_tokens: {counts['completion_tokens']}, "
                f"total_tokens: {counts['total_tokens']}",
                flush=True,
            )

    @classmethod
    def start_logging(
        cls, history_dict: Optional[Dict] = None, compact: Optional[bool] = True, reset_counter: Optional[bool] = True
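The aggregation in `print_usage_summary` is easy to verify in isolation. A minimal sketch, assuming a compact-mode history; the `history` dict below is a hypothetical stand-in for `cls._history_dict`:

```python
from collections import defaultdict

# Hypothetical compact-mode history: conversation key -> per-call lists.
history = {
    "key1": {
        "cost": [0.004, 0.002],
        "token_count": [
            {"model": "gpt-4", "prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150},
            {"model": "gpt-4", "prompt_tokens": 60, "completion_tokens": 20, "total_tokens": 80},
        ],
    },
    "key2": {
        "cost": [0.001],
        "token_count": [
            {"model": "gpt-3.5-turbo", "prompt_tokens": 30, "completion_tokens": 10, "total_tokens": 40},
        ],
    },
}

total_cost = sum(sum(v["cost"]) for v in history.values())
summary = defaultdict(lambda: {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0})
for entry in (t for v in history.values() for t in v["token_count"]):
    for field in ("prompt_tokens", "completion_tokens", "total_tokens"):
        summary[entry["model"]][field] += entry[field]

print(f"Total cost: {total_cost}")  # Total cost: 0.007 (up to float rounding)
print(dict(summary))                # per-model token totals
```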
@@ -148,9 +148,11 @@ print('Hello world!')
```""",
    )
    print(conversations)
    autogen.ChatCompletion.print_usage_summary()
    autogen.ChatCompletion.start_logging(compact=False)
    user.send("""Execute temp.py""", assistant)
    print(autogen.ChatCompletion.logged_history)
    autogen.ChatCompletion.print_usage_summary()
    autogen.ChatCompletion.stop_logging()
@@ -260,6 +260,10 @@ The API calls made after this will be automatically logged. They can be retrieved with:
```python
autogen.ChatCompletion.logged_history
```
There is a function that can be used to print the usage summary (total cost, and token usage for each model):
```python
autogen.ChatCompletion.print_usage_summary()
```
To stop logging, use
```python
autogen.ChatCompletion.stop_logging()
```
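Putting the documented calls together, a typical session might look like the sketch below. The `config_list` contents and the prompt are placeholders, and a configured OpenAI key is assumed:

```python
import autogen

autogen.ChatCompletion.start_logging()  # compact logging is the default

# Placeholder config; substitute your own model/endpoint details.
response = autogen.ChatCompletion.create(
    config_list=[{"model": "gpt-3.5-turbo"}],
    messages=[{"role": "user", "content": "1+1="}],
)

print(autogen.ChatCompletion.logged_history)  # the compact history dict
autogen.ChatCompletion.print_usage_summary()  # total cost + per-model token counts
autogen.ChatCompletion.stop_logging()
```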
@@ -366,5 +370,13 @@ Set `compact=False` in `start_logging()` to switch.
    },
}
```

* Example output of the usage summary:
```
Total cost: <cost>
Token count summary for model <model>: prompt_tokens: <count 1>, completion_tokens: <count 2>, total_tokens: <count 3>
```
The individual API call history contains redundant conversation content, and for a long conversation the degree of redundancy is high. The compact history is more space-efficient, while the individual API call history preserves more detail.
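To make that trade-off concrete, here is a hedged sketch of the two history shapes; the keys and payloads are illustrative, not literal library output:

```python
# Compact history (default): one entry per conversation key, with
# per-call lists for created_at/cost/token_count instead of full payloads.
compact_entry = {
    "created_at": [0, 1],
    "cost": [0.004, 0.002],
    "token_count": [
        {"model": "gpt-4", "prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150},
        {"model": "gpt-4", "prompt_tokens": 160, "completion_tokens": 20, "total_tokens": 180},
    ],
}

# Individual history (compact=False): one entry per API call, carrying the
# full request and response, so later calls repeat earlier turns verbatim.
individual_entry = {
    "request": {"messages": [{"role": "user", "content": "..."}]},
    "response": {
        "model": "gpt-4",
        "cost": 0.004,
        "usage": {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150},
    },
}
```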