Added token counting to TeamOne (#264)

* Added token counting to TeamOne
2026-04-20 03:02:16 -04:00 · 2024-07-26 09:45:41 -07:00
parent 25b9a91328
commit 61dec0a3ca
3 changed files with 70 additions and 37 deletions
--- a/python/benchmarks/GAIA/Templates/TeamOne/scenario.py
+++ b/python/benchmarks/GAIA/Templates/TeamOne/scenario.py
@@ -23,19 +23,22 @@ from team_one.agents.file_surfer import FileSurfer
 from team_one.utils import LogHandler, message_content_to_str

 import re
+import tiktoken

 from agnext.components.models import AssistantMessage

+encoding = None 
+def count_token(value: str) -> int:
+    # TODO: Migrate to model_client.count_tokens
+    global encoding
+    if encoding is None:
+        encoding = tiktoken.encoding_for_model("gpt-4o-2024-05-13")
+    return len(encoding.encode(value))

 async def response_preparer(task: str, source: str, client: ChatCompletionClient, transcript: List[LLMMessage]) -> str:
-    messages: List[LLMMessage] = [
-        UserMessage(
-            content=f"Earlier you were asked the following:\n\n{task}\n\nYour team then worked diligently to address that request. Here is a transcript of that conversation:",
-            source=source,
-        )
-    ]
+    messages: List[LLMMessage] = []

-    # copy them to this context
+    # copy them to this context 
    for message in transcript:
        messages.append(
            UserMessage(
@@ -45,6 +48,18 @@ async def response_preparer(task: str, source: str, client: ChatCompletionClient
            )
        )

+    # Drop messages from the front until at least 2000 tokens of the context window remain free
+    while len(messages) and client.remaining_tokens( messages ) < 2000:
+        messages.pop(0)
+
+    # Add the preamble
+    messages.insert(0,
+        UserMessage(
+            content=f"Earlier you were asked the following:\n\n{task}\n\nYour team then worked diligently to address that request. Here is a transcript of that conversation:",
+            source=source,
+        )
+    )
+
    # ask for the final answer
    messages.append(
        UserMessage(
@@ -164,8 +179,8 @@ async def main() -> None:
            mdconverter = MarkdownConverter()
            res = mdconverter.convert(filename)
            if res.text_content:
-                #if count_token(res.text_content) < 8000:  # Don't put overly-large documents into the prompt
-                filename_prompt += "\n\nHere are the file's contents:\n\n" + res.text_content
+                if count_token(res.text_content) < 8000:  # Don't put overly-large documents into the prompt
+                    filename_prompt += "\n\nHere are the file's contents:\n\n" + res.text_content
        except UnsupportedFormatException:
            pass

--- a/python/teams/team-one/examples/example_websurfer.py
+++ b/python/teams/team-one/examples/example_websurfer.py
@@ -35,9 +35,13 @@ async def main() -> None:

    run_context = runtime.start()

-    actual_surfer = runtime._get_agent(web_surfer.id)  # type: ignore
-    assert isinstance(actual_surfer, MultimodalWebSurfer)
-    await actual_surfer.init(model_client=client, downloads_folder=os.getcwd(), browser_channel="chromium")
+    actual_surfer = await runtime.try_get_underlying_agent_instance(web_surfer.id, type=MultimodalWebSurfer)
+    await actual_surfer.init(
+        model_client=client,
+        downloads_folder=os.getcwd(),
+        start_page="https://www.adamfourney.com",
+        browser_channel="chromium",
+    )

    await runtime.send_message(RequestReplyMessage(), user_proxy.id)
    await run_context.stop_when_idle()
--- a/python/teams/team-one/src/team_one/agents/multimodal_web_surfer/multimodal_web_surfer.py
+++ b/python/teams/team-one/src/team_one/agents/multimodal_web_surfer/multimodal_web_surfer.py
@@ -70,6 +70,8 @@ VIEWPORT_WIDTH = 1440
 MLM_HEIGHT = 765
 MLM_WIDTH = 1224

+SCREENSHOT_TOKENS = 1105
+
 logger = logging.getLogger(EVENT_LOGGER_NAME + ".MultimodalWebSurfer")


@@ -718,20 +720,6 @@ When deciding between tools, consider if the request can be best addressed by:

        page_markdown: str = await self._get_page_markdown()

-        # TODO: Get token count working
-
-        buffer = page_markdown
-        # buffer: str = ""
-        # for line in re.split(r"([\r\n]+)", page_markdown):
-        #    tokens = count_token(buffer + line)
-        #    if tokens + 1024 > token_limit:  # Leave room for our summary
-        #        break
-        #    buffer += line
-
-        buffer = buffer.strip()
-        if len(buffer) == 0:
-            return "Nothing to summarize."
-
        title: str = self._page.url
        try:
            title = await self._page.title()
@@ -742,29 +730,55 @@ When deciding between tools, consider if the request can be best addressed by:
        screenshot = Image.open(io.BytesIO(await self._page.screenshot()))
        scaled_screenshot = screenshot.resize((MLM_WIDTH, MLM_HEIGHT))
        screenshot.close()
+        ag_image = AGImage.from_pil(scaled_screenshot)

-        prompt = f"We are visiting the webpage '{title}'. Its full-text contents are pasted below, along with a screenshot of the page's current viewport."
-        if question is not None:
-            prompt += (
-                f" Please summarize the webpage into one or two paragraphs with respect to '{question}':\n\n{buffer}"
-            )
-        else:
-            prompt += f" Please summarize the webpage into one or two paragraphs:\n\n{buffer}"
-
-        # Add the multimodal message and make the request
+        # Prepare the system prompt
        messages: List[LLMMessage] = []
        messages.append(
            SystemMessage(content="You are a helpful assistant that can summarize long documents to answer question.")
        )
+
+        # Prepare the main prompt
+        prompt = f"We are visiting the webpage '{title}'. Its full-text content are pasted below, along with a screenshot of the page's current viewport."
+        if question is not None:
+            prompt += f" Please summarize the webpage into one or two paragraphs with respect to '{question}':\n\n"
+        else:
+            prompt += " Please summarize the webpage into one or two paragraphs:\n\n"
+
+        # Grow the buffer (which is added to the prompt) line by line, stopping once no more than SCREENSHOT_TOKENS tokens would remain or we run out of lines
+        buffer = ""
+        for line in re.split(r"([\r\n]+)", page_markdown):
+            message = UserMessage(
+                # content=[
+                prompt + buffer + line,
+                #    ag_image,
+                # ],
+                source=self.metadata["name"],
+            )
+
+            remaining = self._model_client.remaining_tokens(messages + [message])
+            if remaining > SCREENSHOT_TOKENS:
+                buffer += line
+            else:
+                break
+
+        # Nothing to do
+        buffer = buffer.strip()
+        if len(buffer) == 0:
+            return "Nothing to summarize."
+
+        # Append the message
        messages.append(
            UserMessage(
                content=[
-                    prompt,
-                    AGImage.from_pil(scaled_screenshot),
+                    prompt + buffer,
+                    ag_image,
                ],
                source=self.metadata["name"],
            )
        )
+
+        # Generate the response
        response = await self._model_client.create(messages)
        scaled_screenshot.close()
        assert isinstance(response.content, str)