mirror of
https://github.com/microsoft/autogen.git
synced 2026-04-20 03:02:16 -04:00
Added token counting to TeamOne (#264)
* Added token counting to TeamOne
This commit is contained in:
@@ -23,19 +23,22 @@ from team_one.agents.file_surfer import FileSurfer
|
||||
from team_one.utils import LogHandler, message_content_to_str
|
||||
|
||||
import re
|
||||
import tiktoken
|
||||
|
||||
from agnext.components.models import AssistantMessage
|
||||
|
||||
encoding = None
|
||||
def count_token(value: str) -> int:
    """Return how many tokens ``value`` encodes to.

    The tokenizer is the tiktoken encoding for the "gpt-4o-2024-05-13"
    model. It is looked up on the first call and stored in the module-level
    ``encoding`` variable so later calls reuse the same object.

    Args:
        value: The text to tokenize.

    Returns:
        The length of the token sequence produced by the encoder.
    """
    # TODO:: Migrate to model_client.count_tokens
    global encoding

    tokenizer = encoding
    if tokenizer is None:
        # First use: resolve the encoding once and remember it globally.
        tokenizer = tiktoken.encoding_for_model("gpt-4o-2024-05-13")
        encoding = tokenizer

    token_ids = tokenizer.encode(value)
    return len(token_ids)
|
||||
|
||||
async def response_preparer(task: str, source: str, client: ChatCompletionClient, transcript: List[LLMMessage]) -> str:
|
||||
messages: List[LLMMessage] = [
|
||||
UserMessage(
|
||||
content=f"Earlier you were asked the following:\n\n{task}\n\nYour team then worked diligently to address that request. Here is a transcript of that conversation:",
|
||||
source=source,
|
||||
)
|
||||
]
|
||||
messages: List[LLMMessage] = []
|
||||
|
||||
# copy them to this context
|
||||
# copy them to this context
|
||||
for message in transcript:
|
||||
messages.append(
|
||||
UserMessage(
|
||||
@@ -45,6 +48,18 @@ async def response_preparer(task: str, source: str, client: ChatCompletionClient
|
||||
)
|
||||
)
|
||||
|
||||
# Remove messages until we are within 2k of the context window limit
|
||||
while len(messages) and client.remaining_tokens( messages ) < 2000:
|
||||
messages.pop(0)
|
||||
|
||||
# Add the preamble
|
||||
messages.insert(0,
|
||||
UserMessage(
|
||||
content=f"Earlier you were asked the following:\n\n{task}\n\nYour team then worked diligently to address that request. Here is a transcript of that conversation:",
|
||||
source=source,
|
||||
)
|
||||
)
|
||||
|
||||
# ask for the final answer
|
||||
messages.append(
|
||||
UserMessage(
|
||||
@@ -164,8 +179,8 @@ async def main() -> None:
|
||||
mdconverter = MarkdownConverter()
|
||||
res = mdconverter.convert(filename)
|
||||
if res.text_content:
|
||||
#if count_token(res.text_content) < 8000: # Don't put overly-large documents into the prompt
|
||||
filename_prompt += "\n\nHere are the file's contents:\n\n" + res.text_content
|
||||
if count_token(res.text_content) < 8000: # Don't put overly-large documents into the prompt
|
||||
filename_prompt += "\n\nHere are the file's contents:\n\n" + res.text_content
|
||||
except UnsupportedFormatException:
|
||||
pass
|
||||
|
||||
|
||||
@@ -35,9 +35,13 @@ async def main() -> None:
|
||||
|
||||
run_context = runtime.start()
|
||||
|
||||
actual_surfer = runtime._get_agent(web_surfer.id) # type: ignore
|
||||
assert isinstance(actual_surfer, MultimodalWebSurfer)
|
||||
await actual_surfer.init(model_client=client, downloads_folder=os.getcwd(), browser_channel="chromium")
|
||||
actual_surfer = await runtime.try_get_underlying_agent_instance(web_surfer.id, type=MultimodalWebSurfer)
|
||||
await actual_surfer.init(
|
||||
model_client=client,
|
||||
downloads_folder=os.getcwd(),
|
||||
start_page="https://www.adamfourney.com",
|
||||
browser_channel="chromium",
|
||||
)
|
||||
|
||||
await runtime.send_message(RequestReplyMessage(), user_proxy.id)
|
||||
await run_context.stop_when_idle()
|
||||
|
||||
@@ -70,6 +70,8 @@ VIEWPORT_WIDTH = 1440
|
||||
MLM_HEIGHT = 765
|
||||
MLM_WIDTH = 1224
|
||||
|
||||
SCREENSHOT_TOKENS = 1105
|
||||
|
||||
logger = logging.getLogger(EVENT_LOGGER_NAME + ".MultimodalWebSurfer")
|
||||
|
||||
|
||||
@@ -718,20 +720,6 @@ When deciding between tools, consider if the request can be best addressed by:
|
||||
|
||||
page_markdown: str = await self._get_page_markdown()
|
||||
|
||||
# TODO: Get token count working
|
||||
|
||||
buffer = page_markdown
|
||||
# buffer: str = ""
|
||||
# for line in re.split(r"([\r\n]+)", page_markdown):
|
||||
# tokens = count_token(buffer + line)
|
||||
# if tokens + 1024 > token_limit: # Leave room for our summary
|
||||
# break
|
||||
# buffer += line
|
||||
|
||||
buffer = buffer.strip()
|
||||
if len(buffer) == 0:
|
||||
return "Nothing to summarize."
|
||||
|
||||
title: str = self._page.url
|
||||
try:
|
||||
title = await self._page.title()
|
||||
@@ -742,29 +730,55 @@ When deciding between tools, consider if the request can be best addressed by:
|
||||
screenshot = Image.open(io.BytesIO(await self._page.screenshot()))
|
||||
scaled_screenshot = screenshot.resize((MLM_WIDTH, MLM_HEIGHT))
|
||||
screenshot.close()
|
||||
ag_image = AGImage.from_pil(scaled_screenshot)
|
||||
|
||||
prompt = f"We are visiting the webpage '{title}'. Its full-text contents are pasted below, along with a screenshot of the page's current viewport."
|
||||
if question is not None:
|
||||
prompt += (
|
||||
f" Please summarize the webpage into one or two paragraphs with respect to '{question}':\n\n{buffer}"
|
||||
)
|
||||
else:
|
||||
prompt += f" Please summarize the webpage into one or two paragraphs:\n\n{buffer}"
|
||||
|
||||
# Add the multimodal message and make the request
|
||||
# Prepare the system prompt
|
||||
messages: List[LLMMessage] = []
|
||||
messages.append(
|
||||
SystemMessage(content="You are a helpful assistant that can summarize long documents to answer question.")
|
||||
)
|
||||
|
||||
# Prepare the main prompt
|
||||
prompt = f"We are visiting the webpage '{title}'. Its full-text content are pasted below, along with a screenshot of the page's current viewport."
|
||||
if question is not None:
|
||||
prompt += f" Please summarize the webpage into one or two paragraphs with respect to '{question}':\n\n"
|
||||
else:
|
||||
prompt += " Please summarize the webpage into one or two paragraphs:\n\n"
|
||||
|
||||
# Grow the buffer (which is added to the prompt) until we overflow the context window or run out of lines
|
||||
buffer = ""
|
||||
for line in re.split(r"([\r\n]+)", page_markdown):
|
||||
message = UserMessage(
|
||||
# content=[
|
||||
prompt + buffer + line,
|
||||
# ag_image,
|
||||
# ],
|
||||
source=self.metadata["name"],
|
||||
)
|
||||
|
||||
remaining = self._model_client.remaining_tokens(messages + [message])
|
||||
if remaining > SCREENSHOT_TOKENS:
|
||||
buffer += line
|
||||
else:
|
||||
break
|
||||
|
||||
# Nothing to do
|
||||
buffer = buffer.strip()
|
||||
if len(buffer) == 0:
|
||||
return "Nothing to summarize."
|
||||
|
||||
# Append the message
|
||||
messages.append(
|
||||
UserMessage(
|
||||
content=[
|
||||
prompt,
|
||||
AGImage.from_pil(scaled_screenshot),
|
||||
prompt + buffer,
|
||||
ag_image,
|
||||
],
|
||||
source=self.metadata["name"],
|
||||
)
|
||||
)
|
||||
|
||||
# Generate the response
|
||||
response = await self._model_client.create(messages)
|
||||
scaled_screenshot.close()
|
||||
assert isinstance(response.content, str)
|
||||
|
||||
Reference in New Issue
Block a user