Mirror of https://github.com/microsoft/autogen.git
Added token counting to TeamOne (#264)
* Added token counting to TeamOne
@@ -35,9 +35,13 @@ async def main() -> None:
     run_context = runtime.start()

-    actual_surfer = runtime._get_agent(web_surfer.id) # type: ignore
-    assert isinstance(actual_surfer, MultimodalWebSurfer)
-    await actual_surfer.init(model_client=client, downloads_folder=os.getcwd(), browser_channel="chromium")
+    actual_surfer = await runtime.try_get_underlying_agent_instance(web_surfer.id, type=MultimodalWebSurfer)
+    await actual_surfer.init(
+        model_client=client,
+        downloads_folder=os.getcwd(),
+        start_page="https://www.adamfourney.com",
+        browser_channel="chromium",
+    )

     await runtime.send_message(RequestReplyMessage(), user_proxy.id)
     await run_context.stop_when_idle()

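Taken together, the updated example retrieves the concrete web-surfer agent through the public try_get_underlying_agent_instance accessor instead of the private runtime._get_agent, configures it with a start page, and then kicks off the run. A minimal sketch of that flow, assuming the runtime, model client, and agent handles are created earlier in the example's main(); the function drive_surfer and its parameters are illustrative and not part of this commit:

import os
from typing import Any

# Sketch only: surfer_type is the MultimodalWebSurfer class and reply_message a
# RequestReplyMessage instance from the TeamOne package; they are passed in so the
# sketch carries no assumed import paths.
async def drive_surfer(
    runtime: Any,
    client: Any,
    web_surfer_id: Any,
    user_proxy_id: Any,
    surfer_type: type,
    reply_message: Any,
) -> None:
    run_context = runtime.start()

    # Public accessor replaces the old private runtime._get_agent() lookup.
    actual_surfer = await runtime.try_get_underlying_agent_instance(web_surfer_id, type=surfer_type)
    await actual_surfer.init(
        model_client=client,
        downloads_folder=os.getcwd(),
        start_page="https://www.adamfourney.com",
        browser_channel="chromium",
    )

    await runtime.send_message(reply_message, user_proxy_id)
    await run_context.stop_when_idle()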
@@ -70,6 +70,8 @@ VIEWPORT_WIDTH = 1440
 MLM_HEIGHT = 765
 MLM_WIDTH = 1224

+SCREENSHOT_TOKENS = 1105
+
 logger = logging.getLogger(EVENT_LOGGER_NAME + ".MultimodalWebSurfer")

@@ -718,20 +720,6 @@ When deciding between tools, consider if the request can be best addressed by:

         page_markdown: str = await self._get_page_markdown()

-        # TODO: Get token count working
-
-        buffer = page_markdown
-        # buffer: str = ""
-        # for line in re.split(r"([\r\n]+)", page_markdown):
-        #     tokens = count_token(buffer + line)
-        #     if tokens + 1024 > token_limit: # Leave room for our summary
-        #         break
-        #     buffer += line
-
-        buffer = buffer.strip()
-        if len(buffer) == 0:
-            return "Nothing to summarize."
-
         title: str = self._page.url
         try:
             title = await self._page.title()
@@ -742,29 +730,55 @@ When deciding between tools, consider if the request can be best addressed by:
         screenshot = Image.open(io.BytesIO(await self._page.screenshot()))
         scaled_screenshot = screenshot.resize((MLM_WIDTH, MLM_HEIGHT))
         screenshot.close()
+        ag_image = AGImage.from_pil(scaled_screenshot)
+
-        prompt = f"We are visiting the webpage '{title}'. Its full-text contents are pasted below, along with a screenshot of the page's current viewport."
-        if question is not None:
-            prompt += (
-                f" Please summarize the webpage into one or two paragraphs with respect to '{question}':\n\n{buffer}"
-            )
-        else:
-            prompt += f" Please summarize the webpage into one or two paragraphs:\n\n{buffer}"
-
-        # Add the multimodal message and make the request
+        # Prepare the system prompt
         messages: List[LLMMessage] = []
         messages.append(
             SystemMessage(content="You are a helpful assistant that can summarize long documents to answer question.")
         )
+
+        # Prepare the main prompt
+        prompt = f"We are visiting the webpage '{title}'. Its full-text content are pasted below, along with a screenshot of the page's current viewport."
+        if question is not None:
+            prompt += f" Please summarize the webpage into one or two paragraphs with respect to '{question}':\n\n"
+        else:
+            prompt += " Please summarize the webpage into one or two paragraphs:\n\n"
+
+        # Grow the buffer (which is added to the prompt) until we overflow the context window or run out of lines
+        buffer = ""
+        for line in re.split(r"([\r\n]+)", page_markdown):
+            message = UserMessage(
+                # content=[
+                prompt + buffer + line,
+                # ag_image,
+                # ],
+                source=self.metadata["name"],
+            )
+
+            remaining = self._model_client.remaining_tokens(messages + [message])
+            if remaining > SCREENSHOT_TOKENS:
+                buffer += line
+            else:
+                break
+
+        # Nothing to do
+        buffer = buffer.strip()
+        if len(buffer) == 0:
+            return "Nothing to summarize."
+
+        # Append the message
         messages.append(
             UserMessage(
                 content=[
-                    prompt,
-                    AGImage.from_pil(scaled_screenshot),
+                    prompt + buffer,
+                    ag_image,
                 ],
                 source=self.metadata["name"],
             )
         )

+        # Generate the response
         response = await self._model_client.create(messages)
         scaled_screenshot.close()
         assert isinstance(response.content, str)
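The heart of the commit is the budget-driven buffer growth above: page markdown is appended line by line only while the model client reports enough remaining tokens to still fit the screenshot (SCREENSHOT_TOKENS). A standalone sketch of that pattern, where remaining_tokens is a caller-supplied stand-in for self._model_client.remaining_tokens rather than the client's actual signature:

import re
from typing import Callable

# Mirrors the loop added in this commit: grow the buffer while the prompt would
# still leave SCREENSHOT_TOKENS of headroom for the screenshot image.
SCREENSHOT_TOKENS = 1105

def grow_buffer(page_markdown: str, remaining_tokens: Callable[[str], int]) -> str:
    buffer = ""
    # Split on newlines but keep the separators so the accumulated buffer
    # preserves the page's original line breaks.
    for line in re.split(r"([\r\n]+)", page_markdown):
        if remaining_tokens(buffer + line) <= SCREENSHOT_TOKENS:
            break
        buffer += line
    return buffer.strip()

An empty return value corresponds to the "Nothing to summarize." early exit in the method.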