diff --git a/python/packages/autogen-ext/src/autogen_ext/agents/web_surfer/_multimodal_web_surfer.py b/python/packages/autogen-ext/src/autogen_ext/agents/web_surfer/_multimodal_web_surfer.py
index a5281f621..cd4a68c65 100644
--- a/python/packages/autogen-ext/src/autogen_ext/agents/web_surfer/_multimodal_web_surfer.py
+++ b/python/packages/autogen-ext/src/autogen_ext/agents/web_surfer/_multimodal_web_surfer.py
@@ -4,7 +4,6 @@ import io
import json
import logging
import os
-import pathlib
import re
import time
import traceback
@@ -137,6 +136,30 @@ class MultimodalWebSurfer(BaseChatAgent):
chat_message=TextMessage(content=f"Web surfing error:\n\n{traceback.format_exc()}", source=self.name)
)
+ async def on_reset(self, cancellation_token: CancellationToken) -> None:
+ assert self._page is not None
+ self._chat_history.clear()
+ await self._visit_page(self.start_page)
+ if self.to_save_screenshots:
+ current_timestamp = "_" + int(time.time()).__str__()
+ screenshot_png_name = "screenshot" + current_timestamp + ".png"
+ await self._page.screenshot(path=os.path.join(self.debug_dir, screenshot_png_name)) # type: ignore
+ self.logger.info(
+ WebSurferEvent(
+ source=self.name,
+ url=self._page.url,
+ message="Screenshot: " + screenshot_png_name,
+ )
+ )
+
+ self.logger.info(
+ WebSurferEvent(
+ source=self.name,
+ url=self._page.url,
+ message="Resetting browser.",
+ )
+ )
+
async def init(
self,
headless: bool = True,
@@ -209,28 +232,10 @@ class MultimodalWebSurfer(BaseChatAgent):
if not os.path.isdir(self.debug_dir):
os.mkdir(self.debug_dir)
- current_timestamp = "_" + int(time.time()).__str__()
- screenshot_png_name = "screenshot" + current_timestamp + ".png"
- debug_html = os.path.join(self.debug_dir, "screenshot" + current_timestamp + ".html")
- if self.to_save_screenshots:
- async with aiofiles.open(debug_html, "wt") as file:
- await file.write(
- f"""
-
-
-
-
-
-
- """.strip(),
- )
+
if self.to_save_screenshots:
+ current_timestamp = "_" + int(time.time()).__str__()
+ screenshot_png_name = "screenshot" + current_timestamp + ".png"
await self._page.screenshot(path=os.path.join(self.debug_dir, screenshot_png_name))
self.logger.info(
WebSurferEvent(
@@ -239,33 +244,6 @@ class MultimodalWebSurfer(BaseChatAgent):
message="Screenshot: " + screenshot_png_name,
)
)
- self.logger.info(
- f"Multimodal Web Surfer debug screens: {pathlib.Path(os.path.abspath(debug_html)).as_uri()}\n"
- )
-
- async def _reset(self, cancellation_token: CancellationToken) -> None:
- assert self._page is not None
- self._chat_history.clear()
- await self._visit_page(self.start_page)
- if self.to_save_screenshots:
- current_timestamp = "_" + int(time.time()).__str__()
- screenshot_png_name = "screenshot" + current_timestamp + ".png"
- await self._page.screenshot(path=os.path.join(self.debug_dir, screenshot_png_name)) # type: ignore
- self.logger.info(
- WebSurferEvent(
- source=self.name,
- url=self._page.url,
- message="Screenshot: " + screenshot_png_name,
- )
- )
-
- self.logger.info(
- WebSurferEvent(
- source=self.name,
- url=self._page.url,
- message="Resetting browser.",
- )
- )
def _target_name(self, target: str, rects: Dict[str, InteractiveRegion]) -> str | None:
try:
@@ -641,13 +619,6 @@ When deciding between tools, consider if the request can be best addressed by:
assert isinstance(result, dict)
return cast(Dict[str, Any], result)
- async def _get_page_markdown(self) -> str:
- assert self._page is not None
- html = await self._page.evaluate("document.documentElement.outerHTML;")
- # TODO: fix types
- res = self._markdown_converter.convert_stream(io.StringIO(html), file_extension=".html", url=self._page.url) # type: ignore
- return res.text_content # type: ignore
-
async def _on_new_page(self, page: Page) -> None:
self._page = page
assert self._page is not None