mirror of
https://github.com/All-Hands-AI/OpenHands.git
synced 2026-01-10 07:18:10 -05:00
feat(agent): add BrowseURLAction to CodeAct (produce markdown from URL) (#5285)
This commit is contained in:
@@ -19,6 +19,7 @@ from openhands.events.action import (
|
||||
AgentDelegateAction,
|
||||
AgentFinishAction,
|
||||
BrowseInteractiveAction,
|
||||
BrowseURLAction,
|
||||
CmdRunAction,
|
||||
FileEditAction,
|
||||
IPythonRunCellAction,
|
||||
@@ -266,6 +267,30 @@ StrReplaceEditorTool = ChatCompletionToolParam(
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
_WEB_DESCRIPTION = """Read (convert to markdown) content from a webpage. You should prefer using the `webpage_read` tool over the `browser` tool, but do use the `browser` tool if you need to interact with a webpage (e.g., click a button, fill out a form, etc.).
|
||||
|
||||
You may use the `webpage_read` tool to read content from a webpage, and even search the webpage content using a Google search query (e.g., url=`https://www.google.com/search?q=YOUR_QUERY`).
|
||||
"""
|
||||
|
||||
WebReadTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='web_read',
|
||||
description=_WEB_DESCRIPTION,
|
||||
parameters={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'url': {
|
||||
'type': 'string',
|
||||
'description': 'The URL of the webpage to read. You can also use a Google search query here (e.g., `https://www.google.com/search?q=YOUR_QUERY`).',
|
||||
}
|
||||
},
|
||||
'required': ['url'],
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
# from browsergym/core/action/highlevel.py
|
||||
_browser_action_space = HighLevelActionSet(
|
||||
subsets=['bid', 'nav'],
|
||||
@@ -274,7 +299,7 @@ _browser_action_space = HighLevelActionSet(
|
||||
)
|
||||
|
||||
|
||||
_BROWSER_DESCRIPTION = """Interact with the browser using Python code.
|
||||
_BROWSER_DESCRIPTION = """Interact with the browser using Python code. Use it ONLY when you need to interact with a webpage.
|
||||
|
||||
See the description of "code" parameter for more details.
|
||||
|
||||
@@ -484,6 +509,8 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
|
||||
action = IPythonRunCellAction(code=code, include_extra=False)
|
||||
elif tool_call.function.name == 'browser':
|
||||
action = BrowseInteractiveAction(browser_actions=arguments['code'])
|
||||
elif tool_call.function.name == 'web_read':
|
||||
action = BrowseURLAction(url=arguments['url'])
|
||||
else:
|
||||
raise FunctionCallNotExistsError(
|
||||
f'Tool {tool_call.function.name} is not registered. (arguments: {arguments}). Please check the tool name and retry with an existing tool.'
|
||||
@@ -516,6 +543,7 @@ def get_tools(
|
||||
) -> list[ChatCompletionToolParam]:
|
||||
tools = [CmdRunTool, FinishTool]
|
||||
if codeact_enable_browsing:
|
||||
tools.append(WebReadTool)
|
||||
tools.append(BrowserTool)
|
||||
if codeact_enable_jupyter:
|
||||
tools.append(IPythonTool)
|
||||
|
||||
Reference in New Issue
Block a user