Files
OpenHands/openhands/core/message.py
tobitege bc31fb15fe (fix) CodeActAgent: fix issues with vision support in prompts (#3665)
* CodeActAgent: fix message prep if prompt caching is not supported

* fix python version in regen tests workflow

* fix in conftest "mock_completion" method

* add disable_vision to LLMConfig; revert change in message parsing in llm.py

* format messages in several files for completion

* refactored message(s) formatting (llm.py); added vision_is_active()

* fix a unit test

* regenerate: added LOG_TO_FILE and FORCE_REGENERATE env flags

* try to fix path to logs folder in workflow

* llm: prevent index error

* try FORCE_USE_LLM in regenerate

* tweaks everywhere...

* fix 2 random unit test errors :(

* added FORCE_REGENERATE_TESTS=true to regenerate CLI

* fix test_lint_file_fail_typescript again

* double-quotes for env vars in workflow; llm logger set to debug

* fix typo in regenerate

* regenerate iterations now 20; applied iteration counter fix by Li

* regenerate: pass FORCE_REGENERATE flag into env

* fixes for int tests. several mock files updated.

* browsing_agent: fix response_parser.py adding ) to empty response

* test_browse_internet: fix skipif and revert obsolete mock files

* regenerate: fi bracketing for http server start/kill conditions

* disable test_browse_internet for CodeAct*Agents; mock files updated after merge

* missed to include more mock files earlier

* reverts after review feedback from Li

* forgot one

* browsing agent test, partial fixes and updated mock files

* test_browse_internet works in my WSL now!

* adapt unit test test_prompt_caching.py

* add DEBUG to regenerate workflow command

* convert regenerate workflow params to inputs

* more integration test mock files updated

* more files

* test_prompt_caching: restored test_prompt_caching_headers purpose

* file_ops: fix potential exception, like "cross device copy"; fixed mock files accordingly

* reverts/changes wrt feedback from xingyao

* updated docs and config template

* code cleanup wrt review feedback
2024-09-04 17:58:30 +02:00

118 lines
3.4 KiB
Python

from enum import Enum
from typing import Union
from pydantic import BaseModel, Field, model_serializer
from typing_extensions import Literal
from openhands.core.logger import openhands_logger as logger
class ContentType(Enum):
TEXT = 'text'
IMAGE_URL = 'image_url'
class Content(BaseModel):
type: str
cache_prompt: bool = False
@model_serializer
def serialize_model(self):
raise NotImplementedError('Subclasses should implement this method.')
class TextContent(Content):
type: str = ContentType.TEXT.value
text: str
@model_serializer
def serialize_model(self):
data: dict[str, str | dict[str, str]] = {
'type': self.type,
'text': self.text,
}
if self.cache_prompt:
data['cache_control'] = {'type': 'ephemeral'}
return data
class ImageContent(Content):
type: str = ContentType.IMAGE_URL.value
image_urls: list[str]
@model_serializer
def serialize_model(self):
images: list[dict[str, str | dict[str, str]]] = []
for url in self.image_urls:
images.append({'type': self.type, 'image_url': {'url': url}})
if self.cache_prompt and images:
images[-1]['cache_control'] = {'type': 'ephemeral'}
return images
class Message(BaseModel):
role: Literal['user', 'system', 'assistant']
content: list[TextContent | ImageContent] = Field(default=list)
@property
def contains_image(self) -> bool:
return any(isinstance(content, ImageContent) for content in self.content)
@model_serializer
def serialize_model(self) -> dict:
content: list[dict[str, str | dict[str, str]]] = []
for item in self.content:
if isinstance(item, TextContent):
content.append(item.model_dump())
elif isinstance(item, ImageContent):
content.extend(item.model_dump())
return {'content': content, 'role': self.role}
def format_messages(
messages: Union[Message, list[Message]], with_images: bool
) -> list[dict]:
if not isinstance(messages, list):
messages = [messages]
if with_images:
return [message.model_dump() for message in messages]
converted_messages = []
for message in messages:
content_str = ''
role = 'user'
if 'role' in message:
role = message['role']
if isinstance(message, str):
content_str = content_str + message + '\n'
continue
if isinstance(message, dict):
if 'content' in message:
content_str = content_str + message['content'] + '\n'
elif isinstance(message, Message):
role = message.role
for content in message.content:
if isinstance(content, list):
for item in content:
if isinstance(item, TextContent):
content_str = content_str + item.text + '\n'
elif isinstance(content, TextContent):
content_str = content_str + content.text + '\n'
else:
logger.error(
f'>>> `message` is not a string, dict, or Message: {type(message)}'
)
if content_str:
converted_messages.append(
{
'role': role,
'content': content_str,
}
)
return converted_messages