Files
OpenHands/opendevin/core/message.py
Kaushik Deka 415843476c Feat: Add Vision Input Support for LLM with Vision Capabilities (#2848)
* add image feature

* fix-linting

* check model support for images

* add comment

* Add image support to other models

* Add images to chat

* fix linting

* fix test issues

* refactor variable names and import

* fix tests

* fix chat message tests

* fix linting

* add pydantic class message

* use message

* remove redundant comments

* remove redundant comments

* change Message class

* remove unintended change

* fix integration tests using regenerate.sh

* rename image_base64 to images_url, fix tests

* rename Message.py to message, change reminder append logic, add unit tests

* remove comment, fix error to merge

* codeact_swe_agent

* fix f string

* update eventstream integration tests

* add missing if check in codeact_swe_agent

* update integration tests

* Update frontend/src/components/chat/ChatInput.tsx

* Update frontend/src/components/chat/ChatInput.tsx

* Update frontend/src/components/chat/ChatInput.tsx

* Update frontend/src/components/chat/ChatInput.tsx

* Update frontend/src/components/chat/ChatMessage.tsx

---------

Co-authored-by: tobitege <tobitege@gmx.de>
Co-authored-by: Xingyao Wang <xingyao6@illinois.edu>
Co-authored-by: sp.wack <83104063+amanape@users.noreply.github.com>
2024-08-04 02:26:22 +08:00

60 lines
1.6 KiB
Python

from enum import Enum
from pydantic import BaseModel, Field, model_serializer
from typing_extensions import Literal
class ContentType(Enum):
    """Kinds of content part that a message can carry.

    The member values are the strings the content classes in this module
    write under the ``type`` key when they are serialized.
    """

    # Plain text part.
    TEXT = 'text'

    # Image part, referenced by URL.
    IMAGE_URL = 'image_url'
class Content(BaseModel):
    """Common base for the content parts of a message.

    Declares the ``type`` field shared by every part, plus a placeholder
    serializer that always raises; concrete subclasses are expected to
    supply their own ``serialize_model``.
    """

    type: ContentType

    @model_serializer
    def serialize_model(self):
        # The base class defines no serialized form of its own.
        reason = 'Subclasses should implement this method.'
        raise NotImplementedError(reason)
class TextContent(Content):
    """A text part of a message.

    Serializes to a single dict of the form
    ``{'type': <type value>, 'text': <text>}``.
    """

    type: ContentType = ContentType.TEXT
    text: str

    @model_serializer
    def serialize_model(self):
        # Emit the enum's string value rather than the enum member itself.
        return dict(type=self.type.value, text=self.text)
class ImageContent(Content):
    """An image part of a message, holding one or more image URLs.

    Unlike a text part, this serializes to a *list*: one
    ``{'type': <type value>, 'image_url': {'url': <url>}}`` dict per entry
    in ``image_urls``, in the same order. An empty ``image_urls`` yields an
    empty list.
    """

    type: ContentType = ContentType.IMAGE_URL
    image_urls: list[str]

    @model_serializer
    def serialize_model(self):
        # One serialized entry per URL, preserving input order.
        return [
            {'type': self.type.value, 'image_url': {'url': link}}
            for link in self.image_urls
        ]
class Message(BaseModel):
    """A chat message: a role plus an ordered list of content parts.

    Attributes:
        role: One of ``'user'``, ``'system'`` or ``'assistant'``.
        content: Text and/or image parts, in order. Defaults to a fresh
            empty list for each message.
    """

    role: Literal['user', 'system', 'assistant']
    # Use ``default_factory`` so every message gets its own empty list.
    # ``default=list`` would store the ``list`` class itself as the default
    # value, and iterating ``content`` on such a message raises TypeError.
    content: list[TextContent | ImageContent] = Field(default_factory=list)

    @property
    def contains_image(self) -> bool:
        """Return True if at least one content part is an ``ImageContent``."""
        return any(isinstance(part, ImageContent) for part in self.content)

    @model_serializer
    def serialize_model(self) -> dict:
        """Serialize to ``{'role': ..., 'content': [...]}``.

        The ``content`` list is flat: a text part contributes the single
        dict it dumps to, while an image part dumps to a list of dicts
        (one per URL), which is spliced in rather than nested.
        """
        content: list[dict[str, str | dict[str, str]]] = []
        for item in self.content:
            if isinstance(item, TextContent):
                # TextContent dumps to one dict.
                content.append(item.model_dump())
            elif isinstance(item, ImageContent):
                # ImageContent dumps to a list of dicts; flatten it.
                content.extend(item.model_dump())
        return {'role': self.role, 'content': content}