mirror of
https://github.com/danielmiessler/Fabric.git
synced 2026-01-22 04:38:07 -05:00
feat: add multi-content support for images and PDFs in Anthropic client
### CHANGES

- Update `toMessages` to handle multi-content messages with text and attachments
- Add `contentBlocksFromMessage` to convert message parts to Anthropic blocks
- Implement support for image URLs including data URLs and base64 images
- Add PDF attachment handling via data URLs and URL-based PDFs
- Introduce `parseDataURL` for extracting MIME type and data from data URLs
- Create `normalizeImageMimeType` to standardize supported image MIME types
- Add `isPDFURL` to detect PDF files from URL paths
- Refactor system content accumulation to use text extraction from parts
- Update tests to verify PDF attachment processing in multi-content messages
This commit is contained in:
@@ -4,7 +4,9 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
neturl "net/url"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
@@ -381,22 +383,29 @@ func (an *Client) toMessages(msgs []*chat.ChatCompletionMessage) (ret []anthropi
|
||||
lastRoleWasUser := false
|
||||
|
||||
for _, msg := range msgs {
|
||||
if strings.TrimSpace(msg.Content) == "" {
|
||||
if strings.TrimSpace(msg.Content) == "" && len(msg.MultiContent) == 0 {
|
||||
continue // Skip empty messages
|
||||
}
|
||||
|
||||
switch msg.Role {
|
||||
case chat.ChatMessageRoleSystem:
|
||||
// Accumulate system content. It will be prepended to the first user message.
|
||||
systemText := messageTextFromParts(msg)
|
||||
if systemText == "" {
|
||||
continue
|
||||
}
|
||||
if systemContent != "" {
|
||||
systemContent += "\\n" + msg.Content
|
||||
systemContent += "\\n" + systemText
|
||||
} else {
|
||||
systemContent = msg.Content
|
||||
systemContent = systemText
|
||||
}
|
||||
case chat.ChatMessageRoleUser:
|
||||
userContent := msg.Content
|
||||
blocks := contentBlocksFromMessage(msg)
|
||||
if len(blocks) == 0 {
|
||||
continue
|
||||
}
|
||||
if isFirstUserMessage && systemContent != "" {
|
||||
userContent = systemContent + "\\n\\n" + userContent
|
||||
blocks = prependSystemContentToBlocks(systemContent, blocks)
|
||||
isFirstUserMessage = false // System content now consumed
|
||||
}
|
||||
if lastRoleWasUser {
|
||||
@@ -404,7 +413,7 @@ func (an *Client) toMessages(msgs []*chat.ChatCompletionMessage) (ret []anthropi
|
||||
// This shouldn't happen with current chatter.go logic but is a safeguard.
|
||||
anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(anthropic.NewTextBlock("Okay.")))
|
||||
}
|
||||
anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(anthropic.NewTextBlock(userContent)))
|
||||
anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(blocks...))
|
||||
lastRoleWasUser = true
|
||||
case chat.ChatMessageRoleAssistant:
|
||||
// If the first message is an assistant message, and we have system content,
|
||||
@@ -435,6 +444,127 @@ func (an *Client) toMessages(msgs []*chat.ChatCompletionMessage) (ret []anthropi
|
||||
return anthropicMessages
|
||||
}
|
||||
|
||||
func messageTextFromParts(msg *chat.ChatCompletionMessage) string {
|
||||
textParts := []string{}
|
||||
if strings.TrimSpace(msg.Content) != "" {
|
||||
textParts = append(textParts, msg.Content)
|
||||
}
|
||||
for _, part := range msg.MultiContent {
|
||||
if part.Type == chat.ChatMessagePartTypeText && strings.TrimSpace(part.Text) != "" {
|
||||
textParts = append(textParts, part.Text)
|
||||
}
|
||||
}
|
||||
return strings.Join(textParts, "\\n")
|
||||
}
|
||||
|
||||
func contentBlocksFromMessage(msg *chat.ChatCompletionMessage) []anthropic.ContentBlockParamUnion {
|
||||
var blocks []anthropic.ContentBlockParamUnion
|
||||
if strings.TrimSpace(msg.Content) != "" {
|
||||
blocks = append(blocks, anthropic.NewTextBlock(msg.Content))
|
||||
}
|
||||
for _, part := range msg.MultiContent {
|
||||
switch part.Type {
|
||||
case chat.ChatMessagePartTypeText:
|
||||
if strings.TrimSpace(part.Text) != "" {
|
||||
blocks = append(blocks, anthropic.NewTextBlock(part.Text))
|
||||
}
|
||||
case chat.ChatMessagePartTypeImageURL:
|
||||
if part.ImageURL == nil || strings.TrimSpace(part.ImageURL.URL) == "" {
|
||||
continue
|
||||
}
|
||||
if block, ok := contentBlockFromAttachmentURL(part.ImageURL.URL); ok {
|
||||
blocks = append(blocks, block)
|
||||
}
|
||||
}
|
||||
}
|
||||
return blocks
|
||||
}
|
||||
|
||||
func prependSystemContentToBlocks(systemContent string, blocks []anthropic.ContentBlockParamUnion) []anthropic.ContentBlockParamUnion {
|
||||
if len(blocks) == 0 {
|
||||
return []anthropic.ContentBlockParamUnion{anthropic.NewTextBlock(systemContent)}
|
||||
}
|
||||
if blocks[0].OfText != nil {
|
||||
blocks[0].OfText.Text = systemContent + "\\n\\n" + blocks[0].OfText.Text
|
||||
return blocks
|
||||
}
|
||||
return append([]anthropic.ContentBlockParamUnion{anthropic.NewTextBlock(systemContent)}, blocks...)
|
||||
}
|
||||
|
||||
func contentBlockFromAttachmentURL(url string) (anthropic.ContentBlockParamUnion, bool) {
|
||||
if strings.HasPrefix(url, "data:") {
|
||||
mimeType, data, ok := parseDataURL(url)
|
||||
if !ok {
|
||||
return anthropic.ContentBlockParamUnion{}, false
|
||||
}
|
||||
if strings.EqualFold(mimeType, "application/pdf") {
|
||||
return anthropic.NewDocumentBlock(anthropic.Base64PDFSourceParam{Data: data}), true
|
||||
}
|
||||
if normalized := normalizeImageMimeType(mimeType); normalized != "" {
|
||||
return anthropic.NewImageBlockBase64(normalized, data), true
|
||||
}
|
||||
return anthropic.ContentBlockParamUnion{}, false
|
||||
}
|
||||
if isPDFURL(url) {
|
||||
return anthropic.NewDocumentBlock(anthropic.URLPDFSourceParam{URL: url}), true
|
||||
}
|
||||
return anthropic.NewImageBlock(anthropic.URLImageSourceParam{URL: url}), true
|
||||
}
|
||||
|
||||
// parseDataURL splits a base64 data URL of the form
// "data:<mime>[;param...];base64,<payload>" into its MIME type and payload.
//
// ok is false (and both strings are empty) when value lacks the "data:"
// prefix, has no comma, has a blank payload, has a blank MIME type, or does not
// carry a "base64" parameter (matched case-insensitively). The MIME type and
// payload are returned with surrounding whitespace removed; the payload itself
// is not decoded or validated.
func parseDataURL(value string) (mimeType string, data string, ok bool) {
	const scheme = "data:"
	if !strings.HasPrefix(value, scheme) {
		return "", "", false
	}

	// Everything before the first comma is metadata; the rest is the payload.
	header, payload, found := strings.Cut(value[len(scheme):], ",")
	if !found {
		return "", "", false
	}

	payload = strings.TrimSpace(payload)
	if payload == "" {
		return "", "", false
	}

	fields := strings.Split(strings.TrimSpace(header), ";")
	mediaType := strings.TrimSpace(fields[0])
	if mediaType == "" {
		return "", "", false
	}

	// Only base64-encoded payloads are accepted.
	for _, attr := range fields[1:] {
		if strings.EqualFold(strings.TrimSpace(attr), "base64") {
			return mediaType, payload, true
		}
	}
	return "", "", false
}
|
||||
|
||||
// normalizeImageMimeType maps mimeType to one of the accepted image MIME types.
//
// Input is trimmed and lower-cased; "image/jpg" is rewritten to "image/jpeg".
// The result is one of "image/jpeg", "image/png", "image/gif" or "image/webp",
// or "" when the type is not in that set.
func normalizeImageMimeType(mimeType string) string {
	switch canonical := strings.ToLower(strings.TrimSpace(mimeType)); canonical {
	case "image/jpg", "image/jpeg":
		return "image/jpeg"
	case "image/png", "image/gif", "image/webp":
		return canonical
	}
	return ""
}
|
||||
|
||||
// isPDFURL reports whether the path component of url ends in ".pdf"
// (case-insensitive). Query strings and fragments are not part of the path and
// so do not affect the result. A url that fails to parse yields false.
func isPDFURL(url string) bool {
	location, parseErr := neturl.Parse(url)
	if parseErr != nil {
		return false
	}
	extension := path.Ext(location.Path)
	return strings.EqualFold(extension, ".pdf")
}
|
||||
|
||||
// NeedsRawMode reports whether the given model requires raw mode.
// This client returns false for every modelName; the argument is not inspected.
func (an *Client) NeedsRawMode(modelName string) bool {
	return false
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/anthropics/anthropic-sdk-go"
|
||||
"github.com/danielmiessler/fabric/internal/chat"
|
||||
"github.com/danielmiessler/fabric/internal/domain"
|
||||
)
|
||||
|
||||
@@ -325,3 +326,40 @@ func TestBuildMessageParams_ExplicitTopP(t *testing.T) {
|
||||
t.Errorf("Expected TopP %f, got %f", opts.TopP, params.TopP.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestToMessages_MultiContentPDFAttachment(t *testing.T) {
|
||||
client := NewClient()
|
||||
msg := &chat.ChatCompletionMessage{
|
||||
Role: chat.ChatMessageRoleUser,
|
||||
MultiContent: []chat.ChatMessagePart{
|
||||
{
|
||||
Type: chat.ChatMessagePartTypeText,
|
||||
Text: "Summarize this document.",
|
||||
},
|
||||
{
|
||||
Type: chat.ChatMessagePartTypeImageURL,
|
||||
ImageURL: &chat.ChatMessageImageURL{
|
||||
URL: "data:application/pdf;base64,SGVsbG8=",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
messages := client.toMessages([]*chat.ChatCompletionMessage{msg})
|
||||
if len(messages) != 1 {
|
||||
t.Fatalf("Expected 1 message, got %d", len(messages))
|
||||
}
|
||||
if len(messages[0].Content) != 2 {
|
||||
t.Fatalf("Expected 2 content blocks, got %d", len(messages[0].Content))
|
||||
}
|
||||
if messages[0].Content[0].OfText == nil || messages[0].Content[0].OfText.Text != "Summarize this document." {
|
||||
t.Fatalf("Expected first content block to be text, got %#v", messages[0].Content[0])
|
||||
}
|
||||
document := messages[0].Content[1].OfDocument
|
||||
if document == nil || document.Source.OfBase64 == nil {
|
||||
t.Fatalf("Expected second content block to be a base64 document, got %#v", messages[0].Content[1])
|
||||
}
|
||||
if document.Source.OfBase64.Data != "SGVsbG8=" {
|
||||
t.Fatalf("Expected document data to match base64 payload, got %s", document.Source.OfBase64.Data)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user