Merge pull request #1855 from ksylvan/kayvan/ollama_image_handling

feat: add image attachment support for Ollama vision models
This commit is contained in:
Kayvan Sylvan
2025-12-08 08:07:33 +08:00
committed by GitHub
2 changed files with 103 additions and 13 deletions

View File

@@ -0,0 +1,7 @@
### PR [#1855](https://github.com/danielmiessler/Fabric/pull/1855) by [ksylvan](https://github.com/ksylvan): feat: add image attachment support for Ollama vision models
- Add multi-modal image support to Ollama client
- Implement convertMessage to handle multi-content chat messages
- Add loadImageBytes to fetch images from URLs
- Support base64 data URLs for inline images
- Handle HTTP image URLs with context propagation

View File

@@ -2,7 +2,9 @@ package ollama
import (
"context"
"encoding/base64"
"fmt"
"io"
"net/http"
"net/url"
"os"
@@ -10,11 +12,10 @@ import (
"time"
"github.com/danielmiessler/fabric/internal/chat"
ollamaapi "github.com/ollama/ollama/api"
"github.com/samber/lo"
"github.com/danielmiessler/fabric/internal/domain"
debuglog "github.com/danielmiessler/fabric/internal/log"
"github.com/danielmiessler/fabric/internal/plugins"
ollamaapi "github.com/ollama/ollama/api"
)
const defaultBaseUrl = "http://localhost:11434"
@@ -48,6 +49,7 @@ type Client struct {
apiUrl *url.URL
client *ollamaapi.Client
ApiHttpTimeout *plugins.SetupQuestion
httpClient *http.Client
}
type transport_sec struct {
@@ -84,7 +86,8 @@ func (o *Client) configure() (err error) {
}
}
o.client = ollamaapi.NewClient(o.apiUrl, &http.Client{Timeout: timeout, Transport: &transport_sec{underlyingTransport: http.DefaultTransport, ApiKey: o.ApiKey}})
o.httpClient = &http.Client{Timeout: timeout, Transport: &transport_sec{underlyingTransport: http.DefaultTransport, ApiKey: o.ApiKey}}
o.client = ollamaapi.NewClient(o.apiUrl, o.httpClient)
return
}
@@ -104,15 +107,18 @@ func (o *Client) ListModels() (ret []string, err error) {
}
func (o *Client) SendStream(msgs []*chat.ChatCompletionMessage, opts *domain.ChatOptions, channel chan string) (err error) {
req := o.createChatRequest(msgs, opts)
ctx := context.Background()
var req ollamaapi.ChatRequest
if req, err = o.createChatRequest(ctx, msgs, opts); err != nil {
return
}
respFunc := func(resp ollamaapi.ChatResponse) (streamErr error) {
channel <- resp.Message.Content
return
}
ctx := context.Background()
if err = o.client.Chat(ctx, &req, respFunc); err != nil {
return
}
@@ -124,7 +130,10 @@ func (o *Client) SendStream(msgs []*chat.ChatCompletionMessage, opts *domain.Cha
func (o *Client) Send(ctx context.Context, msgs []*chat.ChatCompletionMessage, opts *domain.ChatOptions) (ret string, err error) {
bf := false
req := o.createChatRequest(msgs, opts)
var req ollamaapi.ChatRequest
if req, err = o.createChatRequest(ctx, msgs, opts); err != nil {
return
}
req.Stream = &bf
respFunc := func(resp ollamaapi.ChatResponse) (streamErr error) {
@@ -133,15 +142,18 @@ func (o *Client) Send(ctx context.Context, msgs []*chat.ChatCompletionMessage, o
}
if err = o.client.Chat(ctx, &req, respFunc); err != nil {
fmt.Printf("FRED --> %s\n", err)
debuglog.Debug(debuglog.Basic, "Ollama chat request failed: %v\n", err)
}
return
}
func (o *Client) createChatRequest(msgs []*chat.ChatCompletionMessage, opts *domain.ChatOptions) (ret ollamaapi.ChatRequest) {
messages := lo.Map(msgs, func(message *chat.ChatCompletionMessage, _ int) (ret ollamaapi.Message) {
return ollamaapi.Message{Role: message.Role, Content: message.Content}
})
func (o *Client) createChatRequest(ctx context.Context, msgs []*chat.ChatCompletionMessage, opts *domain.ChatOptions) (ret ollamaapi.ChatRequest, err error) {
messages := make([]ollamaapi.Message, len(msgs))
for i, message := range msgs {
if messages[i], err = o.convertMessage(ctx, message); err != nil {
return
}
}
options := map[string]interface{}{
"temperature": opts.Temperature,
@@ -162,6 +174,77 @@ func (o *Client) createChatRequest(msgs []*chat.ChatCompletionMessage, opts *dom
return
}
func (o *Client) convertMessage(ctx context.Context, message *chat.ChatCompletionMessage) (ret ollamaapi.Message, err error) {
ret = ollamaapi.Message{Role: message.Role, Content: message.Content}
if len(message.MultiContent) == 0 {
return
}
// Pre-allocate with capacity hint
textParts := make([]string, 0, len(message.MultiContent))
if strings.TrimSpace(ret.Content) != "" {
textParts = append(textParts, strings.TrimSpace(ret.Content))
}
for _, part := range message.MultiContent {
switch part.Type {
case chat.ChatMessagePartTypeText:
if trimmed := strings.TrimSpace(part.Text); trimmed != "" {
textParts = append(textParts, trimmed)
}
case chat.ChatMessagePartTypeImageURL:
// Nil guard
if part.ImageURL == nil || part.ImageURL.URL == "" {
continue
}
var img []byte
if img, err = o.loadImageBytes(ctx, part.ImageURL.URL); err != nil {
return
}
ret.Images = append(ret.Images, ollamaapi.ImageData(img))
}
}
ret.Content = strings.Join(textParts, "\n")
return
}
func (o *Client) loadImageBytes(ctx context.Context, imageURL string) (ret []byte, err error) {
// Handle data URLs (base64 encoded)
if strings.HasPrefix(imageURL, "data:") {
parts := strings.SplitN(imageURL, ",", 2)
if len(parts) != 2 {
err = fmt.Errorf("invalid data URL format")
return
}
if ret, err = base64.StdEncoding.DecodeString(parts[1]); err != nil {
err = fmt.Errorf("failed to decode data URL: %w", err)
}
return
}
// Handle HTTP URLs with context
var req *http.Request
if req, err = http.NewRequestWithContext(ctx, http.MethodGet, imageURL, nil); err != nil {
return
}
var resp *http.Response
if resp, err = o.httpClient.Do(req); err != nil {
return
}
defer resp.Body.Close()
if resp.StatusCode >= http.StatusBadRequest {
err = fmt.Errorf("failed to fetch image %s: %s", imageURL, resp.Status)
return
}
ret, err = io.ReadAll(resp.Body)
return
}
func (o *Client) NeedsRawMode(modelName string) bool {
ollamaSearchStrings := []string{
"llama3",