mirror of
https://github.com/danielmiessler/Fabric.git
synced 2026-01-09 14:28:01 -05:00
Merge pull request #1855 from ksylvan/kayvan/ollama_image_handling
feat: add image attachment support for Ollama vision models
This commit is contained in:
7
cmd/generate_changelog/incoming/1855.txt
Normal file
7
cmd/generate_changelog/incoming/1855.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
### PR [#1855](https://github.com/danielmiessler/Fabric/pull/1855) by [ksylvan](https://github.com/ksylvan): feat: add image attachment support for Ollama vision models
|
||||
|
||||
- Add multi-modal image support to Ollama client
|
||||
- Implement convertMessage to handle multi-content chat messages
|
||||
- Add loadImageBytes to fetch images from URLs
|
||||
- Support base64 data URLs for inline images
|
||||
- Handle HTTP image URLs with context propagation
|
||||
@@ -2,7 +2,9 @@ package ollama
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
@@ -10,11 +12,10 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/danielmiessler/fabric/internal/chat"
|
||||
ollamaapi "github.com/ollama/ollama/api"
|
||||
"github.com/samber/lo"
|
||||
|
||||
"github.com/danielmiessler/fabric/internal/domain"
|
||||
debuglog "github.com/danielmiessler/fabric/internal/log"
|
||||
"github.com/danielmiessler/fabric/internal/plugins"
|
||||
ollamaapi "github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
const defaultBaseUrl = "http://localhost:11434"
|
||||
@@ -48,6 +49,7 @@ type Client struct {
|
||||
apiUrl *url.URL
|
||||
client *ollamaapi.Client
|
||||
ApiHttpTimeout *plugins.SetupQuestion
|
||||
httpClient *http.Client
|
||||
}
|
||||
|
||||
type transport_sec struct {
|
||||
@@ -84,7 +86,8 @@ func (o *Client) configure() (err error) {
|
||||
}
|
||||
}
|
||||
|
||||
o.client = ollamaapi.NewClient(o.apiUrl, &http.Client{Timeout: timeout, Transport: &transport_sec{underlyingTransport: http.DefaultTransport, ApiKey: o.ApiKey}})
|
||||
o.httpClient = &http.Client{Timeout: timeout, Transport: &transport_sec{underlyingTransport: http.DefaultTransport, ApiKey: o.ApiKey}}
|
||||
o.client = ollamaapi.NewClient(o.apiUrl, o.httpClient)
|
||||
|
||||
return
|
||||
}
|
||||
@@ -104,15 +107,18 @@ func (o *Client) ListModels() (ret []string, err error) {
|
||||
}
|
||||
|
||||
func (o *Client) SendStream(msgs []*chat.ChatCompletionMessage, opts *domain.ChatOptions, channel chan string) (err error) {
|
||||
req := o.createChatRequest(msgs, opts)
|
||||
ctx := context.Background()
|
||||
|
||||
var req ollamaapi.ChatRequest
|
||||
if req, err = o.createChatRequest(ctx, msgs, opts); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
respFunc := func(resp ollamaapi.ChatResponse) (streamErr error) {
|
||||
channel <- resp.Message.Content
|
||||
return
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
if err = o.client.Chat(ctx, &req, respFunc); err != nil {
|
||||
return
|
||||
}
|
||||
@@ -124,7 +130,10 @@ func (o *Client) SendStream(msgs []*chat.ChatCompletionMessage, opts *domain.Cha
|
||||
func (o *Client) Send(ctx context.Context, msgs []*chat.ChatCompletionMessage, opts *domain.ChatOptions) (ret string, err error) {
|
||||
bf := false
|
||||
|
||||
req := o.createChatRequest(msgs, opts)
|
||||
var req ollamaapi.ChatRequest
|
||||
if req, err = o.createChatRequest(ctx, msgs, opts); err != nil {
|
||||
return
|
||||
}
|
||||
req.Stream = &bf
|
||||
|
||||
respFunc := func(resp ollamaapi.ChatResponse) (streamErr error) {
|
||||
@@ -133,15 +142,18 @@ func (o *Client) Send(ctx context.Context, msgs []*chat.ChatCompletionMessage, o
|
||||
}
|
||||
|
||||
if err = o.client.Chat(ctx, &req, respFunc); err != nil {
|
||||
fmt.Printf("FRED --> %s\n", err)
|
||||
debuglog.Debug(debuglog.Basic, "Ollama chat request failed: %v\n", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (o *Client) createChatRequest(msgs []*chat.ChatCompletionMessage, opts *domain.ChatOptions) (ret ollamaapi.ChatRequest) {
|
||||
messages := lo.Map(msgs, func(message *chat.ChatCompletionMessage, _ int) (ret ollamaapi.Message) {
|
||||
return ollamaapi.Message{Role: message.Role, Content: message.Content}
|
||||
})
|
||||
func (o *Client) createChatRequest(ctx context.Context, msgs []*chat.ChatCompletionMessage, opts *domain.ChatOptions) (ret ollamaapi.ChatRequest, err error) {
|
||||
messages := make([]ollamaapi.Message, len(msgs))
|
||||
for i, message := range msgs {
|
||||
if messages[i], err = o.convertMessage(ctx, message); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
options := map[string]interface{}{
|
||||
"temperature": opts.Temperature,
|
||||
@@ -162,6 +174,77 @@ func (o *Client) createChatRequest(msgs []*chat.ChatCompletionMessage, opts *dom
|
||||
return
|
||||
}
|
||||
|
||||
func (o *Client) convertMessage(ctx context.Context, message *chat.ChatCompletionMessage) (ret ollamaapi.Message, err error) {
|
||||
ret = ollamaapi.Message{Role: message.Role, Content: message.Content}
|
||||
|
||||
if len(message.MultiContent) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Pre-allocate with capacity hint
|
||||
textParts := make([]string, 0, len(message.MultiContent))
|
||||
if strings.TrimSpace(ret.Content) != "" {
|
||||
textParts = append(textParts, strings.TrimSpace(ret.Content))
|
||||
}
|
||||
|
||||
for _, part := range message.MultiContent {
|
||||
switch part.Type {
|
||||
case chat.ChatMessagePartTypeText:
|
||||
if trimmed := strings.TrimSpace(part.Text); trimmed != "" {
|
||||
textParts = append(textParts, trimmed)
|
||||
}
|
||||
case chat.ChatMessagePartTypeImageURL:
|
||||
// Nil guard
|
||||
if part.ImageURL == nil || part.ImageURL.URL == "" {
|
||||
continue
|
||||
}
|
||||
var img []byte
|
||||
if img, err = o.loadImageBytes(ctx, part.ImageURL.URL); err != nil {
|
||||
return
|
||||
}
|
||||
ret.Images = append(ret.Images, ollamaapi.ImageData(img))
|
||||
}
|
||||
}
|
||||
|
||||
ret.Content = strings.Join(textParts, "\n")
|
||||
return
|
||||
}
|
||||
|
||||
func (o *Client) loadImageBytes(ctx context.Context, imageURL string) (ret []byte, err error) {
|
||||
// Handle data URLs (base64 encoded)
|
||||
if strings.HasPrefix(imageURL, "data:") {
|
||||
parts := strings.SplitN(imageURL, ",", 2)
|
||||
if len(parts) != 2 {
|
||||
err = fmt.Errorf("invalid data URL format")
|
||||
return
|
||||
}
|
||||
if ret, err = base64.StdEncoding.DecodeString(parts[1]); err != nil {
|
||||
err = fmt.Errorf("failed to decode data URL: %w", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Handle HTTP URLs with context
|
||||
var req *http.Request
|
||||
if req, err = http.NewRequestWithContext(ctx, http.MethodGet, imageURL, nil); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
var resp *http.Response
|
||||
if resp, err = o.httpClient.Do(req); err != nil {
|
||||
return
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode >= http.StatusBadRequest {
|
||||
err = fmt.Errorf("failed to fetch image %s: %s", imageURL, resp.Status)
|
||||
return
|
||||
}
|
||||
|
||||
ret, err = io.ReadAll(resp.Body)
|
||||
return
|
||||
}
|
||||
|
||||
func (o *Client) NeedsRawMode(modelName string) bool {
|
||||
ollamaSearchStrings := []string{
|
||||
"llama3",
|
||||
|
||||
Reference in New Issue
Block a user