Compare commits


17 Commits

Author SHA1 Message Date
github-actions[bot]
8c9c3135ab chore(release): Update version to v1.4.381 2026-01-17 14:50:18 +00:00
Kayvan Sylvan
42392b0717 Merge pull request #1940 from ksylvan/kayvan/fix-ollama-serve-flow
Rewrite Ollama chat handler to support proper streaming responses
2026-01-17 06:47:56 -08:00
Kayvan Sylvan
2cb2a76200 feat: add support for pattern variables in Ollama API requests
## CHANGES

- Add `Variables` field to `OllamaRequestBody` struct for direct variable passing
- Change `Options` field from empty struct to flexible `map[string]any` type
- Extract variables from top-level `Variables` field or nested `Options.variables`
- Support parsing variables as JSON string or map format
- Pass extracted variables to `PromptRequest` for single message chats
- Pass extracted variables to `PromptRequest` for multi-message chats
- Add `omitempty` JSON tags to optional fields
2026-01-17 06:35:41 -08:00
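A minimal sketch (not code from the PR; the pattern name and variable values are made up) of the two request shapes the new extraction logic accepts — `variables` at the top level, or nested under `options` as a JSON string (a plain map also works):

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Variant 1: Fabric-specific top-level "variables" field.
	direct := map[string]any{
		"model":     "summarize:latest",
		"messages":  []map[string]string{{"role": "user", "content": "hello"}},
		"stream":    false,
		"variables": map[string]string{"lang": "en"},
	}
	// Variant 2: variables nested under "options", as a JSON-encoded string.
	nested := map[string]any{
		"model":    "summarize:latest",
		"messages": []map[string]string{{"role": "user", "content": "hello"}},
		"stream":   false,
		"options":  map[string]any{"variables": `{"lang": "en"}`},
	}
	for _, body := range []map[string]any{direct, nested} {
		b, err := json.Marshal(body)
		if err != nil {
			panic(err)
		}
		fmt.Println(string(b))
	}
}
```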
Kayvan Sylvan
0f466a32bc chore: incoming 1940 changelog entry 2026-01-17 05:35:54 -08:00
Kayvan Sylvan
c7c9d73c01 fix: return string error payloads and fail non-stream empty upstream
## CHANGES
- Serialize JSON error field as `err.Error()` string
- Treat non-stream upstream empty content as 502 error
- Keep streaming mode tolerant when upstream returns no content
2026-01-17 05:34:38 -08:00
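Context for the `err.Error()` change, as a standalone illustration: Go errors typically keep their message in an unexported field, so marshalling the error value itself yields an empty JSON object:

```go
package main

import (
	"encoding/json"
	"errors"
	"fmt"
)

func main() {
	err := errors.New("connection refused")
	asValue, _ := json.Marshal(map[string]any{"error": err})          // error has no exported fields
	asString, _ := json.Marshal(map[string]any{"error": err.Error()}) // marshal the message instead
	fmt.Println(string(asValue))  // {"error":{}}
	fmt.Println(string(asString)) // {"error":"connection refused"}
}
```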
Kayvan Sylvan
61e8871396 fix: set NDJSON header only after successful upstream response
## CHANGES
- Move NDJSON `Content-Type` header after status validation
- Avoid setting stream headers on upstream error responses
- Log warning when upstream returns no streamed content
- Keep duration timing consistent across response paths
- Preserve existing streaming and non-streaming response behavior
2026-01-17 05:03:03 -08:00
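Why the header has to wait for status validation (a sketch, not the PR's code): once `WriteHeader` runs, later header changes are ignored, so a prematurely set stream header would stick to error responses too:

```go
package main

import (
	"fmt"
	"net/http"
	"net/http/httptest"
)

func main() {
	rec := httptest.NewRecorder()
	var w http.ResponseWriter = rec
	w.Header().Set("Content-Type", "application/x-ndjson") // set before validating upstream
	w.WriteHeader(http.StatusBadGateway)                   // headers are committed here
	w.Header().Set("Content-Type", "application/json")     // too late: ignored
	res := rec.Result()
	fmt.Println(res.StatusCode, res.Header.Get("Content-Type")) // 502 application/x-ndjson
}
```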
Kayvan Sylvan
04fef11e17 fix: harden Ollama streaming flush and align metric counters with int64
## CHANGES
- Use int64 for prompt and eval count fields
- Skip sending secondary error message on stream write failure
- Allow non-http schemes and validate only the host for bare addresses
- Flush response only when writer implements http.Flusher
2026-01-17 04:55:49 -08:00
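The flush guard from the last bullet, isolated (helper name invented): `http.ResponseWriter` does not promise `http.Flusher`, so the type assertion makes flushing a safe no-op under writers or middleware wrappers that cannot flush:

```go
package streamutil

import "net/http"

// flushIfPossible pushes buffered bytes to the client when the underlying
// writer supports it; otherwise it silently does nothing.
func flushIfPossible(w http.ResponseWriter) {
	if flusher, ok := w.(http.Flusher); ok {
		flusher.Flush()
	}
}
```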
Kayvan Sylvan
c50b9a61de fix: propagate request context and close Ollama stream on errors
## CHANGES
- Use Gin request context for outbound HTTP calls
- Send final stream chunk when response write fails
- Capture timing duration once for consistent metrics
- Build final Ollama response via shared helper function
- Validate Fabric base URL scheme is http/https only
- Add clarifying documentation comments for URL and writers
2026-01-17 04:21:41 -08:00
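The context bullet in isolation (a sketch; the function name is hypothetical, and the diff below uses `req.WithContext` to the same effect): deriving the upstream request from the inbound Gin request's context cancels the Fabric call when the client disconnects:

```go
package restapi

import (
	"bytes"
	"net/http"

	"github.com/gin-gonic/gin"
)

// forwardChat posts body upstream, tied to the lifetime of the incoming
// request: if the Ollama-side client goes away, the outbound call is
// cancelled along with it.
func forwardChat(c *gin.Context, url string, body []byte) (*http.Response, error) {
	req, err := http.NewRequestWithContext(c.Request.Context(), http.MethodPost, url, bytes.NewBuffer(body))
	if err != nil {
		return nil, err
	}
	return http.DefaultClient.Do(req)
}
```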
Kayvan Sylvan
665267842f fix: align Ollama duration fields to int64 nanosecond precision
## CHANGES
- Use int64 for `load_duration` JSON field values
- Use int64 for `prompt_eval_duration` JSON field values
- Remove lossy int casts when assigning nanosecond durations
- Keep duration payloads consistent with `total_duration` precision
- Prevent potential overflow on long-running request timing
2026-01-17 04:01:26 -08:00
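The overflow behind these bullets, made concrete: `Nanoseconds()` returns `int64`, and on a 32-bit platform (where Go's `int` is 32 bits) anything past roughly 2.1 seconds wraps; `int32` stands in for 32-bit `int` here:

```go
package main

import (
	"fmt"
	"math"
	"time"
)

func main() {
	d := 3 * time.Second
	ns := d.Nanoseconds()           // int64: 3000000000
	fmt.Println(ns > math.MaxInt32) // true — already past int32 range
	fmt.Println(int32(ns))          // -1294967296: the wrapped value
}
```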
Kayvan Sylvan
e2b63ddc2f fix: improve SSE scan errors and validate bare Fabric address inputs
## CHANGES
- Send detailed SSE stream scan errors in responses
- Detect token-too-long and return clear buffer-limit message
- Unify streaming and JSON error messaging for scan failures
- Validate bare Fabric address using URL parsing
- Reject bare addresses missing host or hostname
- Disallow path components in bare Fabric addresses
- Trim trailing slash from validated Fabric chat URL
- Add tests covering invalid bare addresses with paths
2026-01-17 03:32:07 -08:00
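The buffer-limit path in miniature (demo sizes are made up; the handler caps SSE lines at 1MB): `bufio.Scanner` fails with `bufio.ErrTooLong` when a single line exceeds its token limit, which produces the "token too long" text this commit maps to a clearer message:

```go
package main

import (
	"bufio"
	"errors"
	"fmt"
	"strings"
)

func main() {
	oversized := "data: " + strings.Repeat("x", 2048) // one oversized SSE line
	scanner := bufio.NewScanner(strings.NewReader(oversized))
	scanner.Buffer(make([]byte, 0, 64), 1024) // cap tokens at 1KB for the demo
	for scanner.Scan() {
	}
	err := scanner.Err()
	fmt.Println(errors.Is(err, bufio.ErrTooLong)) // true
	fmt.Println(err)                              // bufio.Scanner: token too long
}
```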
Kayvan Sylvan
97b6b76dd2 fix: reject hostless Fabric chat URLs like https://:8080
## CHANGES
- Validate that the parsed URL host does not start with a colon
- Return explicit error for missing hostname in URL
- Update unit test to expect error on port-only host
- Prevent accidental acceptance of malformed `https://:port` addresses
2026-01-17 03:06:17 -08:00
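What makes this check necessary: `url.Parse` accepts a port-only authority without complaint, so `https://:8080` parses "successfully" with an empty hostname:

```go
package main

import (
	"fmt"
	"net/url"
)

func main() {
	u, err := url.Parse("https://:8080")
	fmt.Println(err)                 // <nil> — parsing succeeds
	fmt.Println(u.Host)              // :8080
	fmt.Printf("%q\n", u.Hostname()) // "" — no hostname to connect to
}
```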
Kayvan Sylvan
f3eed4593f chore: incoming 1940 changelog entry 2026-01-17 02:32:12 -08:00
Kayvan Sylvan
29a32a8439 fix: validate Fabric chat URL host and tidy Ollama responses
## CHANGES
- Set NDJSON content type before checking upstream errors
- Reject parsed URLs that omit a hostname
- Remove hardcoded eval count placeholders from responses
- Add unit tests for Fabric chat URL builder
- Cover colon-port, host:port, and IP address inputs
2026-01-17 02:31:46 -08:00
Kayvan Sylvan
ae6d4d1fb3 fix: handle upstream non-2xx and return stringified error payloads
## CHANGES
- Convert JSON error responses to use err.Error()
- Detect upstream Fabric non-2xx status before scanning
- Read and log upstream error body when possible
- Return upstream status error message for non-stream mode
- Stream error message via NDJSON when streaming enabled
- Set NDJSON Content-Type header before first streaming write
- Remove per-chunk header setting during streaming output
2026-01-17 01:37:20 -08:00
Kayvan Sylvan
8310695e1a fix(ollama): address Copilot review feedback for error handling
Addresses all 8 Copilot review comments on PR #1940:

Critical fixes:
- Replace log.Fatal with proper HTTP error response to prevent
  server crashes on request failures
- Add streaming-aware error handling to maintain consistent
  response format (prevents mixing JSON with NDJSON)

Error messaging improvements:
- Replace "testing endpoint" placeholders with descriptive
  error messages
- Add clear context for unmarshal and scanning failures

Protocol compliance:
- Set Content-Type: application/x-ndjson for streaming responses
- Ensure all error paths respect stream vs non-stream mode

Code cleanup:
- Remove commented-out dead code

Tested both streaming and non-streaming modes successfully.
2026-01-17 01:19:37 -08:00
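The streaming-aware error pattern the review converged on, as a sketch (the helper name is invented; it assumes it lives alongside the PR's `writeOllamaResponse` from the diff below): choose the error encoding by mode, so NDJSON consumers never see a bare JSON object mid-stream:

```go
// respondError reports an error in the format the client is already reading:
// a final NDJSON chunk when streaming, a plain JSON body otherwise.
func respondError(c *gin.Context, stream bool, model, msg string, status int) {
	if stream {
		_ = writeOllamaResponse(c, model, "Error: "+msg, true)
		return
	}
	c.JSON(status, gin.H{"error": msg})
}
```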
Kayvan Sylvan
4fa6abf0df chore: incoming 1940 changelog entry 2026-01-17 00:58:40 -08:00
Kayvan Sylvan
e318a939aa refactor: rewrite Ollama chat handler to support proper streaming responses
- Add `json:"-"` tag to exclude UpdateChan from JSON serialization
- Extract URL building logic into dedicated `buildFabricChatURL` helper function
- Replace single-read body parsing with streaming `bufio.Scanner` approach
- Add proper SSE data prefix parsing for fabric response format
- Implement real-time streaming with `writeOllamaResponse` helper function
- Add `writeOllamaResponseStruct` for consistent JSON response writing
- Handle both streaming and non-streaming response modes separately
- Add proper error handling for fabric error response types
- Ensure response body is properly closed with defer statement
2026-01-17 00:52:29 -08:00
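The heart of the rewrite, reduced to a self-contained skeleton (function and payload type are illustrative; the real handler in the diff below also handles error chunks, metrics, and non-stream aggregation): scan the upstream SSE body line by line, decode each `data: ` payload, and emit content as it arrives:

```go
package restapi

import (
	"bufio"
	"encoding/json"
	"io"
	"strings"
)

// relaySSE is a simplified sketch of the streaming loop: it scans an SSE
// body and hands each decoded "data: " content payload to emit.
func relaySSE(body io.Reader, emit func(content string) error) error {
	scanner := bufio.NewScanner(body)
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) // allow SSE lines up to 1MB
	for scanner.Scan() {
		line := scanner.Text()
		if !strings.HasPrefix(line, "data: ") {
			continue // skip comments, event names, and blank keep-alive lines
		}
		var chunk struct {
			Type    string `json:"type"`
			Content string `json:"content"`
		}
		if err := json.Unmarshal([]byte(strings.TrimPrefix(line, "data: ")), &chunk); err != nil {
			return err
		}
		if chunk.Type == "content" {
			if err := emit(chunk.Content); err != nil {
				return err
			}
		}
	}
	return scanner.Err()
}
```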
7 changed files with 364 additions and 76 deletions

View File

@@ -1,5 +1,15 @@
# Changelog
## v1.4.381 (2026-01-17)
### PR [#1940](https://github.com/danielmiessler/Fabric/pull/1940) by [ksylvan](https://github.com/ksylvan): Rewrite Ollama chat handler to support proper streaming responses
- Refactor Ollama chat handler to support proper streaming responses with real-time SSE data parsing
- Replace single-read body parsing with streaming bufio.Scanner approach and implement writeOllamaResponse helper function
- Add comprehensive error handling improvements including proper HTTP error responses instead of log.Fatal to prevent server crashes
- Fix upstream error handling to return stringified error payloads and validate Fabric chat URL hosts
- Implement proper request context propagation and align duration fields to int64 nanosecond precision for consistency
## v1.4.380 (2026-01-16)
### PR [#1936](https://github.com/danielmiessler/Fabric/pull/1936) by [ksylvan](https://github.com/ksylvan): New Vendor: Microsoft Copilot

View File

@@ -1,3 +1,3 @@
package main
var version = "v1.4.380"
var version = "v1.4.381"

Binary file not shown.

View File

@@ -53,7 +53,7 @@ type ChatOptions struct {
NotificationCommand string
ShowMetadata bool
Quiet bool
-	UpdateChan          chan StreamUpdate
+	UpdateChan          chan StreamUpdate `json:"-"`
}
// NormalizeMessages remove empty messages and ensure messages order user-assist-user

View File

@@ -1,13 +1,14 @@
package restapi
import (
"bufio"
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"net/url"
"strings"
"time"
@@ -43,11 +44,11 @@ type APIConvert struct {
}
type OllamaRequestBody struct {
-	Messages []OllamaMessage `json:"messages"`
-	Model    string          `json:"model"`
-	Options  struct {
-	} `json:"options"`
-	Stream bool `json:"stream"`
+	Messages  []OllamaMessage   `json:"messages"`
+	Model     string            `json:"model"`
+	Options   map[string]any    `json:"options,omitempty"`
+	Stream    bool              `json:"stream"`
+	Variables map[string]string `json:"variables,omitempty"` // Fabric-specific: pattern variables (direct)
}
type OllamaMessage struct {
@@ -65,10 +66,10 @@ type OllamaResponse struct {
DoneReason string `json:"done_reason,omitempty"`
Done bool `json:"done"`
TotalDuration int64 `json:"total_duration,omitempty"`
-	LoadDuration       int   `json:"load_duration,omitempty"`
-	PromptEvalCount    int   `json:"prompt_eval_count,omitempty"`
-	PromptEvalDuration int   `json:"prompt_eval_duration,omitempty"`
-	EvalCount          int   `json:"eval_count,omitempty"`
+	LoadDuration       int64 `json:"load_duration,omitempty"`
+	PromptEvalCount    int64 `json:"prompt_eval_count,omitempty"`
+	PromptEvalDuration int64 `json:"prompt_eval_duration,omitempty"`
+	EvalCount          int64 `json:"eval_count,omitempty"`
EvalDuration int64 `json:"eval_duration,omitempty"`
}
@@ -163,6 +164,29 @@ func (f APIConvert) ollamaChat(c *gin.Context) {
now := time.Now()
var chat ChatRequest
// Extract variables from either top-level Variables field or Options.variables
variables := prompt.Variables
if variables == nil && prompt.Options != nil {
if optVars, ok := prompt.Options["variables"]; ok {
// Options.variables can be either a JSON string or a map
switch v := optVars.(type) {
case string:
// Parse JSON string into map
if err := json.Unmarshal([]byte(v), &variables); err != nil {
log.Printf("Warning: failed to parse options.variables as JSON: %v", err)
}
case map[string]any:
// Convert map[string]any to map[string]string
variables = make(map[string]string)
for k, val := range v {
if s, ok := val.(string); ok {
variables[k] = s
}
}
}
}
}
if len(prompt.Messages) == 1 {
chat.Prompts = []PromptRequest{{
UserInput: prompt.Messages[0].Content,
@@ -170,6 +194,7 @@ func (f APIConvert) ollamaChat(c *gin.Context) {
Model: "",
ContextName: "",
PatternName: strings.Split(prompt.Model, ":")[0],
Variables: variables,
}}
} else if len(prompt.Messages) > 1 {
var content string
@@ -182,89 +207,242 @@ func (f APIConvert) ollamaChat(c *gin.Context) {
Model: "",
ContextName: "",
PatternName: strings.Split(prompt.Model, ":")[0],
Variables: variables,
}}
}
fabricChatReq, err := json.Marshal(chat)
if err != nil {
log.Printf("Error marshalling body: %v", err)
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err})
+		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
-	ctx := context.Background()
var req *http.Request
-	if strings.Contains(*f.addr, "http") {
-		req, err = http.NewRequest("POST", fmt.Sprintf("%s/chat", *f.addr), bytes.NewBuffer(fabricChatReq))
-	} else {
-		req, err = http.NewRequest("POST", fmt.Sprintf("http://127.0.0.1%s/chat", *f.addr), bytes.NewBuffer(fabricChatReq))
-	}
+	baseURL, err := buildFabricChatURL(*f.addr)
if err != nil {
-		log.Fatal(err)
+		log.Printf("Error building /chat URL: %v", err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+		return
}
+	req, err = http.NewRequest("POST", fmt.Sprintf("%s/chat", baseURL), bytes.NewBuffer(fabricChatReq))
+	if err != nil {
+		log.Printf("Error creating /chat request: %v", err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create request"})
+		return
+	}
-	req = req.WithContext(ctx)
+	req = req.WithContext(c.Request.Context())
fabricRes, err := http.DefaultClient.Do(req)
if err != nil {
log.Printf("Error getting /chat body: %v", err)
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err})
+		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
-	body, err = io.ReadAll(fabricRes.Body)
-	if err != nil {
-		log.Printf("Error reading body: %v", err)
-		c.JSON(http.StatusInternalServerError, gin.H{"error": "testing endpoint"})
-		return
-	}
-	var forwardedResponse OllamaResponse
-	var forwardedResponses []OllamaResponse
-	var fabricResponse FabricResponseFormat
-	err = json.Unmarshal([]byte(strings.Split(strings.Split(string(body), "\n")[0], "data: ")[1]), &fabricResponse)
-	if err != nil {
-		log.Printf("Error unmarshalling body: %v", err)
-		c.JSON(http.StatusInternalServerError, gin.H{"error": "testing endpoint"})
-		return
-	}
-	for word := range strings.SplitSeq(fabricResponse.Content, " ") {
-		forwardedResponse = OllamaResponse{
-			Model:     "",
-			CreatedAt: "",
-			Message: struct {
-				Role    string `json:"role"`
-				Content string `json:"content"`
-			}(struct {
-				Role    string
-				Content string
-			}{Content: fmt.Sprintf("%s ", word), Role: "assistant"}),
-			Done: false,
-		}
-		forwardedResponses = append(forwardedResponses, forwardedResponse)
-	}
-	forwardedResponse.Model = prompt.Model
-	forwardedResponse.CreatedAt = time.Now().UTC().Format("2006-01-02T15:04:05.999999999Z")
-	forwardedResponse.Message.Role = "assistant"
-	forwardedResponse.Message.Content = ""
-	forwardedResponse.DoneReason = "stop"
-	forwardedResponse.Done = true
-	forwardedResponse.TotalDuration = time.Since(now).Nanoseconds()
-	forwardedResponse.LoadDuration = int(time.Since(now).Nanoseconds())
-	forwardedResponse.PromptEvalCount = 42
-	forwardedResponse.PromptEvalDuration = int(time.Since(now).Nanoseconds())
-	forwardedResponse.EvalCount = 420
-	forwardedResponse.EvalDuration = time.Since(now).Nanoseconds()
-	forwardedResponses = append(forwardedResponses, forwardedResponse)
defer fabricRes.Body.Close()
-	var res []byte
-	for _, response := range forwardedResponses {
-		marshalled, err := json.Marshal(response)
-		if err != nil {
-			log.Printf("Error marshalling body: %v", err)
-			c.JSON(http.StatusInternalServerError, gin.H{"error": err})
if fabricRes.StatusCode < http.StatusOK || fabricRes.StatusCode >= http.StatusMultipleChoices {
bodyBytes, readErr := io.ReadAll(fabricRes.Body)
if readErr != nil {
log.Printf("Upstream Fabric server returned non-2xx status %d and body could not be read: %v", fabricRes.StatusCode, readErr)
} else {
log.Printf("Upstream Fabric server returned non-2xx status %d: %s", fabricRes.StatusCode, string(bodyBytes))
}
errorMessage := fmt.Sprintf("upstream Fabric server returned status %d", fabricRes.StatusCode)
if prompt.Stream {
_ = writeOllamaResponse(c, prompt.Model, fmt.Sprintf("Error: %s", errorMessage), true)
} else {
c.JSON(fabricRes.StatusCode, gin.H{"error": errorMessage})
}
return
}
if prompt.Stream {
c.Header("Content-Type", "application/x-ndjson")
}
var contentBuilder strings.Builder
scanner := bufio.NewScanner(fabricRes.Body)
scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
for scanner.Scan() {
line := scanner.Text()
if !strings.HasPrefix(line, "data: ") {
continue
}
payload := strings.TrimPrefix(line, "data: ")
var fabricResponse FabricResponseFormat
if err := json.Unmarshal([]byte(payload), &fabricResponse); err != nil {
log.Printf("Error unmarshalling body: %v", err)
if prompt.Stream {
// In streaming mode, send the error in the same streaming format
_ = writeOllamaResponse(c, prompt.Model, "Error: failed to parse upstream response", true)
} else {
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to unmarshal Fabric response"})
}
return
}
-		res = append(res, marshalled...)
-		res = append(res, '\n')
if fabricResponse.Type == "error" {
if prompt.Stream {
// In streaming mode, propagate the upstream error via a final streaming chunk
_ = writeOllamaResponse(c, prompt.Model, fmt.Sprintf("Error: %s", fabricResponse.Content), true)
} else {
c.JSON(http.StatusInternalServerError, gin.H{"error": fabricResponse.Content})
}
return
}
if fabricResponse.Type != "content" {
continue
}
contentBuilder.WriteString(fabricResponse.Content)
if prompt.Stream {
if err := writeOllamaResponse(c, prompt.Model, fabricResponse.Content, false); err != nil {
log.Printf("Error writing response: %v", err)
return
}
}
}
if err := scanner.Err(); err != nil {
log.Printf("Error scanning body: %v", err)
errorMsg := fmt.Sprintf("failed to scan SSE response stream: %v", err)
// Check for buffer size exceeded error
if strings.Contains(err.Error(), "token too long") {
errorMsg = "SSE line exceeds 1MB buffer limit - data line too large"
}
if prompt.Stream {
// In streaming mode, send the error in the same streaming format
_ = writeOllamaResponse(c, prompt.Model, fmt.Sprintf("Error: %s", errorMsg), true)
} else {
c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
}
return
}
c.Data(200, "application/json", res)
//c.JSON(200, forwardedResponse)
// Capture duration once for consistent timing values
duration := time.Since(now).Nanoseconds()
// Check if we received any content from upstream
if contentBuilder.Len() == 0 {
log.Printf("Warning: no content received from upstream Fabric server")
// In non-streaming mode, treat absence of content as an error
if !prompt.Stream {
c.JSON(http.StatusBadGateway, gin.H{"error": "no content received from upstream Fabric server"})
return
}
}
if !prompt.Stream {
response := buildFinalOllamaResponse(prompt.Model, contentBuilder.String(), duration)
c.JSON(200, response)
return
}
finalResponse := buildFinalOllamaResponse(prompt.Model, "", duration)
if err := writeOllamaResponseStruct(c, finalResponse); err != nil {
log.Printf("Error writing response: %v", err)
}
}
// buildFinalOllamaResponse constructs the final OllamaResponse with timing metrics
// and the complete message content. Used for both streaming and non-streaming final responses.
func buildFinalOllamaResponse(model string, content string, duration int64) OllamaResponse {
return OllamaResponse{
Model: model,
CreatedAt: time.Now().UTC().Format("2006-01-02T15:04:05.999999999Z"),
Message: struct {
Role string `json:"role"`
Content string `json:"content"`
}(struct {
Role string
Content string
}{Content: content, Role: "assistant"}),
DoneReason: "stop",
Done: true,
TotalDuration: duration,
LoadDuration: duration,
PromptEvalDuration: duration,
EvalDuration: duration,
}
}
// buildFabricChatURL constructs a valid HTTP/HTTPS base URL from various address
// formats. It accepts fully-qualified URLs (http:// or https://), :port shorthand
// which is resolved to http://127.0.0.1:port, and bare host[:port] addresses. It
// returns a normalized URL string without a trailing slash, or an error if the
// address is empty, invalid, missing a host/hostname, or (for bare addresses)
// contains a path component.
func buildFabricChatURL(addr string) (string, error) {
if addr == "" {
return "", fmt.Errorf("empty address")
}
if strings.HasPrefix(addr, "http://") || strings.HasPrefix(addr, "https://") {
parsed, err := url.Parse(addr)
if err != nil {
return "", fmt.Errorf("invalid address: %w", err)
}
if parsed.Host == "" {
return "", fmt.Errorf("invalid address: missing host")
}
if strings.HasPrefix(parsed.Host, ":") {
return "", fmt.Errorf("invalid address: missing hostname")
}
return strings.TrimRight(parsed.String(), "/"), nil
}
if strings.HasPrefix(addr, ":") {
return fmt.Sprintf("http://127.0.0.1%s", addr), nil
}
// Validate bare addresses (without http/https prefix)
parsed, err := url.Parse("http://" + addr)
if err != nil {
return "", fmt.Errorf("invalid address: %w", err)
}
if parsed.Host == "" {
return "", fmt.Errorf("invalid address: missing host")
}
if strings.HasPrefix(parsed.Host, ":") {
return "", fmt.Errorf("invalid address: missing hostname")
}
// Bare addresses should be host[:port] only - reject path components
if parsed.Path != "" && parsed.Path != "/" {
return "", fmt.Errorf("invalid address: path component not allowed in bare address")
}
return strings.TrimRight(parsed.String(), "/"), nil
}
// writeOllamaResponse constructs an Ollama-formatted response chunk and writes it
// to the streaming output associated with the provided Gin context. The model
// parameter identifies the model, content is the assistant message text, and
// done indicates whether this is the final chunk in the stream.
func writeOllamaResponse(c *gin.Context, model string, content string, done bool) error {
response := OllamaResponse{
Model: model,
CreatedAt: time.Now().UTC().Format("2006-01-02T15:04:05.999999999Z"),
Message: struct {
Role string `json:"role"`
Content string `json:"content"`
}(struct {
Role string
Content string
}{Content: content, Role: "assistant"}),
Done: done,
}
return writeOllamaResponseStruct(c, response)
}
// writeOllamaResponseStruct marshals the provided OllamaResponse and writes it
// as newline-delimited JSON to the HTTP response stream.
func writeOllamaResponseStruct(c *gin.Context, response OllamaResponse) error {
marshalled, err := json.Marshal(response)
if err != nil {
return err
}
if _, err := c.Writer.Write(marshalled); err != nil {
return err
}
if _, err := c.Writer.Write([]byte("\n")); err != nil {
return err
}
if flusher, ok := c.Writer.(http.Flusher); ok {
flusher.Flush()
}
return nil
}

View File

@@ -0,0 +1,100 @@
package restapi
import (
"testing"
)
func TestBuildFabricChatURL(t *testing.T) {
tests := []struct {
name string
addr string
want string
wantErr bool
}{
{
name: "empty address",
addr: "",
want: "",
wantErr: true,
},
{
name: "valid http URL",
addr: "http://localhost:8080",
want: "http://localhost:8080",
wantErr: false,
},
{
name: "valid https URL",
addr: "https://api.example.com",
want: "https://api.example.com",
wantErr: false,
},
{
name: "http URL with trailing slash",
addr: "http://localhost:8080/",
want: "http://localhost:8080",
wantErr: false,
},
{
name: "malformed URL - missing host",
addr: "http://",
want: "",
wantErr: true,
},
{
name: "malformed URL - port only with http",
addr: "https://:8080",
want: "",
wantErr: true,
},
{
name: "colon-prefixed port",
addr: ":8080",
want: "http://127.0.0.1:8080",
wantErr: false,
},
{
name: "bare host:port",
addr: "localhost:8080",
want: "http://localhost:8080",
wantErr: false,
},
{
name: "bare hostname",
addr: "localhost",
want: "http://localhost",
wantErr: false,
},
{
name: "IP address with port",
addr: "192.168.1.1:3000",
want: "http://192.168.1.1:3000",
wantErr: false,
},
{
name: "bare address with path - invalid",
addr: "localhost:8080/some/path",
want: "",
wantErr: true,
},
{
name: "bare hostname with path - invalid",
addr: "localhost/api",
want: "",
wantErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := buildFabricChatURL(tt.addr)
if (err != nil) != tt.wantErr {
t.Errorf("buildFabricChatURL() error = %v, wantErr %v", err, tt.wantErr)
return
}
if got != tt.want {
t.Errorf("buildFabricChatURL() = %v, want %v", got, tt.want)
}
})
}
}

View File

@@ -1 +1 @@
"1.4.380"
"1.4.381"