feat: add image generation support with OpenAI image generation model

## CHANGES

- Add `--image-file` flag for saving generated images
- Implement image generation tool integration with OpenAI
- Support image editing with attachment input files
- Add comprehensive test coverage for image features
- Update documentation with image generation examples
- Fix HTML formatting issues in README
- Improve PowerShell code block indentation
- Clean up help text formatting and spacing
This commit is contained in:
Kayvan Sylvan
2025-07-04 14:36:55 -07:00
parent fc8c4babf8
commit 12fc6e2000
7 changed files with 300 additions and 80 deletions

View File

@@ -82,6 +82,9 @@ func (c *Client) formatOptions(opts *common.ChatOptions) string {
builder.WriteString(fmt.Sprintf("SearchLocation: %s\n", opts.SearchLocation))
}
}
if opts.ImageFile != "" {
builder.WriteString(fmt.Sprintf("ImageFile: %s\n", opts.ImageFile))
}
return builder.String()
}

View File

@@ -134,6 +134,12 @@ func (o *Client) sendResponses(ctx context.Context, msgs []*chat.ChatCompletionM
if resp, err = o.ApiClient.Responses.New(ctx, req); err != nil {
return
}
// Extract and save images if requested
if err = o.extractAndSaveImages(resp, opts); err != nil {
return
}
ret = o.extractText(resp)
return
}
@@ -183,6 +189,9 @@ func (o *Client) buildResponseParams(
},
}
// Add tools if enabled
var tools []responses.ToolUnionParam
// Add web search tool if enabled
if opts.Search {
webSearchTool := responses.ToolParamOfWebSearchPreview("web_search_preview")
@@ -195,7 +204,14 @@ func (o *Client) buildResponseParams(
}
}
ret.Tools = []responses.ToolUnionParam{webSearchTool}
tools = append(tools, webSearchTool)
}
// Add image generation tool if needed
tools = o.addImageGenerationTool(opts, tools)
if len(tools) > 0 {
ret.Tools = tools
}
if !opts.Raw {

View File

@@ -0,0 +1,77 @@
package openai
// This file contains helper methods for image generation and processing
// using OpenAI's Responses API and Image API.
import (
"encoding/base64"
"fmt"
"os"
"path/filepath"
"github.com/danielmiessler/fabric/common"
"github.com/openai/openai-go/responses"
)
// addImageGenerationTool returns tools with an image generation tool appended
// when shouldUseImageGeneration reports true for opts; otherwise tools is
// returned unchanged. The appended tool targets the "gpt-image-1" model and
// requests PNG output with quality and size both set to "auto".
func (o *Client) addImageGenerationTool(opts *common.ChatOptions, tools []responses.ToolUnionParam) []responses.ToolUnionParam {
	// Nothing to add unless this request asks for image output.
	if !o.shouldUseImageGeneration(opts) {
		return tools
	}

	return append(tools, responses.ToolUnionParam{
		OfImageGeneration: &responses.ToolImageGenerationParam{
			Type:         "image_generation",
			Model:        "gpt-image-1",
			OutputFormat: "png",
			Quality:      "auto",
			Size:         "auto",
		},
	})
}
// shouldUseImageGeneration reports whether the image generation tool should be
// enabled for this request. The only signal consulted is opts.ImageFile: a
// non-empty value turns image generation on.
func (o *Client) shouldUseImageGeneration(opts *common.ChatOptions) bool {
	return len(opts.ImageFile) > 0
}
// extractAndSaveImages scans resp.Output for the first "image_generation_call"
// item whose status is "completed" and whose Result is non-empty, base64-decodes
// that Result, and writes the bytes to opts.ImageFile, creating the parent
// directory first when the path has one.
//
// It returns nil without writing anything when opts.ImageFile is empty or when
// no matching output item is present. Decode, directory-creation and write
// failures are returned wrapped with context. Only the first matching image is
// saved; the function returns immediately after writing it.
func (o *Client) extractAndSaveImages(resp *responses.Response, opts *common.ChatOptions) error {
	target := opts.ImageFile
	if target == "" {
		return nil // No image file specified, skip saving
	}

	for _, output := range resp.Output {
		if output.Type != "image_generation_call" {
			continue
		}

		call := output.AsImageGenerationCall()
		if call.Status != "completed" || call.Result == "" {
			continue
		}

		// The image payload arrives base64-encoded.
		decoded, err := base64.StdEncoding.DecodeString(call.Result)
		if err != nil {
			return fmt.Errorf("failed to decode image data: %w", err)
		}

		// Create the destination directory unless the file lands in the
		// current directory.
		if parent := filepath.Dir(target); parent != "." {
			if err := os.MkdirAll(parent, 0755); err != nil {
				return fmt.Errorf("failed to create directory %s: %w", parent, err)
			}
		}

		if err := os.WriteFile(target, decoded, 0644); err != nil {
			return fmt.Errorf("failed to save image to %s: %w", target, err)
		}

		fmt.Printf("Image saved to: %s\n", target)
		return nil
	}

	return nil
}

View File

@@ -0,0 +1,114 @@
package openai
import (
"testing"
"github.com/danielmiessler/fabric/chat"
"github.com/danielmiessler/fabric/common"
"github.com/openai/openai-go/responses"
"github.com/stretchr/testify/assert"
)
// TestShouldUseImageGeneration checks that shouldUseImageGeneration is true
// when ImageFile is set and false when it is empty.
func TestShouldUseImageGeneration(t *testing.T) {
	client := NewClient()

	// An output path is given: image generation is expected.
	withFile := &common.ChatOptions{ImageFile: "output.png"}
	enabled := client.shouldUseImageGeneration(withFile)
	assert.True(t, enabled, "Should use image generation when image file is specified")

	// No output path: image generation must stay off.
	withoutFile := &common.ChatOptions{ImageFile: ""}
	enabled = client.shouldUseImageGeneration(withoutFile)
	assert.False(t, enabled, "Should not use image generation when no image file is specified")
}
// TestAddImageGenerationTool checks that addImageGenerationTool appends one
// correctly configured image generation tool when ImageFile is set, and
// leaves the tool list empty when it is not.
func TestAddImageGenerationTool(t *testing.T) {
	client := NewClient()

	// Image generation requested: exactly one tool with the expected settings.
	enabledOpts := &common.ChatOptions{ImageFile: "output.png"}
	result := client.addImageGenerationTool(enabledOpts, []responses.ToolUnionParam{})

	assert.Len(t, result, 1, "Should add one image generation tool")
	assert.NotNil(t, result[0].OfImageGeneration, "Should have image generation tool")
	imageTool := result[0].OfImageGeneration
	assert.Equal(t, "image_generation", string(imageTool.Type))
	assert.Equal(t, "gpt-image-1", imageTool.Model)
	assert.Equal(t, "png", imageTool.OutputFormat)

	// Image generation not requested: the tool list stays empty.
	disabledOpts := &common.ChatOptions{ImageFile: ""}
	result = client.addImageGenerationTool(disabledOpts, []responses.ToolUnionParam{})

	assert.Len(t, result, 0, "Should not add image generation tool when not needed")
}
// TestBuildResponseParams_WithImageGeneration checks that buildResponseParams
// includes an image generation tool, configured for "gpt-image-1", when
// ImageFile is set on the options.
func TestBuildResponseParams_WithImageGeneration(t *testing.T) {
	client := NewClient()

	opts := &common.ChatOptions{
		Model:     "gpt-image-1",
		ImageFile: "output.png",
	}
	msgs := []*chat.ChatCompletionMessage{
		{Role: "user", Content: "Generate an image of a cat"},
	}

	params := client.buildResponseParams(msgs, opts)
	assert.NotNil(t, params.Tools, "Expected tools when image generation is enabled")

	// Look for the first image generation tool and verify its configuration.
	hasImageTool := false
	for i := 0; i < len(params.Tools) && !hasImageTool; i++ {
		imageTool := params.Tools[i].OfImageGeneration
		if imageTool == nil {
			continue
		}
		hasImageTool = true
		assert.Equal(t, "image_generation", string(imageTool.Type))
		assert.Equal(t, "gpt-image-1", imageTool.Model)
	}

	assert.True(t, hasImageTool, "Should have image generation tool")
}
// TestBuildResponseParams_WithBothSearchAndImage checks that enabling Search
// and setting ImageFile together yields exactly two tools: one web search
// tool and one image generation tool.
func TestBuildResponseParams_WithBothSearchAndImage(t *testing.T) {
	client := NewClient()

	opts := &common.ChatOptions{
		Model:          "gpt-image-1",
		Search:         true,
		SearchLocation: "America/Los_Angeles",
		ImageFile:      "output.png",
	}
	msgs := []*chat.ChatCompletionMessage{
		{Role: "user", Content: "Search for cat images and generate one"},
	}

	params := client.buildResponseParams(msgs, opts)

	assert.NotNil(t, params.Tools, "Expected tools when both search and image generation are enabled")
	assert.Len(t, params.Tools, 2, "Should have both search and image generation tools")

	// Record which tool kinds appear anywhere in the list.
	var hasSearchTool, hasImageTool bool
	for i := range params.Tools {
		hasSearchTool = hasSearchTool || params.Tools[i].OfWebSearchPreview != nil
		hasImageTool = hasImageTool || params.Tools[i].OfImageGeneration != nil
	}

	assert.True(t, hasSearchTool, "Should have web search tool")
	assert.True(t, hasImageTool, "Should have image generation tool")
}