diff --git a/cmd/cmd.go b/cmd/cmd.go index e4ad0366b..257a2d157 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -1019,8 +1019,10 @@ func showInfo(resp *api.ShowResponse, verbose bool, w io.Writer) error { } if resp.ModelInfo != nil { - arch := resp.ModelInfo["general.architecture"].(string) - rows = append(rows, []string{"", "architecture", arch}) + arch, _ := resp.ModelInfo["general.architecture"].(string) + if arch != "" { + rows = append(rows, []string{"", "architecture", arch}) + } var paramStr string if resp.Details.ParameterSize != "" { @@ -1030,7 +1032,9 @@ func showInfo(resp *api.ShowResponse, verbose bool, w io.Writer) error { paramStr = format.HumanNumber(uint64(f)) } } - rows = append(rows, []string{"", "parameters", paramStr}) + if paramStr != "" { + rows = append(rows, []string{"", "parameters", paramStr}) + } if v, ok := resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)]; ok { if f, ok := v.(float64); ok { diff --git a/x/create/client/create.go b/x/create/client/create.go index 93ab481f2..c7e51a525 100644 --- a/x/create/client/create.go +++ b/x/create/client/create.go @@ -11,6 +11,8 @@ import ( "encoding/json" "fmt" "io" + "os" + "path/filepath" "github.com/ollama/ollama/manifest" "github.com/ollama/ollama/progress" @@ -209,10 +211,23 @@ func newManifestWriter(opts CreateOptions, capabilities []string) create.Manifes return fmt.Errorf("invalid model name: %s", modelName) } + // TODO: find a better way to detect image input support + // For now, hardcode Flux2KleinPipeline as supporting vision (image input) + caps := capabilities + modelIndex := filepath.Join(opts.ModelDir, "model_index.json") + if data, err := os.ReadFile(modelIndex); err == nil { + var cfg struct { + ClassName string `json:"_class_name"` + } + if json.Unmarshal(data, &cfg) == nil && cfg.ClassName == "Flux2KleinPipeline" { + caps = append(caps, "vision") + } + } + // Create config blob with version requirement configData := model.ConfigV2{ ModelFormat: "safetensors", - Capabilities: 
capabilities, + Capabilities: caps, Requires: MinOllamaVersion, } configJSON, err := json.Marshal(configData) diff --git a/x/imagegen/cli.go b/x/imagegen/cli.go index 6c8ea0f54..e5de34efa 100644 --- a/x/imagegen/cli.go +++ b/x/imagegen/cli.go @@ -532,8 +532,10 @@ func extractFileData(input string) (string, []api.ImageData, error) { var imgs []api.ImageData for _, fp := range filePaths { - // Normalize escaped spaces + // Normalize shell escapes nfp := strings.ReplaceAll(fp, "\\ ", " ") + nfp = strings.ReplaceAll(nfp, "\\(", "(") + nfp = strings.ReplaceAll(nfp, "\\)", ")") nfp = strings.ReplaceAll(nfp, "%20", " ") data, err := getImageData(nfp) diff --git a/x/imagegen/image.go b/x/imagegen/image.go index db4d1a4c5..2dca0ee1d 100644 --- a/x/imagegen/image.go +++ b/x/imagegen/image.go @@ -7,6 +7,8 @@ import ( "encoding/base64" "fmt" "image" + "image/color" + "image/draw" _ "image/jpeg" "image/png" "os" @@ -111,6 +113,7 @@ func clampF(v, min, max float32) float32 { } // DecodeImage decodes image bytes with EXIF orientation applied. +// Transparent images are composited onto a white background. func DecodeImage(data []byte) (image.Image, error) { orientation := readJPEGOrientation(data) @@ -119,9 +122,33 @@ func DecodeImage(data []byte) (image.Image, error) { return nil, err } + img = flattenAlpha(img) return applyOrientation(img, orientation), nil } +// flattenAlpha composites an image onto a white background, +// removing any transparency. This is needed because image +// generation models don't handle alpha channels well. 
// flattenAlpha returns an image with no transparency by compositing img over
// an opaque white background.
//
// Images that report themselves as fully opaque through an Opaque() bool
// method (every concrete image type in the standard image package has one)
// are returned unchanged, without copying. Everything else is drawn onto a
// new *image.RGBA that has the same bounds as img. That includes transparent
// *image.NRGBA64, *image.RGBA64 and *image.Paletted images, which a check
// limited to *image.RGBA / *image.NRGBA would let through, as well as any
// image type that cannot report its opacity.
//
// The flattened result is always 8 bits per channel, so a 16-bit source that
// needs flattening loses precision.
func flattenAlpha(img image.Image) image.Image {
	// Ask the image itself rather than enumerating concrete types: the answer
	// then reflects the actual pixel data, not just the colour model.
	if o, ok := img.(interface{ Opaque() bool }); ok && o.Opaque() {
		return img
	}

	bounds := img.Bounds()
	dst := image.NewRGBA(bounds)

	// Paint the white background first; draw.Src overwrites dst outright.
	draw.Draw(dst, bounds, image.NewUniform(color.White), image.Point{}, draw.Src)

	// Blend the source on top. bounds.Min is passed as the source point so
	// images whose origin is not (0, 0) stay aligned with dst.
	draw.Draw(dst, bounds, img, bounds.Min, draw.Over)

	return dst
}