mirror of
https://github.com/ollama/ollama.git
synced 2026-01-29 07:12:03 +03:00
x/imagegen: fix image editing support (#13866)
- Fix panic in `ollama show` for image gen models (safe type assertion)
- Add vision capability for Flux2KleinPipeline models at create time
- Flatten transparent PNG images onto white background for better results
This commit is contained in:
10
cmd/cmd.go
10
cmd/cmd.go
@@ -1019,8 +1019,10 @@ func showInfo(resp *api.ShowResponse, verbose bool, w io.Writer) error {
|
||||
}
|
||||
|
||||
if resp.ModelInfo != nil {
|
||||
arch := resp.ModelInfo["general.architecture"].(string)
|
||||
rows = append(rows, []string{"", "architecture", arch})
|
||||
arch, _ := resp.ModelInfo["general.architecture"].(string)
|
||||
if arch != "" {
|
||||
rows = append(rows, []string{"", "architecture", arch})
|
||||
}
|
||||
|
||||
var paramStr string
|
||||
if resp.Details.ParameterSize != "" {
|
||||
@@ -1030,7 +1032,9 @@ func showInfo(resp *api.ShowResponse, verbose bool, w io.Writer) error {
|
||||
paramStr = format.HumanNumber(uint64(f))
|
||||
}
|
||||
}
|
||||
rows = append(rows, []string{"", "parameters", paramStr})
|
||||
if paramStr != "" {
|
||||
rows = append(rows, []string{"", "parameters", paramStr})
|
||||
}
|
||||
|
||||
if v, ok := resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)]; ok {
|
||||
if f, ok := v.(float64); ok {
|
||||
|
||||
@@ -11,6 +11,8 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/ollama/ollama/manifest"
|
||||
"github.com/ollama/ollama/progress"
|
||||
@@ -209,10 +211,23 @@ func newManifestWriter(opts CreateOptions, capabilities []string) create.Manifes
|
||||
return fmt.Errorf("invalid model name: %s", modelName)
|
||||
}
|
||||
|
||||
// TODO: find a better way to detect image input support
|
||||
// For now, hardcode Flux2KleinPipeline as supporting vision (image input)
|
||||
caps := capabilities
|
||||
modelIndex := filepath.Join(opts.ModelDir, "model_index.json")
|
||||
if data, err := os.ReadFile(modelIndex); err == nil {
|
||||
var cfg struct {
|
||||
ClassName string `json:"_class_name"`
|
||||
}
|
||||
if json.Unmarshal(data, &cfg) == nil && cfg.ClassName == "Flux2KleinPipeline" {
|
||||
caps = append(caps, "vision")
|
||||
}
|
||||
}
|
||||
|
||||
// Create config blob with version requirement
|
||||
configData := model.ConfigV2{
|
||||
ModelFormat: "safetensors",
|
||||
Capabilities: capabilities,
|
||||
Capabilities: caps,
|
||||
Requires: MinOllamaVersion,
|
||||
}
|
||||
configJSON, err := json.Marshal(configData)
|
||||
|
||||
@@ -532,8 +532,10 @@ func extractFileData(input string) (string, []api.ImageData, error) {
|
||||
var imgs []api.ImageData
|
||||
|
||||
for _, fp := range filePaths {
|
||||
// Normalize escaped spaces
|
||||
// Normalize shell escapes
|
||||
nfp := strings.ReplaceAll(fp, "\\ ", " ")
|
||||
nfp = strings.ReplaceAll(nfp, "\\(", "(")
|
||||
nfp = strings.ReplaceAll(nfp, "\\)", ")")
|
||||
nfp = strings.ReplaceAll(nfp, "%20", " ")
|
||||
|
||||
data, err := getImageData(nfp)
|
||||
|
||||
@@ -7,6 +7,8 @@ import (
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"image"
|
||||
"image/color"
|
||||
"image/draw"
|
||||
_ "image/jpeg"
|
||||
"image/png"
|
||||
"os"
|
||||
@@ -111,6 +113,7 @@ func clampF(v, min, max float32) float32 {
|
||||
}
|
||||
|
||||
// DecodeImage decodes image bytes with EXIF orientation applied.
|
||||
// Transparent images are composited onto a white background.
|
||||
func DecodeImage(data []byte) (image.Image, error) {
|
||||
orientation := readJPEGOrientation(data)
|
||||
|
||||
@@ -119,9 +122,33 @@ func DecodeImage(data []byte) (image.Image, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
img = flattenAlpha(img)
|
||||
return applyOrientation(img, orientation), nil
|
||||
}
|
||||
|
||||
// flattenAlpha composites img onto a white background and returns the
// result, removing any transparency. This is needed because image
// generation models don't handle alpha channels well.
//
// Images that report themselves as fully opaque are returned unchanged.
// Every other image is drawn over white into a new *image.RGBA covering
// the same bounds. That includes 16-bit (*image.NRGBA64, *image.RGBA64)
// and paletted images with transparent entries, as well as image types
// that do not expose an Opaque method.
func flattenAlpha(img image.Image) image.Image {
	// The standard library image types all implement Opaque. Using it
	// instead of a list of concrete types avoids missing alpha-capable
	// formats and skips the copy when there is nothing to flatten.
	if o, ok := img.(interface{ Opaque() bool }); ok && o.Opaque() {
		return img
	}

	bounds := img.Bounds()
	dst := image.NewRGBA(bounds)

	// Fill with white background.
	draw.Draw(dst, bounds, image.NewUniform(color.White), image.Point{}, draw.Src)

	// Composite the image on top; draw.Over blends by source alpha.
	draw.Draw(dst, bounds, img, bounds.Min, draw.Over)

	return dst
}
|
||||
|
||||
// readJPEGOrientation extracts EXIF orientation from JPEG bytes.
|
||||
// Returns 1 (normal) for non-JPEG or if orientation not found.
|
||||
func readJPEGOrientation(data []byte) int {
|
||||
|
||||
Reference in New Issue
Block a user