mirror of
https://github.com/ollama/ollama.git
synced 2026-01-29 07:12:03 +03:00
x/imagegen: fix image editing support (#13866)
- Fix panic in `ollama show` for image gen models (safe type assertion)
- Add vision capability for Flux2KleinPipeline models at create time
- Flatten transparent PNG images onto white background for better results
This commit is contained in:
10
cmd/cmd.go
10
cmd/cmd.go
@@ -1019,8 +1019,10 @@ func showInfo(resp *api.ShowResponse, verbose bool, w io.Writer) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if resp.ModelInfo != nil {
|
if resp.ModelInfo != nil {
|
||||||
arch := resp.ModelInfo["general.architecture"].(string)
|
arch, _ := resp.ModelInfo["general.architecture"].(string)
|
||||||
rows = append(rows, []string{"", "architecture", arch})
|
if arch != "" {
|
||||||
|
rows = append(rows, []string{"", "architecture", arch})
|
||||||
|
}
|
||||||
|
|
||||||
var paramStr string
|
var paramStr string
|
||||||
if resp.Details.ParameterSize != "" {
|
if resp.Details.ParameterSize != "" {
|
||||||
@@ -1030,7 +1032,9 @@ func showInfo(resp *api.ShowResponse, verbose bool, w io.Writer) error {
|
|||||||
paramStr = format.HumanNumber(uint64(f))
|
paramStr = format.HumanNumber(uint64(f))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
rows = append(rows, []string{"", "parameters", paramStr})
|
if paramStr != "" {
|
||||||
|
rows = append(rows, []string{"", "parameters", paramStr})
|
||||||
|
}
|
||||||
|
|
||||||
if v, ok := resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)]; ok {
|
if v, ok := resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)]; ok {
|
||||||
if f, ok := v.(float64); ok {
|
if f, ok := v.(float64); ok {
|
||||||
|
|||||||
@@ -11,6 +11,8 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
"github.com/ollama/ollama/manifest"
|
"github.com/ollama/ollama/manifest"
|
||||||
"github.com/ollama/ollama/progress"
|
"github.com/ollama/ollama/progress"
|
||||||
@@ -209,10 +211,23 @@ func newManifestWriter(opts CreateOptions, capabilities []string) create.Manifes
|
|||||||
return fmt.Errorf("invalid model name: %s", modelName)
|
return fmt.Errorf("invalid model name: %s", modelName)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: find a better way to detect image input support
|
||||||
|
// For now, hardcode Flux2KleinPipeline as supporting vision (image input)
|
||||||
|
caps := capabilities
|
||||||
|
modelIndex := filepath.Join(opts.ModelDir, "model_index.json")
|
||||||
|
if data, err := os.ReadFile(modelIndex); err == nil {
|
||||||
|
var cfg struct {
|
||||||
|
ClassName string `json:"_class_name"`
|
||||||
|
}
|
||||||
|
if json.Unmarshal(data, &cfg) == nil && cfg.ClassName == "Flux2KleinPipeline" {
|
||||||
|
caps = append(caps, "vision")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Create config blob with version requirement
|
// Create config blob with version requirement
|
||||||
configData := model.ConfigV2{
|
configData := model.ConfigV2{
|
||||||
ModelFormat: "safetensors",
|
ModelFormat: "safetensors",
|
||||||
Capabilities: capabilities,
|
Capabilities: caps,
|
||||||
Requires: MinOllamaVersion,
|
Requires: MinOllamaVersion,
|
||||||
}
|
}
|
||||||
configJSON, err := json.Marshal(configData)
|
configJSON, err := json.Marshal(configData)
|
||||||
|
|||||||
@@ -532,8 +532,10 @@ func extractFileData(input string) (string, []api.ImageData, error) {
|
|||||||
var imgs []api.ImageData
|
var imgs []api.ImageData
|
||||||
|
|
||||||
for _, fp := range filePaths {
|
for _, fp := range filePaths {
|
||||||
// Normalize escaped spaces
|
// Normalize shell escapes
|
||||||
nfp := strings.ReplaceAll(fp, "\\ ", " ")
|
nfp := strings.ReplaceAll(fp, "\\ ", " ")
|
||||||
|
nfp = strings.ReplaceAll(nfp, "\\(", "(")
|
||||||
|
nfp = strings.ReplaceAll(nfp, "\\)", ")")
|
||||||
nfp = strings.ReplaceAll(nfp, "%20", " ")
|
nfp = strings.ReplaceAll(nfp, "%20", " ")
|
||||||
|
|
||||||
data, err := getImageData(nfp)
|
data, err := getImageData(nfp)
|
||||||
|
|||||||
@@ -7,6 +7,8 @@ import (
|
|||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
"fmt"
|
"fmt"
|
||||||
"image"
|
"image"
|
||||||
|
"image/color"
|
||||||
|
"image/draw"
|
||||||
_ "image/jpeg"
|
_ "image/jpeg"
|
||||||
"image/png"
|
"image/png"
|
||||||
"os"
|
"os"
|
||||||
@@ -111,6 +113,7 @@ func clampF(v, min, max float32) float32 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// DecodeImage decodes image bytes with EXIF orientation applied.
|
// DecodeImage decodes image bytes with EXIF orientation applied.
|
||||||
|
// Transparent images are composited onto a white background.
|
||||||
func DecodeImage(data []byte) (image.Image, error) {
|
func DecodeImage(data []byte) (image.Image, error) {
|
||||||
orientation := readJPEGOrientation(data)
|
orientation := readJPEGOrientation(data)
|
||||||
|
|
||||||
@@ -119,9 +122,33 @@ func DecodeImage(data []byte) (image.Image, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
img = flattenAlpha(img)
|
||||||
return applyOrientation(img, orientation), nil
|
return applyOrientation(img, orientation), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// flattenAlpha composites an image onto a white background,
// removing any transparency. This is needed because image
// generation models don't handle alpha channels well.
//
// *image.RGBA and *image.NRGBA inputs are always redrawn into a fresh
// *image.RGBA. Any other image that exposes an Opaque() method and reports
// that it is not fully opaque (for example a 16-bit *image.NRGBA64, or an
// *image.Paletted that uses a transparent palette entry) is flattened the
// same way. All remaining images are returned unchanged.
func flattenAlpha(img image.Image) image.Image {
	switch img.(type) {
	case *image.RGBA, *image.NRGBA:
		// 8-bit alpha formats are always composited, opaque or not.
	default:
		// Other formats can still carry transparency; only skip the
		// composite when the image cannot tell us or says it is opaque.
		if o, ok := img.(interface{ Opaque() bool }); !ok || o.Opaque() {
			return img
		}
	}

	bounds := img.Bounds()
	dst := image.NewRGBA(bounds)

	// Fill with white background
	draw.Draw(dst, bounds, image.NewUniform(color.White), image.Point{}, draw.Src)

	// Composite the image on top; draw.Over blends using the source alpha,
	// so transparent pixels let the white background show through.
	draw.Draw(dst, bounds, img, bounds.Min, draw.Over)

	return dst
}
|
||||||
|
|
||||||
// readJPEGOrientation extracts EXIF orientation from JPEG bytes.
|
// readJPEGOrientation extracts EXIF orientation from JPEG bytes.
|
||||||
// Returns 1 (normal) for non-JPEG or if orientation not found.
|
// Returns 1 (normal) for non-JPEG or if orientation not found.
|
||||||
func readJPEGOrientation(data []byte) int {
|
func readJPEGOrientation(data []byte) int {
|
||||||
|
|||||||
Reference in New Issue
Block a user