mirror of
https://github.com/ollama/ollama.git
synced 2026-01-29 07:12:03 +03:00
x/imagegen: skip eval when no dtype conversion needed
When a tensor needs no dtype conversion, keep its native handle alive so the mmap-backed data stays valid, and skip the evaluation and free, which are now performed only when converting.
This commit is contained in:
@@ -64,15 +64,17 @@ func (mw *ManifestWeights) Load(dtype mlx.Dtype) error {
|
||||
return fmt.Errorf("tensor 'data' not found in blob for %s", name)
|
||||
}
|
||||
|
||||
// Convert dtype if needed
|
||||
// Convert dtype if needed - only eval and free when converting
|
||||
if dtype != 0 && arr.Dtype() != dtype {
|
||||
arr = mlx.AsType(arr, dtype)
|
||||
mlx.Eval(arr)
|
||||
mw.cache[name] = arr
|
||||
sf.Free() // Safe to free - arr is now an independent copy
|
||||
} else {
|
||||
// No conversion needed - keep native handle alive for mmap
|
||||
mw.cache[name] = arr
|
||||
mw.nativeCache = append(mw.nativeCache, sf)
|
||||
}
|
||||
// ALWAYS make a contiguous copy to ensure independence from mmap
|
||||
arr = mlx.Contiguous(arr)
|
||||
mlx.Eval(arr)
|
||||
mw.cache[name] = arr
|
||||
sf.Free() // Safe to free - arr is now an independent copy
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
Reference in New Issue
Block a user