fix: lazy init MLX for quantization and improve library discovery
- Add lazy MLX initialization in quantizeTensor so the library is loaded only when quantization is actually requested
- Add an exe-relative build path search for dev mode on macOS, so the ollama binary can find libmlxc.dylib in build/lib/ollama/ when running from the repo root (a sketch of this search order follows the diff below)
@@ -16,6 +16,11 @@ import (
 // Supported quantization types: "fp8" (affine 8-bit)
 // Uses MLX's native SaveSafetensors to ensure correct dtype handling (especially uint32 for quantized weights).
 func quantizeTensor(r io.Reader, name, dtype string, shape []int32, quantize string) (qweightData, scalesData, qbiasData []byte, qweightShape, scalesShape, qbiasShape []int32, err error) {
+	// Lazy init MLX when needed for quantization
+	if err := mlx.InitMLX(); err != nil {
+		return nil, nil, nil, nil, nil, nil, fmt.Errorf("MLX initialization failed: %w", err)
+	}
+
 	tmpDir := ensureTempDir()
 
 	// Read safetensors data to a temp file (LoadSafetensorsNative needs a path)
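
The hunk calls mlx.InitMLX on every quantizeTensor invocation, so laziness only pays off if initialization is idempotent and cached. Below is a minimal sketch of the usual Go guard for this, assuming hypothetical internals: initOnce, initErr, and loadMLXLibrary are illustrative names, not necessarily the mlx package's actual ones.

package mlx

import "sync"

var (
	initOnce sync.Once
	initErr  error
)

// InitMLX loads the MLX shared library on first use and caches the result,
// so repeated calls from quantizeTensor are cheap and always return the
// same error.
func InitMLX() error {
	initOnce.Do(func() {
		initErr = loadMLXLibrary()
	})
	return initErr
}

// loadMLXLibrary stands in for the real dlopen-based loader (hypothetical);
// returning nil keeps the sketch self-contained.
func loadMLXLibrary() error { return nil }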
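
The second bullet, the exe-relative library search, is not visible in the hunk above. The sketch below shows one way such discovery can work, probing candidate directories relative to the running binary; candidateLibraryPaths and the installed-layout fallback are assumptions, and only the dev-mode build/lib/ollama/ location comes from the commit message.

package mlx

import (
	"os"
	"path/filepath"
)

// candidateLibraryPaths returns directories to probe for libmlxc.dylib.
// The exe-relative build/lib/ollama/ entry is what lets a dev build,
// launched from the repo root, find the library without an install step.
func candidateLibraryPaths() []string {
	var paths []string
	if exe, err := os.Executable(); err == nil {
		if resolved, err := filepath.EvalSymlinks(exe); err == nil {
			exe = resolved
		}
		exeDir := filepath.Dir(exe)
		// Dev mode: binary in the repo root, library under build/lib/ollama/.
		paths = append(paths, filepath.Join(exeDir, "build", "lib", "ollama"))
		// Installed layout (illustrative): library next to the binary.
		paths = append(paths, filepath.Join(exeDir, "lib", "ollama"))
	}
	return paths
}

A loader would then try to dlopen libmlxc.dylib in each directory in order, falling back to the system search path if none match.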