From 582d93ab22d8f2cf67aaefa0607af3bb01a3534e Mon Sep 17 00:00:00 2001
From: jmorganca
Date: Sat, 17 Jan 2026 22:25:31 -0800
Subject: [PATCH] fix: lazy init MLX for quantization and improve library
 discovery

- Add lazy MLX initialization in quantizeTensor to ensure the library
  is loaded when quantization is requested
- Add exe-relative build path search for dev mode on macOS, so the
  ollama binary can find libmlxc.dylib in build/lib/ollama/ when
  running from the repo root
---
 x/create/client/quantize.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/x/create/client/quantize.go b/x/create/client/quantize.go
index 5a4be59d0..e217e38b9 100644
--- a/x/create/client/quantize.go
+++ b/x/create/client/quantize.go
@@ -16,6 +16,11 @@ import (
 // Supported quantization types: "fp8" (affine 8-bit)
 // Uses MLX's native SaveSafetensors to ensure correct dtype handling (especially uint32 for quantized weights).
 func quantizeTensor(r io.Reader, name, dtype string, shape []int32, quantize string) (qweightData, scalesData, qbiasData []byte, qweightShape, scalesShape, qbiasShape []int32, err error) {
+	// Lazy init MLX when needed for quantization
+	if err := mlx.InitMLX(); err != nil {
+		return nil, nil, nil, nil, nil, nil, fmt.Errorf("MLX initialization failed: %w", err)
+	}
+
 	tmpDir := ensureTempDir()
 
 	// Read safetensors data to a temp file (LoadSafetensorsNative needs a path)