From 582d93ab22d8f2cf67aaefa0607af3bb01a3534e Mon Sep 17 00:00:00 2001
From: jmorganca
Date: Sat, 17 Jan 2026 22:25:31 -0800
Subject: [PATCH] fix: lazy init MLX for quantization and improve library
 discovery

- Add lazy MLX initialization in quantizeTensor to ensure the library
  is loaded when quantization is requested
- Add exe-relative build path search for dev mode on macOS, so the
  ollama binary can find libmlxc.dylib in build/lib/ollama/ when
  running from the repo root
---
 x/create/client/quantize.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/x/create/client/quantize.go b/x/create/client/quantize.go
index 5a4be59d0..e217e38b9 100644
--- a/x/create/client/quantize.go
+++ b/x/create/client/quantize.go
@@ -16,6 +16,11 @@ import (
 // Supported quantization types: "fp8" (affine 8-bit)
 // Uses MLX's native SaveSafetensors to ensure correct dtype handling (especially uint32 for quantized weights).
 func quantizeTensor(r io.Reader, name, dtype string, shape []int32, quantize string) (qweightData, scalesData, qbiasData []byte, qweightShape, scalesShape, qbiasShape []int32, err error) {
+	// Lazy init MLX when needed for quantization
+	if err := mlx.InitMLX(); err != nil {
+		return nil, nil, nil, nil, nil, nil, fmt.Errorf("MLX initialization failed: %w", err)
+	}
+
 	tmpDir := ensureTempDir()
 
 	// Read safetensors data to a temp file (LoadSafetensorsNative needs a path)