Mirror of https://github.com/ollama/ollama.git, synced 2026-01-29 07:12:03 +03:00
The nvidia_fp32 config for (576, 512) head sizes had nbatch_fa=32, which caused zero-sized arrays when computing array dimensions:

    nbatch_fa / (np * warp_size) = 32 / (2 * 32) = 0

This resulted in CUDA compilation failures on CUDA 12 (Windows and Linux arm64):

- "static assertion failed with nbatch_fa % (np*warp_size) != 0"
- "the size of an array must be greater than zero"

Fix by changing nbatch_fa from 32 to 64 for all (576, 512) configs in the nvidia_fp32 function, matching the nvidia_fp16 and AMD configs.
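For illustration only, a minimal Go sketch of the failing dimension arithmetic described above; the values np = 2 and warp_size = 32 come from the commit message, and the helper name tileRows is hypothetical, not code from the CUDA kernel:

package main

import "fmt"

// tileRows mirrors the compile-time expression nbatch_fa / (np * warp_size);
// a result of 0 would produce a zero-sized array in the kernel.
func tileRows(nbatchFA, np, warpSize int) int {
	return nbatchFA / (np * warpSize)
}

func main() {
	const np, warpSize = 2, 32
	fmt.Println(tileRows(32, np, warpSize)) // 0 -> zero-sized array, compilation fails
	fmt.Println(tileRows(64, np, warpSize)) // 1 -> valid configuration after the fix
}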
74 lines
1.3 KiB
Go
package glm4moelite

import (
	"testing"

	"github.com/ollama/ollama/ml/nn"
)

// TestValidate checks that Model.Validate reports ErrOldModelFormat whenever a
// layer's Attention block is missing its KB or VB projection, and accepts
// models whose attention blocks are either complete or absent.
func TestValidate(t *testing.T) {
	tests := []struct {
		name    string
		model   *Model
		wantErr bool
	}{
		{
			name: "valid model with KB and VB",
			model: &Model{
				Layers: []Layer{
					{Attention: &Attention{KB: &nn.Linear{}, VB: &nn.Linear{}}},
				},
			},
			wantErr: false,
		},
		{
			name: "missing KB",
			model: &Model{
				Layers: []Layer{
					{Attention: &Attention{VB: &nn.Linear{}}},
				},
			},
			wantErr: true,
		},
		{
			name: "missing VB",
			model: &Model{
				Layers: []Layer{
					{Attention: &Attention{KB: &nn.Linear{}}},
				},
			},
			wantErr: true,
		},
		{
			name: "missing both KB and VB",
			model: &Model{
				Layers: []Layer{
					{Attention: &Attention{}},
				},
			},
			wantErr: true,
		},
		{
			name: "nil Attention is ok",
			model: &Model{
				Layers: []Layer{
					{Attention: nil},
				},
			},
			wantErr: false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			err := tt.model.Validate()
			if (err != nil) != tt.wantErr {
				t.Errorf("Validate() error = %v, wantErr %v", err, tt.wantErr)
			}
			if tt.wantErr && err != ErrOldModelFormat {
				t.Errorf("Validate() error = %v, want %v", err, ErrOldModelFormat)
			}
		})
	}
}
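The Validate implementation exercised by this test is not shown on this page. As a rough sketch only, assuming the package's Model, Layer, and Attention types as used in the test above, an implementation consistent with these cases could look like the following; it is an assumption, not the actual ollama code:

// Sketch (assumed, not the real implementation): every non-nil Attention must
// carry both the KB and VB projections; otherwise the weights are treated as
// the old model format.
func (m *Model) Validate() error {
	for _, layer := range m.Layers {
		if layer.Attention == nil {
			continue // layers without an attention block are accepted
		}
		if layer.Attention.KB == nil || layer.Attention.VB == nil {
			return ErrOldModelFormat
		}
	}
	return nil
}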