internal/cpu: repair VNNI feature check

This is a pain to test.
Also the original test was never executed, because it was wrong.

It looks like processors that might lack this feature
include Intel 11th generation and AMD Zen 4.  These might
or might not have bit 2 set in the 7th cpuid "leaf" (SM4)
which is what the incorrect test was checking; the bug
is triggered by ^VNNI & SM4.  Apparently the SM4 bit is
not usually set, else we would have seen a test failure.

The "Lion Cove" microarchitecture (Arrow Lake, Lunar Lake)
appears to trigger this problem; it's not clear whether there are
others.  It was hard to verify this from online information.

Fixes #76881.

Change-Id: I21be6b4f47134d81e89799b0f06f89fcb6563264
Reviewed-on: https://go-review.googlesource.com/c/go/+/731240
TryBot-Bypass: David Chase <drchase@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
David Chase
2025-12-18 13:18:14 -05:00
parent cfc024daeb
commit d00e96d3ae
2 changed files with 12 additions and 8 deletions

View File

@@ -219,7 +219,7 @@ func doinit() {
if eax7 >= 1 {
eax71, _, _, _ := cpuid(7, 1)
if X86.HasAVX {
X86.HasAVXVNNI = isSet(4, eax71)
X86.HasAVXVNNI = isSet(eax71, cpuid_AVXVNNI)
}
}

View File

@@ -1135,18 +1135,22 @@ func TestDotProductQuadruple(t *testing.T) {
wanted2 := make([]int32, 4)
res1 := make([]int32, 4)
res2 := make([]int32, 4)
for i := range 4 {
xd[i] = 5
yd[i] = 6
zd[i] = 3
wanted1[i] = 30
wanted2[i] = 30
for i := range 16 {
xd[i] = int8(i + 112) // 112+15 = 127
yd[i] = uint8(i + 240) // 240+15 = 255
}
for i := range 4 {
i4 := 4 * i
wanted1[i] = int32(xd[i4])*int32(yd[i4]) + int32(xd[i4+1])*int32(yd[i4+1]) + int32(xd[i4+2])*int32(yd[i4+2]) + int32(xd[i4+3])*int32(yd[i4+3])
zd[i] = int32(i + 1)
wanted2[i] = wanted1[i] + zd[i]
}
x := archsimd.LoadInt8x16Slice(xd)
y := archsimd.LoadUint8x16Slice(yd)
z := archsimd.LoadInt32x4Slice(zd)
x.DotProductQuadruple(y).StoreSlice(res1)
x.DotProductQuadruple(y).Add(z).StoreSlice(res1)
x.DotProductQuadruple(y).Add(z).StoreSlice(res2)
for i := range 4 {
if res1[i] != wanted1[i] {
t.Errorf("got %d wanted %d", res1[i], wanted1[i])