diff --git a/cpu/cpu_x86.go b/cpu/cpu_x86.go index a5b5b5de..4c161315 100644 --- a/cpu/cpu_x86.go +++ b/cpu/cpu_x86.go @@ -64,6 +64,55 @@ func initOptions() { func archInit() { + const ( + // eax bits + cpuid_AVXVNNI = 1 << 4 + + // ecx bits + cpuid_SSE3 = 1 << 0 + cpuid_PCLMULQDQ = 1 << 1 + cpuid_AVX512VBMI = 1 << 1 + cpuid_AVX512VBMI2 = 1 << 6 + cpuid_SSSE3 = 1 << 9 + cpuid_AVX512GFNI = 1 << 8 + cpuid_AVX512VAES = 1 << 9 + cpuid_AVX512VNNI = 1 << 11 + cpuid_AVX512BITALG = 1 << 12 + cpuid_FMA = 1 << 12 + cpuid_AVX512VPOPCNTDQ = 1 << 14 + cpuid_SSE41 = 1 << 19 + cpuid_SSE42 = 1 << 20 + cpuid_POPCNT = 1 << 23 + cpuid_AES = 1 << 25 + cpuid_OSXSAVE = 1 << 27 + cpuid_AVX = 1 << 28 + + // "Extended Feature Flag" bits returned in EBX for CPUID EAX=0x7 ECX=0x0 + cpuid_BMI1 = 1 << 3 + cpuid_AVX2 = 1 << 5 + cpuid_BMI2 = 1 << 8 + cpuid_ERMS = 1 << 9 + cpuid_AVX512F = 1 << 16 + cpuid_AVX512DQ = 1 << 17 + cpuid_ADX = 1 << 19 + cpuid_AVX512CD = 1 << 28 + cpuid_SHA = 1 << 29 + cpuid_AVX512BW = 1 << 30 + cpuid_AVX512VL = 1 << 31 + + // "Extended Feature Flag" bits returned in ECX for CPUID EAX=0x7 ECX=0x0 + cpuid_AVX512_VBMI = 1 << 1 + cpuid_AVX512_VBMI2 = 1 << 6 + cpuid_GFNI = 1 << 8 + cpuid_AVX512VPCLMULQDQ = 1 << 10 + cpuid_AVX512_BITALG = 1 << 12 + + // edx bits + cpuid_FSRM = 1 << 4 + // edx bits for CPUID 0x80000001 + cpuid_RDTSCP = 1 << 27 + ) + Initialized = true maxID, _, _, _ := cpuid(0, 0) @@ -75,16 +124,16 @@ func archInit() { _, _, ecx1, edx1 := cpuid(1, 0) X86.HasSSE2 = isSet(edx1, 1<<26) - X86.HasSSE3 = isSet(ecx1, 1<<0) - X86.HasPCLMULQDQ = isSet(ecx1, 1<<1) - X86.HasSSSE3 = isSet(ecx1, 1<<9) - X86.HasFMA = isSet(ecx1, 1<<12) + X86.HasSSE3 = isSet(ecx1, cpuid_SSE3) + X86.HasPCLMULQDQ = isSet(ecx1, cpuid_PCLMULQDQ) + X86.HasSSSE3 = isSet(ecx1, cpuid_SSSE3) + X86.HasFMA = isSet(ecx1, cpuid_FMA) X86.HasCX16 = isSet(ecx1, 1<<13) - X86.HasSSE41 = isSet(ecx1, 1<<19) - X86.HasSSE42 = isSet(ecx1, 1<<20) - X86.HasPOPCNT = isSet(ecx1, 1<<23) - X86.HasAES = isSet(ecx1, 1<<25) - X86.HasOSXSAVE = isSet(ecx1, 1<<27) + X86.HasSSE41 = isSet(ecx1, cpuid_SSE41) + X86.HasSSE42 = isSet(ecx1, cpuid_SSE42) + X86.HasPOPCNT = isSet(ecx1, cpuid_POPCNT) + X86.HasAES = isSet(ecx1, cpuid_AES) + X86.HasOSXSAVE = isSet(ecx1, cpuid_OSXSAVE) X86.HasRDRAND = isSet(ecx1, 1<<30) var osSupportsAVX, osSupportsAVX512 bool @@ -103,40 +152,40 @@ func archInit() { } } - X86.HasAVX = isSet(ecx1, 1<<28) && osSupportsAVX + X86.HasAVX = isSet(ecx1, cpuid_AVX) && osSupportsAVX if maxID < 7 { return } eax7, ebx7, ecx7, edx7 := cpuid(7, 0) - X86.HasBMI1 = isSet(ebx7, 1<<3) - X86.HasAVX2 = isSet(ebx7, 1<<5) && osSupportsAVX - X86.HasBMI2 = isSet(ebx7, 1<<8) - X86.HasERMS = isSet(ebx7, 1<<9) + X86.HasBMI1 = isSet(ebx7, cpuid_BMI1) + X86.HasAVX2 = isSet(ebx7, cpuid_AVX2) && osSupportsAVX + X86.HasBMI2 = isSet(ebx7, cpuid_BMI2) + X86.HasERMS = isSet(ebx7, cpuid_ERMS) X86.HasRDSEED = isSet(ebx7, 1<<18) - X86.HasADX = isSet(ebx7, 1<<19) + X86.HasADX = isSet(ebx7, cpuid_ADX) - X86.HasAVX512 = isSet(ebx7, 1<<16) && osSupportsAVX512 // Because avx-512 foundation is the core required extension + X86.HasAVX512 = isSet(ebx7, cpuid_AVX512F) && osSupportsAVX512 // Because avx-512 foundation is the core required extension if X86.HasAVX512 { X86.HasAVX512F = true - X86.HasAVX512CD = isSet(ebx7, 1<<28) + X86.HasAVX512CD = isSet(ebx7, cpuid_AVX512CD) X86.HasAVX512ER = isSet(ebx7, 1<<27) X86.HasAVX512PF = isSet(ebx7, 1<<26) - X86.HasAVX512VL = isSet(ebx7, 1<<31) - X86.HasAVX512BW = isSet(ebx7, 1<<30) - X86.HasAVX512DQ = isSet(ebx7, 1<<17) + X86.HasAVX512VL = isSet(ebx7, cpuid_AVX512VL) + X86.HasAVX512BW = isSet(ebx7, cpuid_AVX512BW) + X86.HasAVX512DQ = isSet(ebx7, cpuid_AVX512DQ) X86.HasAVX512IFMA = isSet(ebx7, 1<<21) - X86.HasAVX512VBMI = isSet(ecx7, 1<<1) + X86.HasAVX512VBMI = isSet(ecx7, cpuid_AVX512_VBMI) X86.HasAVX5124VNNIW = isSet(edx7, 1<<2) X86.HasAVX5124FMAPS = isSet(edx7, 1<<3) - X86.HasAVX512VPOPCNTDQ = isSet(ecx7, 1<<14) - X86.HasAVX512VPCLMULQDQ = isSet(ecx7, 1<<10) - X86.HasAVX512VNNI = isSet(ecx7, 1<<11) - X86.HasAVX512GFNI = isSet(ecx7, 1<<8) - X86.HasAVX512VAES = isSet(ecx7, 1<<9) - X86.HasAVX512VBMI2 = isSet(ecx7, 1<<6) - X86.HasAVX512BITALG = isSet(ecx7, 1<<12) + X86.HasAVX512VPOPCNTDQ = isSet(ecx7, cpuid_AVX512VPOPCNTDQ) + X86.HasAVX512VPCLMULQDQ = isSet(ecx7, cpuid_AVX512VPCLMULQDQ) + X86.HasAVX512VNNI = isSet(ecx7, cpuid_AVX512VNNI) + X86.HasAVX512GFNI = isSet(ecx7, cpuid_AVX512GFNI) + X86.HasAVX512VAES = isSet(ecx7, cpuid_AVX512VAES) + X86.HasAVX512VBMI2 = isSet(ecx7, cpuid_AVX512VBMI2) + X86.HasAVX512BITALG = isSet(ecx7, cpuid_AVX512BITALG) } X86.HasAMXTile = isSet(edx7, 1<<24) @@ -151,7 +200,7 @@ func archInit() { } if X86.HasAVX { X86.HasAVXIFMA = isSet(eax71, 1<<23) - X86.HasAVXVNNI = isSet(eax71, 1<<4) + X86.HasAVXVNNI = isSet(eax71, cpuid_AVXVNNI) X86.HasAVXVNNIInt8 = isSet(edx71, 1<<4) } }