diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index 339ec7fce39..bc9aa22104b 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -28,16 +28,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VADDSUBPS128", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VADDSUBPS256", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VAESDEC128", argLength: 2, reg: v21, asm: "VAESDEC", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VAESDEC256", argLength: 2, reg: w21, asm: "VAESDEC", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VAESDEC256", argLength: 2, reg: v21, asm: "VAESDEC", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VAESDEC512", argLength: 2, reg: w21, asm: "VAESDEC", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VAESDECLAST128", argLength: 2, reg: v21, asm: "VAESDECLAST", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VAESDECLAST256", argLength: 2, reg: w21, asm: "VAESDECLAST", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VAESDECLAST256", argLength: 2, reg: v21, asm: "VAESDECLAST", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VAESDECLAST512", argLength: 2, reg: w21, asm: "VAESDECLAST", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VAESENC128", argLength: 2, reg: v21, asm: "VAESENC", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VAESENC256", argLength: 2, reg: w21, asm: "VAESENC", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VAESENC256", argLength: 2, reg: v21, asm: "VAESENC", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VAESENC512", argLength: 2, reg: w21, asm: "VAESENC", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VAESENCLAST128", argLength: 2, reg: v21, asm: "VAESENCLAST", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VAESENCLAST256", argLength: 2, reg: w21, asm: "VAESENCLAST", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VAESENCLAST256", argLength: 2, reg: v21, asm: "VAESENCLAST", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VAESENCLAST512", argLength: 2, reg: w21, asm: "VAESENCLAST", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VAESIMC128", argLength: 1, reg: v11, asm: "VAESIMC", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VBROADCASTSD256", argLength: 1, reg: v11, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: false}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index ec82ff45a9d..f039139e4a9 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -21198,11 +21198,11 @@ var opcodeTable = [...]opInfo{ asm: x86.AVAESDEC, reg: regInfo{ inputs: []inputInfo{ - {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 }, outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, }, }, @@ -21240,11 +21240,11 @@ var opcodeTable = [...]opInfo{ asm: x86.AVAESDECLAST, reg: regInfo{ inputs: []inputInfo{ - {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 }, outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, }, }, @@ -21282,11 +21282,11 @@ var opcodeTable = [...]opInfo{ asm: x86.AVAESENC, reg: regInfo{ inputs: []inputInfo{ - {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 }, outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, }, }, @@ -21324,11 +21324,11 @@ var opcodeTable = [...]opInfo{ asm: x86.AVAESENCLAST, reg: regInfo{ inputs: []inputInfo{ - {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 }, outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, }, }, diff --git a/src/internal/cpu/cpu.go b/src/internal/cpu/cpu.go index 4dffeadb228..52459be3bb5 100644 --- a/src/internal/cpu/cpu.go +++ b/src/internal/cpu/cpu.go @@ -56,6 +56,7 @@ var X86 struct { HasSSSE3 bool HasSSE41 bool HasSSE42 bool + HasVAES bool _ CacheLinePad } diff --git a/src/internal/cpu/cpu_x86.go b/src/internal/cpu/cpu_x86.go index 0e97651e3d2..3c0a0adf288 100644 --- a/src/internal/cpu/cpu_x86.go +++ b/src/internal/cpu/cpu_x86.go @@ -28,7 +28,7 @@ const ( cpuid_AVX512VBMI2 = 1 << 6 cpuid_SSSE3 = 1 << 9 cpuid_AVX512GFNI = 1 << 8 - cpuid_AVX512VAES = 1 << 9 + cpuid_VAES = 1 << 9 cpuid_AVX512VNNI = 1 << 11 cpuid_AVX512BITALG = 1 << 12 cpuid_FMA = 1 << 12 @@ -173,6 +173,7 @@ func doinit() { X86.HasERMS = isSet(ebx7, cpuid_ERMS) X86.HasADX = isSet(ebx7, cpuid_ADX) X86.HasSHA = isSet(ebx7, cpuid_SHA) + X86.HasVAES = isSet(ecx7, cpuid_VAES) && X86.HasAVX X86.HasAVX512F = isSet(ebx7, cpuid_AVX512F) && osSupportsAVX512 if X86.HasAVX512F { @@ -185,7 +186,7 @@ func doinit() { X86.HasAVX512VPOPCNTDQ = isSet(ecx7, cpuid_AVX512VPOPCNTDQ) X86.HasAVX512VBMI = isSet(ecx7, cpuid_AVX512VBMI) X86.HasAVX512VBMI2 = isSet(ecx7, cpuid_AVX512VBMI2) - X86.HasAVX512VAES = isSet(ecx7, cpuid_AVX512VAES) + X86.HasAVX512VAES = isSet(ecx7, cpuid_VAES) && X86.HasAES && isSet(ebx7, cpuid_AVX512VL) X86.HasAVX512VNNI = isSet(ecx7, cpuid_AVX512VNNI) X86.HasAVX512VPCLMULQDQ = isSet(ecx7, cpuid_AVX512VPCLMULQDQ) X86.HasAVX512VBMI = isSet(ecx7, cpuid_AVX512_VBMI) diff --git a/src/simd/archsimd/_gen/simdgen/xed.go b/src/simd/archsimd/_gen/simdgen/xed.go index 5d6fac64d0b..2175fa4a8d6 100644 --- a/src/simd/archsimd/_gen/simdgen/xed.go +++ b/src/simd/archsimd/_gen/simdgen/xed.go @@ -77,7 +77,8 @@ func loadXED(xedPath string) []*unify.Value { switch { case inst.RealOpcode == "N": return // Skip unstable instructions - case !(strings.HasPrefix(inst.Extension, "AVX") || strings.HasPrefix(inst.Extension, "SHA") || inst.Extension == "FMA"): + case !(strings.HasPrefix(inst.Extension, "AVX") || strings.HasPrefix(inst.Extension, "SHA") || + inst.Extension == "FMA" || inst.Extension == "VAES"): // We're only interested in AVX and SHA instructions. return } @@ -796,6 +797,7 @@ var cpuFeatureMap = map[string]string{ "AVXAES": "AVXAES", "SHA": "SHA", "FMA": "FMA", + "VAES": "VAES", // AVX-512 foundational features. We combine all of these into one "AVX512" feature. "AVX512F": "AVX512", @@ -829,6 +831,7 @@ func init() { "AVXAES": {Virtual: true, Implies: []string{"AVX", "AES"}}, "FMA": {Implies: []string{"AVX"}}, + "VAES": {Implies: []string{"AVX"}}, // AVX-512 subfeatures. "AVX512BITALG": {Implies: []string{"AVX512"}}, diff --git a/src/simd/archsimd/cpu.go b/src/simd/archsimd/cpu.go index 8069ee7f269..a25556ae659 100644 --- a/src/simd/archsimd/cpu.go +++ b/src/simd/archsimd/cpu.go @@ -158,3 +158,13 @@ func (X86Features) FMA() bool { func (X86Features) SHA() bool { return cpu.X86.HasSHA } + +// VAES returns whether the CPU supports the VAES feature. +// +// If it returns true, then the CPU also supports AVX. +// +// VAES is defined on all GOARCHes, but will only return true on +// GOARCH amd64. +func (X86Features) VAES() bool { + return cpu.X86.HasVAES +} diff --git a/src/simd/archsimd/ops_amd64.go b/src/simd/archsimd/ops_amd64.go index ec50cc72c56..9c5bb22eee9 100644 --- a/src/simd/archsimd/ops_amd64.go +++ b/src/simd/archsimd/ops_amd64.go @@ -19,7 +19,7 @@ func (x Uint8x16) AESDecryptLastRound(y Uint32x4) Uint8x16 // y is the chunk of dw array in use. // result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y) // -// Asm: VAESDECLAST, CPU Feature: AVX512VAES +// Asm: VAESDECLAST, CPU Feature: VAES func (x Uint8x32) AESDecryptLastRound(y Uint32x8) Uint8x32 // AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197. @@ -45,7 +45,7 @@ func (x Uint8x16) AESDecryptOneRound(y Uint32x4) Uint8x16 // y is the chunk of dw array in use. // result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y) // -// Asm: VAESDEC, CPU Feature: AVX512VAES +// Asm: VAESDEC, CPU Feature: VAES func (x Uint8x32) AESDecryptOneRound(y Uint32x8) Uint8x32 // AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197. @@ -71,7 +71,7 @@ func (x Uint8x16) AESEncryptLastRound(y Uint32x4) Uint8x16 // y is the chunk of w array in use. // result = AddRoundKey((ShiftRows(SubBytes(x))), y) // -// Asm: VAESENCLAST, CPU Feature: AVX512VAES +// Asm: VAESENCLAST, CPU Feature: VAES func (x Uint8x32) AESEncryptLastRound(y Uint32x8) Uint8x32 // AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197. @@ -97,7 +97,7 @@ func (x Uint8x16) AESEncryptOneRound(y Uint32x4) Uint8x16 // y is the chunk of w array in use. // result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y) // -// Asm: VAESENC, CPU Feature: AVX512VAES +// Asm: VAESENC, CPU Feature: VAES func (x Uint8x32) AESEncryptOneRound(y Uint32x8) Uint8x32 // AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.