mirror of
https://github.com/golang/go.git
synced 2026-01-29 07:02:05 +03:00
cmd/compile, simd: capture VAES instructions and fix AVX512VAES feature
The code previously filtered out VAES-only instructions; this CL adds them back. This CL adds the VAES feature check following the Intel XED data: XED_ISA_SET_VAES: vaes.7.0.ecx.9 # avx.1.0.ecx.28 This CL also found that the old AVX512VAES feature check was not checking the correct bits, and fixes it: XED_ISA_SET_AVX512_VAES_128: vaes.7.0.ecx.9 aes.1.0.ecx.25 avx512f.7.0.ebx.16 avx512vl.7.0.ebx.31 XED_ISA_SET_AVX512_VAES_256: vaes.7.0.ecx.9 aes.1.0.ecx.25 avx512f.7.0.ebx.16 avx512vl.7.0.ebx.31 XED_ISA_SET_AVX512_VAES_512: vaes.7.0.ecx.9 aes.1.0.ecx.25 avx512f.7.0.ebx.16 The check is restricted to the strictest common set: it requires avx512vl even for the 512-bit form, although that form does not require it. Change-Id: I4e2f72b312fd2411589fbc12f9ee5c63c09c2e9a Reviewed-on: https://go-review.googlesource.com/c/go/+/738500 Reviewed-by: Cherry Mui <cherryyz@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
@@ -28,16 +28,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
||||
{name: "VADDSUBPS128", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VADDSUBPS256", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VAESDEC128", argLength: 2, reg: v21, asm: "VAESDEC", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VAESDEC256", argLength: 2, reg: w21, asm: "VAESDEC", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VAESDEC256", argLength: 2, reg: v21, asm: "VAESDEC", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VAESDEC512", argLength: 2, reg: w21, asm: "VAESDEC", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VAESDECLAST128", argLength: 2, reg: v21, asm: "VAESDECLAST", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VAESDECLAST256", argLength: 2, reg: w21, asm: "VAESDECLAST", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VAESDECLAST256", argLength: 2, reg: v21, asm: "VAESDECLAST", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VAESDECLAST512", argLength: 2, reg: w21, asm: "VAESDECLAST", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VAESENC128", argLength: 2, reg: v21, asm: "VAESENC", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VAESENC256", argLength: 2, reg: w21, asm: "VAESENC", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VAESENC256", argLength: 2, reg: v21, asm: "VAESENC", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VAESENC512", argLength: 2, reg: w21, asm: "VAESENC", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VAESENCLAST128", argLength: 2, reg: v21, asm: "VAESENCLAST", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VAESENCLAST256", argLength: 2, reg: w21, asm: "VAESENCLAST", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VAESENCLAST256", argLength: 2, reg: v21, asm: "VAESENCLAST", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VAESENCLAST512", argLength: 2, reg: w21, asm: "VAESENCLAST", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VAESIMC128", argLength: 1, reg: v11, asm: "VAESIMC", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VBROADCASTSD256", argLength: 1, reg: v11, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
|
||||
@@ -21198,11 +21198,11 @@ var opcodeTable = [...]opInfo{
|
||||
asm: x86.AVAESDEC,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -21240,11 +21240,11 @@ var opcodeTable = [...]opInfo{
|
||||
asm: x86.AVAESDECLAST,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -21282,11 +21282,11 @@ var opcodeTable = [...]opInfo{
|
||||
asm: x86.AVAESENC,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -21324,11 +21324,11 @@ var opcodeTable = [...]opInfo{
|
||||
asm: x86.AVAESENCLAST,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
@@ -56,6 +56,7 @@ var X86 struct {
|
||||
HasSSSE3 bool
|
||||
HasSSE41 bool
|
||||
HasSSE42 bool
|
||||
HasVAES bool
|
||||
_ CacheLinePad
|
||||
}
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ const (
|
||||
cpuid_AVX512VBMI2 = 1 << 6
|
||||
cpuid_SSSE3 = 1 << 9
|
||||
cpuid_AVX512GFNI = 1 << 8
|
||||
cpuid_AVX512VAES = 1 << 9
|
||||
cpuid_VAES = 1 << 9
|
||||
cpuid_AVX512VNNI = 1 << 11
|
||||
cpuid_AVX512BITALG = 1 << 12
|
||||
cpuid_FMA = 1 << 12
|
||||
@@ -173,6 +173,7 @@ func doinit() {
|
||||
X86.HasERMS = isSet(ebx7, cpuid_ERMS)
|
||||
X86.HasADX = isSet(ebx7, cpuid_ADX)
|
||||
X86.HasSHA = isSet(ebx7, cpuid_SHA)
|
||||
X86.HasVAES = isSet(ecx7, cpuid_VAES) && X86.HasAVX
|
||||
|
||||
X86.HasAVX512F = isSet(ebx7, cpuid_AVX512F) && osSupportsAVX512
|
||||
if X86.HasAVX512F {
|
||||
@@ -185,7 +186,7 @@ func doinit() {
|
||||
X86.HasAVX512VPOPCNTDQ = isSet(ecx7, cpuid_AVX512VPOPCNTDQ)
|
||||
X86.HasAVX512VBMI = isSet(ecx7, cpuid_AVX512VBMI)
|
||||
X86.HasAVX512VBMI2 = isSet(ecx7, cpuid_AVX512VBMI2)
|
||||
X86.HasAVX512VAES = isSet(ecx7, cpuid_AVX512VAES)
|
||||
X86.HasAVX512VAES = isSet(ecx7, cpuid_VAES) && X86.HasAES && isSet(ebx7, cpuid_AVX512VL)
|
||||
X86.HasAVX512VNNI = isSet(ecx7, cpuid_AVX512VNNI)
|
||||
X86.HasAVX512VPCLMULQDQ = isSet(ecx7, cpuid_AVX512VPCLMULQDQ)
|
||||
X86.HasAVX512VBMI = isSet(ecx7, cpuid_AVX512_VBMI)
|
||||
|
||||
@@ -77,7 +77,8 @@ func loadXED(xedPath string) []*unify.Value {
|
||||
switch {
|
||||
case inst.RealOpcode == "N":
|
||||
return // Skip unstable instructions
|
||||
case !(strings.HasPrefix(inst.Extension, "AVX") || strings.HasPrefix(inst.Extension, "SHA") || inst.Extension == "FMA"):
|
||||
case !(strings.HasPrefix(inst.Extension, "AVX") || strings.HasPrefix(inst.Extension, "SHA") ||
|
||||
inst.Extension == "FMA" || inst.Extension == "VAES"):
|
||||
// We're only interested in AVX*, SHA*, FMA, and VAES instructions.
|
||||
return
|
||||
}
|
||||
@@ -796,6 +797,7 @@ var cpuFeatureMap = map[string]string{
|
||||
"AVXAES": "AVXAES",
|
||||
"SHA": "SHA",
|
||||
"FMA": "FMA",
|
||||
"VAES": "VAES",
|
||||
|
||||
// AVX-512 foundational features. We combine all of these into one "AVX512" feature.
|
||||
"AVX512F": "AVX512",
|
||||
@@ -829,6 +831,7 @@ func init() {
|
||||
|
||||
"AVXAES": {Virtual: true, Implies: []string{"AVX", "AES"}},
|
||||
"FMA": {Implies: []string{"AVX"}},
|
||||
"VAES": {Implies: []string{"AVX"}},
|
||||
|
||||
// AVX-512 subfeatures.
|
||||
"AVX512BITALG": {Implies: []string{"AVX512"}},
|
||||
|
||||
@@ -158,3 +158,13 @@ func (X86Features) FMA() bool {
|
||||
func (X86Features) SHA() bool {
|
||||
return cpu.X86.HasSHA
|
||||
}
|
||||
|
||||
// VAES returns whether the CPU supports the VAES feature.
|
||||
//
|
||||
// If it returns true, then the CPU also supports AVX.
|
||||
//
|
||||
// VAES is defined on all GOARCHes, but will only return true on
|
||||
// GOARCH amd64.
|
||||
func (X86Features) VAES() bool {
|
||||
return cpu.X86.HasVAES
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ func (x Uint8x16) AESDecryptLastRound(y Uint32x4) Uint8x16
|
||||
// y is the chunk of dw array in use.
|
||||
// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
|
||||
//
|
||||
// Asm: VAESDECLAST, CPU Feature: AVX512VAES
|
||||
// Asm: VAESDECLAST, CPU Feature: VAES
|
||||
func (x Uint8x32) AESDecryptLastRound(y Uint32x8) Uint8x32
|
||||
|
||||
// AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
|
||||
@@ -45,7 +45,7 @@ func (x Uint8x16) AESDecryptOneRound(y Uint32x4) Uint8x16
|
||||
// y is the chunk of dw array in use.
|
||||
// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
|
||||
//
|
||||
// Asm: VAESDEC, CPU Feature: AVX512VAES
|
||||
// Asm: VAESDEC, CPU Feature: VAES
|
||||
func (x Uint8x32) AESDecryptOneRound(y Uint32x8) Uint8x32
|
||||
|
||||
// AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
|
||||
@@ -71,7 +71,7 @@ func (x Uint8x16) AESEncryptLastRound(y Uint32x4) Uint8x16
|
||||
// y is the chunk of w array in use.
|
||||
// result = AddRoundKey((ShiftRows(SubBytes(x))), y)
|
||||
//
|
||||
// Asm: VAESENCLAST, CPU Feature: AVX512VAES
|
||||
// Asm: VAESENCLAST, CPU Feature: VAES
|
||||
func (x Uint8x32) AESEncryptLastRound(y Uint32x8) Uint8x32
|
||||
|
||||
// AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
|
||||
@@ -97,7 +97,7 @@ func (x Uint8x16) AESEncryptOneRound(y Uint32x4) Uint8x16
|
||||
// y is the chunk of w array in use.
|
||||
// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
|
||||
//
|
||||
// Asm: VAESENC, CPU Feature: AVX512VAES
|
||||
// Asm: VAESENC, CPU Feature: VAES
|
||||
func (x Uint8x32) AESEncryptOneRound(y Uint32x8) Uint8x32
|
||||
|
||||
// AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
|
||||
|
||||
Reference in New Issue
Block a user