mirror of
https://github.com/golang/go.git
synced 2026-01-29 15:12:08 +03:00
Compare commits
8 Commits
release-br
...
dev.simd
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
096a80ff51 | ||
|
|
d8720e1c29 | ||
|
|
4a702376b7 | ||
|
|
3132179209 | ||
|
|
29842d6b23 | ||
|
|
63d1eec2bb | ||
|
|
636119f3d0 | ||
|
|
c87e344f7a |
@@ -1 +1,2 @@
|
||||
branch: master
|
||||
branch: dev.simd
|
||||
parent-branch: master
|
||||
|
||||
@@ -1959,23 +1959,11 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||
ssa.OpAMD64VPERMI2Q256load,
|
||||
ssa.OpAMD64VPERMI2PD512load,
|
||||
ssa.OpAMD64VPERMI2Q512load,
|
||||
ssa.OpAMD64VFMADD213PS128load,
|
||||
ssa.OpAMD64VFMADD213PS256load,
|
||||
ssa.OpAMD64VFMADD213PS512load,
|
||||
ssa.OpAMD64VFMADD213PD128load,
|
||||
ssa.OpAMD64VFMADD213PD256load,
|
||||
ssa.OpAMD64VFMADD213PD512load,
|
||||
ssa.OpAMD64VFMADDSUB213PS128load,
|
||||
ssa.OpAMD64VFMADDSUB213PS256load,
|
||||
ssa.OpAMD64VFMADDSUB213PS512load,
|
||||
ssa.OpAMD64VFMADDSUB213PD128load,
|
||||
ssa.OpAMD64VFMADDSUB213PD256load,
|
||||
ssa.OpAMD64VFMADDSUB213PD512load,
|
||||
ssa.OpAMD64VFMSUBADD213PS128load,
|
||||
ssa.OpAMD64VFMSUBADD213PS256load,
|
||||
ssa.OpAMD64VFMSUBADD213PS512load,
|
||||
ssa.OpAMD64VFMSUBADD213PD128load,
|
||||
ssa.OpAMD64VFMSUBADD213PD256load,
|
||||
ssa.OpAMD64VFMSUBADD213PD512load,
|
||||
ssa.OpAMD64VPSHLDVD128load,
|
||||
ssa.OpAMD64VPSHLDVD256load,
|
||||
|
||||
@@ -2771,11 +2771,7 @@
|
||||
(VPMULLQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLQ128load {sym} [off] x ptr mem)
|
||||
(VPMULLQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLQ256load {sym} [off] x ptr mem)
|
||||
(VPMULLQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLQ512load {sym} [off] x ptr mem)
|
||||
(VFMADD213PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PS128load {sym} [off] x y ptr mem)
|
||||
(VFMADD213PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PS256load {sym} [off] x y ptr mem)
|
||||
(VFMADD213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PS512load {sym} [off] x y ptr mem)
|
||||
(VFMADD213PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PD128load {sym} [off] x y ptr mem)
|
||||
(VFMADD213PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PD256load {sym} [off] x y ptr mem)
|
||||
(VFMADD213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PD512load {sym} [off] x y ptr mem)
|
||||
(VFMADD213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PSMasked128load {sym} [off] x y ptr mask mem)
|
||||
(VFMADD213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PSMasked256load {sym} [off] x y ptr mask mem)
|
||||
@@ -2783,11 +2779,7 @@
|
||||
(VFMADD213PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PDMasked128load {sym} [off] x y ptr mask mem)
|
||||
(VFMADD213PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PDMasked256load {sym} [off] x y ptr mask mem)
|
||||
(VFMADD213PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PDMasked512load {sym} [off] x y ptr mask mem)
|
||||
(VFMADDSUB213PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PS128load {sym} [off] x y ptr mem)
|
||||
(VFMADDSUB213PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PS256load {sym} [off] x y ptr mem)
|
||||
(VFMADDSUB213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PS512load {sym} [off] x y ptr mem)
|
||||
(VFMADDSUB213PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PD128load {sym} [off] x y ptr mem)
|
||||
(VFMADDSUB213PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PD256load {sym} [off] x y ptr mem)
|
||||
(VFMADDSUB213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PD512load {sym} [off] x y ptr mem)
|
||||
(VFMADDSUB213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PSMasked128load {sym} [off] x y ptr mask mem)
|
||||
(VFMADDSUB213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PSMasked256load {sym} [off] x y ptr mask mem)
|
||||
@@ -2807,11 +2799,7 @@
|
||||
(VPMULLQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLQMasked128load {sym} [off] x ptr mask mem)
|
||||
(VPMULLQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLQMasked256load {sym} [off] x ptr mask mem)
|
||||
(VPMULLQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLQMasked512load {sym} [off] x ptr mask mem)
|
||||
(VFMSUBADD213PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PS128load {sym} [off] x y ptr mem)
|
||||
(VFMSUBADD213PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PS256load {sym} [off] x y ptr mem)
|
||||
(VFMSUBADD213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PS512load {sym} [off] x y ptr mem)
|
||||
(VFMSUBADD213PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PD128load {sym} [off] x y ptr mem)
|
||||
(VFMSUBADD213PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PD256load {sym} [off] x y ptr mem)
|
||||
(VFMSUBADD213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PD512load {sym} [off] x y ptr mem)
|
||||
(VFMSUBADD213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PSMasked128load {sym} [off] x y ptr mask mem)
|
||||
(VFMSUBADD213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PSMasked256load {sym} [off] x y ptr mask mem)
|
||||
|
||||
@@ -172,38 +172,38 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
||||
{name: "VEXPANDPSMasked128", argLength: 2, reg: wkw, asm: "VEXPANDPS", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VEXPANDPSMasked256", argLength: 2, reg: wkw, asm: "VEXPANDPS", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VEXPANDPSMasked512", argLength: 2, reg: wkw, asm: "VEXPANDPS", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VFMADD213PD128", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VFMADD213PD256", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VFMADD213PD128", argLength: 3, reg: v31, asm: "VFMADD213PD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VFMADD213PD256", argLength: 3, reg: v31, asm: "VFMADD213PD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VFMADD213PD512", argLength: 3, reg: w31, asm: "VFMADD213PD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VFMADD213PDMasked128", argLength: 4, reg: w3kw, asm: "VFMADD213PD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VFMADD213PDMasked256", argLength: 4, reg: w3kw, asm: "VFMADD213PD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VFMADD213PDMasked512", argLength: 4, reg: w3kw, asm: "VFMADD213PD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VFMADD213PS128", argLength: 3, reg: w31, asm: "VFMADD213PS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VFMADD213PS256", argLength: 3, reg: w31, asm: "VFMADD213PS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VFMADD213PS128", argLength: 3, reg: v31, asm: "VFMADD213PS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VFMADD213PS256", argLength: 3, reg: v31, asm: "VFMADD213PS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VFMADD213PS512", argLength: 3, reg: w31, asm: "VFMADD213PS", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VFMADD213PSMasked128", argLength: 4, reg: w3kw, asm: "VFMADD213PS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VFMADD213PSMasked256", argLength: 4, reg: w3kw, asm: "VFMADD213PS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VFMADD213PSMasked512", argLength: 4, reg: w3kw, asm: "VFMADD213PS", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PD128", argLength: 3, reg: w31, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PD256", argLength: 3, reg: w31, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PD128", argLength: 3, reg: v31, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PD256", argLength: 3, reg: v31, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PD512", argLength: 3, reg: w31, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PDMasked128", argLength: 4, reg: w3kw, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PDMasked256", argLength: 4, reg: w3kw, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PDMasked512", argLength: 4, reg: w3kw, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PS128", argLength: 3, reg: w31, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PS256", argLength: 3, reg: w31, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PS128", argLength: 3, reg: v31, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PS256", argLength: 3, reg: v31, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PS512", argLength: 3, reg: w31, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PSMasked128", argLength: 4, reg: w3kw, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PSMasked256", argLength: 4, reg: w3kw, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PSMasked512", argLength: 4, reg: w3kw, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PD128", argLength: 3, reg: w31, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PD256", argLength: 3, reg: w31, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PD128", argLength: 3, reg: v31, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PD256", argLength: 3, reg: v31, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PD512", argLength: 3, reg: w31, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PDMasked128", argLength: 4, reg: w3kw, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PDMasked256", argLength: 4, reg: w3kw, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PDMasked512", argLength: 4, reg: w3kw, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PS128", argLength: 3, reg: w31, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PS256", argLength: 3, reg: w31, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PS128", argLength: 3, reg: v31, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PS256", argLength: 3, reg: v31, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PS512", argLength: 3, reg: w31, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec512", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PSMasked128", argLength: 4, reg: w3kw, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec128", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PSMasked256", argLength: 4, reg: w3kw, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec256", resultInArg0: true},
|
||||
@@ -1594,38 +1594,26 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
||||
{name: "VDIVPSMasked128load", argLength: 4, reg: w2kwload, asm: "VDIVPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||
{name: "VDIVPSMasked256load", argLength: 4, reg: w2kwload, asm: "VDIVPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||
{name: "VDIVPSMasked512load", argLength: 4, reg: w2kwload, asm: "VDIVPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||
{name: "VFMADD213PD128load", argLength: 4, reg: w31load, asm: "VFMADD213PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADD213PD256load", argLength: 4, reg: w31load, asm: "VFMADD213PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADD213PD512load", argLength: 4, reg: w31load, asm: "VFMADD213PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADD213PDMasked128load", argLength: 5, reg: w3kwload, asm: "VFMADD213PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADD213PDMasked256load", argLength: 5, reg: w3kwload, asm: "VFMADD213PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADD213PDMasked512load", argLength: 5, reg: w3kwload, asm: "VFMADD213PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADD213PS128load", argLength: 4, reg: w31load, asm: "VFMADD213PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADD213PS256load", argLength: 4, reg: w31load, asm: "VFMADD213PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADD213PS512load", argLength: 4, reg: w31load, asm: "VFMADD213PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADD213PSMasked128load", argLength: 5, reg: w3kwload, asm: "VFMADD213PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADD213PSMasked256load", argLength: 5, reg: w3kwload, asm: "VFMADD213PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADD213PSMasked512load", argLength: 5, reg: w3kwload, asm: "VFMADD213PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PD128load", argLength: 4, reg: w31load, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PD256load", argLength: 4, reg: w31load, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PD512load", argLength: 4, reg: w31load, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PDMasked128load", argLength: 5, reg: w3kwload, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PDMasked256load", argLength: 5, reg: w3kwload, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PDMasked512load", argLength: 5, reg: w3kwload, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PS128load", argLength: 4, reg: w31load, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PS256load", argLength: 4, reg: w31load, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PS512load", argLength: 4, reg: w31load, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PSMasked128load", argLength: 5, reg: w3kwload, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PSMasked256load", argLength: 5, reg: w3kwload, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMADDSUB213PSMasked512load", argLength: 5, reg: w3kwload, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PD128load", argLength: 4, reg: w31load, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PD256load", argLength: 4, reg: w31load, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PD512load", argLength: 4, reg: w31load, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PDMasked128load", argLength: 5, reg: w3kwload, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PDMasked256load", argLength: 5, reg: w3kwload, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PDMasked512load", argLength: 5, reg: w3kwload, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PS128load", argLength: 4, reg: w31load, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PS256load", argLength: 4, reg: w31load, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PS512load", argLength: 4, reg: w31load, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PSMasked128load", argLength: 5, reg: w3kwload, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
{name: "VFMSUBADD213PSMasked256load", argLength: 5, reg: w3kwload, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
|
||||
|
||||
@@ -2835,38 +2835,26 @@ const (
|
||||
OpAMD64VDIVPSMasked128load
|
||||
OpAMD64VDIVPSMasked256load
|
||||
OpAMD64VDIVPSMasked512load
|
||||
OpAMD64VFMADD213PD128load
|
||||
OpAMD64VFMADD213PD256load
|
||||
OpAMD64VFMADD213PD512load
|
||||
OpAMD64VFMADD213PDMasked128load
|
||||
OpAMD64VFMADD213PDMasked256load
|
||||
OpAMD64VFMADD213PDMasked512load
|
||||
OpAMD64VFMADD213PS128load
|
||||
OpAMD64VFMADD213PS256load
|
||||
OpAMD64VFMADD213PS512load
|
||||
OpAMD64VFMADD213PSMasked128load
|
||||
OpAMD64VFMADD213PSMasked256load
|
||||
OpAMD64VFMADD213PSMasked512load
|
||||
OpAMD64VFMADDSUB213PD128load
|
||||
OpAMD64VFMADDSUB213PD256load
|
||||
OpAMD64VFMADDSUB213PD512load
|
||||
OpAMD64VFMADDSUB213PDMasked128load
|
||||
OpAMD64VFMADDSUB213PDMasked256load
|
||||
OpAMD64VFMADDSUB213PDMasked512load
|
||||
OpAMD64VFMADDSUB213PS128load
|
||||
OpAMD64VFMADDSUB213PS256load
|
||||
OpAMD64VFMADDSUB213PS512load
|
||||
OpAMD64VFMADDSUB213PSMasked128load
|
||||
OpAMD64VFMADDSUB213PSMasked256load
|
||||
OpAMD64VFMADDSUB213PSMasked512load
|
||||
OpAMD64VFMSUBADD213PD128load
|
||||
OpAMD64VFMSUBADD213PD256load
|
||||
OpAMD64VFMSUBADD213PD512load
|
||||
OpAMD64VFMSUBADD213PDMasked128load
|
||||
OpAMD64VFMSUBADD213PDMasked256load
|
||||
OpAMD64VFMSUBADD213PDMasked512load
|
||||
OpAMD64VFMSUBADD213PS128load
|
||||
OpAMD64VFMSUBADD213PS256load
|
||||
OpAMD64VFMSUBADD213PS512load
|
||||
OpAMD64VFMSUBADD213PSMasked128load
|
||||
OpAMD64VFMSUBADD213PSMasked256load
|
||||
@@ -23095,12 +23083,12 @@ var opcodeTable = [...]opInfo{
|
||||
asm: x86.AVFMADD213PD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -23111,12 +23099,12 @@ var opcodeTable = [...]opInfo{
|
||||
asm: x86.AVFMADD213PD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -23194,12 +23182,12 @@ var opcodeTable = [...]opInfo{
|
||||
asm: x86.AVFMADD213PS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -23210,12 +23198,12 @@ var opcodeTable = [...]opInfo{
|
||||
asm: x86.AVFMADD213PS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -23293,12 +23281,12 @@ var opcodeTable = [...]opInfo{
|
||||
asm: x86.AVFMADDSUB213PD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -23309,12 +23297,12 @@ var opcodeTable = [...]opInfo{
|
||||
asm: x86.AVFMADDSUB213PD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -23392,12 +23380,12 @@ var opcodeTable = [...]opInfo{
|
||||
asm: x86.AVFMADDSUB213PS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -23408,12 +23396,12 @@ var opcodeTable = [...]opInfo{
|
||||
asm: x86.AVFMADDSUB213PS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -23491,12 +23479,12 @@ var opcodeTable = [...]opInfo{
|
||||
asm: x86.AVFMSUBADD213PD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -23507,12 +23495,12 @@ var opcodeTable = [...]opInfo{
|
||||
asm: x86.AVFMSUBADD213PD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -23590,12 +23578,12 @@ var opcodeTable = [...]opInfo{
|
||||
asm: x86.AVFMSUBADD213PS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -23606,12 +23594,12 @@ var opcodeTable = [...]opInfo{
|
||||
asm: x86.AVFMSUBADD213PS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -44109,42 +44097,6 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VFMADD213PD128load",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
resultInArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.AVFMADD213PD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VFMADD213PD256load",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
resultInArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.AVFMADD213PD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VFMADD213PD512load",
|
||||
auxType: auxSymOff,
|
||||
@@ -44220,42 +44172,6 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VFMADD213PS128load",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
resultInArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.AVFMADD213PS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VFMADD213PS256load",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
resultInArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.AVFMADD213PS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VFMADD213PS512load",
|
||||
auxType: auxSymOff,
|
||||
@@ -44331,42 +44247,6 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VFMADDSUB213PD128load",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
resultInArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.AVFMADDSUB213PD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VFMADDSUB213PD256load",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
resultInArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.AVFMADDSUB213PD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VFMADDSUB213PD512load",
|
||||
auxType: auxSymOff,
|
||||
@@ -44442,42 +44322,6 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VFMADDSUB213PS128load",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
resultInArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.AVFMADDSUB213PS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VFMADDSUB213PS256load",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
resultInArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.AVFMADDSUB213PS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VFMADDSUB213PS512load",
|
||||
auxType: auxSymOff,
|
||||
@@ -44553,42 +44397,6 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VFMSUBADD213PD128load",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
resultInArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.AVFMSUBADD213PD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VFMSUBADD213PD256load",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
resultInArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.AVFMSUBADD213PD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VFMSUBADD213PD512load",
|
||||
auxType: auxSymOff,
|
||||
@@ -44664,42 +44472,6 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VFMSUBADD213PS128load",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
resultInArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.AVFMSUBADD213PS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VFMSUBADD213PS256load",
|
||||
auxType: auxSymOff,
|
||||
argLen: 4,
|
||||
resultInArg0: true,
|
||||
symEffect: SymRead,
|
||||
asm: x86.AVFMSUBADD213PS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
{1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VFMSUBADD213PS512load",
|
||||
auxType: auxSymOff,
|
||||
|
||||
@@ -782,10 +782,6 @@ func rewriteValueAMD64(v *Value) bool {
|
||||
return rewriteValueAMD64_OpAMD64VDIVPSMasked256(v)
|
||||
case OpAMD64VDIVPSMasked512:
|
||||
return rewriteValueAMD64_OpAMD64VDIVPSMasked512(v)
|
||||
case OpAMD64VFMADD213PD128:
|
||||
return rewriteValueAMD64_OpAMD64VFMADD213PD128(v)
|
||||
case OpAMD64VFMADD213PD256:
|
||||
return rewriteValueAMD64_OpAMD64VFMADD213PD256(v)
|
||||
case OpAMD64VFMADD213PD512:
|
||||
return rewriteValueAMD64_OpAMD64VFMADD213PD512(v)
|
||||
case OpAMD64VFMADD213PDMasked128:
|
||||
@@ -794,10 +790,6 @@ func rewriteValueAMD64(v *Value) bool {
|
||||
return rewriteValueAMD64_OpAMD64VFMADD213PDMasked256(v)
|
||||
case OpAMD64VFMADD213PDMasked512:
|
||||
return rewriteValueAMD64_OpAMD64VFMADD213PDMasked512(v)
|
||||
case OpAMD64VFMADD213PS128:
|
||||
return rewriteValueAMD64_OpAMD64VFMADD213PS128(v)
|
||||
case OpAMD64VFMADD213PS256:
|
||||
return rewriteValueAMD64_OpAMD64VFMADD213PS256(v)
|
||||
case OpAMD64VFMADD213PS512:
|
||||
return rewriteValueAMD64_OpAMD64VFMADD213PS512(v)
|
||||
case OpAMD64VFMADD213PSMasked128:
|
||||
@@ -806,10 +798,6 @@ func rewriteValueAMD64(v *Value) bool {
|
||||
return rewriteValueAMD64_OpAMD64VFMADD213PSMasked256(v)
|
||||
case OpAMD64VFMADD213PSMasked512:
|
||||
return rewriteValueAMD64_OpAMD64VFMADD213PSMasked512(v)
|
||||
case OpAMD64VFMADDSUB213PD128:
|
||||
return rewriteValueAMD64_OpAMD64VFMADDSUB213PD128(v)
|
||||
case OpAMD64VFMADDSUB213PD256:
|
||||
return rewriteValueAMD64_OpAMD64VFMADDSUB213PD256(v)
|
||||
case OpAMD64VFMADDSUB213PD512:
|
||||
return rewriteValueAMD64_OpAMD64VFMADDSUB213PD512(v)
|
||||
case OpAMD64VFMADDSUB213PDMasked128:
|
||||
@@ -818,10 +806,6 @@ func rewriteValueAMD64(v *Value) bool {
|
||||
return rewriteValueAMD64_OpAMD64VFMADDSUB213PDMasked256(v)
|
||||
case OpAMD64VFMADDSUB213PDMasked512:
|
||||
return rewriteValueAMD64_OpAMD64VFMADDSUB213PDMasked512(v)
|
||||
case OpAMD64VFMADDSUB213PS128:
|
||||
return rewriteValueAMD64_OpAMD64VFMADDSUB213PS128(v)
|
||||
case OpAMD64VFMADDSUB213PS256:
|
||||
return rewriteValueAMD64_OpAMD64VFMADDSUB213PS256(v)
|
||||
case OpAMD64VFMADDSUB213PS512:
|
||||
return rewriteValueAMD64_OpAMD64VFMADDSUB213PS512(v)
|
||||
case OpAMD64VFMADDSUB213PSMasked128:
|
||||
@@ -830,10 +814,6 @@ func rewriteValueAMD64(v *Value) bool {
|
||||
return rewriteValueAMD64_OpAMD64VFMADDSUB213PSMasked256(v)
|
||||
case OpAMD64VFMADDSUB213PSMasked512:
|
||||
return rewriteValueAMD64_OpAMD64VFMADDSUB213PSMasked512(v)
|
||||
case OpAMD64VFMSUBADD213PD128:
|
||||
return rewriteValueAMD64_OpAMD64VFMSUBADD213PD128(v)
|
||||
case OpAMD64VFMSUBADD213PD256:
|
||||
return rewriteValueAMD64_OpAMD64VFMSUBADD213PD256(v)
|
||||
case OpAMD64VFMSUBADD213PD512:
|
||||
return rewriteValueAMD64_OpAMD64VFMSUBADD213PD512(v)
|
||||
case OpAMD64VFMSUBADD213PDMasked128:
|
||||
@@ -842,10 +822,6 @@ func rewriteValueAMD64(v *Value) bool {
|
||||
return rewriteValueAMD64_OpAMD64VFMSUBADD213PDMasked256(v)
|
||||
case OpAMD64VFMSUBADD213PDMasked512:
|
||||
return rewriteValueAMD64_OpAMD64VFMSUBADD213PDMasked512(v)
|
||||
case OpAMD64VFMSUBADD213PS128:
|
||||
return rewriteValueAMD64_OpAMD64VFMSUBADD213PS128(v)
|
||||
case OpAMD64VFMSUBADD213PS256:
|
||||
return rewriteValueAMD64_OpAMD64VFMSUBADD213PS256(v)
|
||||
case OpAMD64VFMSUBADD213PS512:
|
||||
return rewriteValueAMD64_OpAMD64VFMSUBADD213PS512(v)
|
||||
case OpAMD64VFMSUBADD213PSMasked128:
|
||||
@@ -31486,64 +31462,6 @@ func rewriteValueAMD64_OpAMD64VDIVPSMasked512(v *Value) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
// rewriteValueAMD64_OpAMD64VFMADD213PD128 applies the load-folding rewrite for
// VFMADD213PD128. When the third argument is a VMOVDQUload128 for which
// canMergeLoad(v, l) && clobber(l) holds, v is reset in place to
// VFMADD213PD128load carrying the load's sym/off aux and the arguments
// (x, y, ptr, mem). It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VFMADD213PD128(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VFMADD213PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem))
|
||||
// cond: canMergeLoad(v, l) && clobber(l)
|
||||
// result: (VFMADD213PD128load {sym} [off] x y ptr mem)
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
l := v_2
|
||||
if l.Op != OpAMD64VMOVDQUload128 {
|
||||
break
|
||||
}
|
||||
off := auxIntToInt32(l.AuxInt)
|
||||
sym := auxToSym(l.Aux)
|
||||
mem := l.Args[1]
|
||||
ptr := l.Args[0]
|
||||
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VFMADD213PD128load)
|
||||
v.AuxInt = int32ToAuxInt(off)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg4(x, y, ptr, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
// rewriteValueAMD64_OpAMD64VFMADD213PD256 applies the load-folding rewrite for
// VFMADD213PD256. When the third argument is a VMOVDQUload256 for which
// canMergeLoad(v, l) && clobber(l) holds, v is reset in place to
// VFMADD213PD256load carrying the load's sym/off aux and the arguments
// (x, y, ptr, mem). It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VFMADD213PD256(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VFMADD213PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem))
|
||||
// cond: canMergeLoad(v, l) && clobber(l)
|
||||
// result: (VFMADD213PD256load {sym} [off] x y ptr mem)
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
l := v_2
|
||||
if l.Op != OpAMD64VMOVDQUload256 {
|
||||
break
|
||||
}
|
||||
off := auxIntToInt32(l.AuxInt)
|
||||
sym := auxToSym(l.Aux)
|
||||
mem := l.Args[1]
|
||||
ptr := l.Args[0]
|
||||
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VFMADD213PD256load)
|
||||
v.AuxInt = int32ToAuxInt(off)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg4(x, y, ptr, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VFMADD213PD512(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
@@ -31666,64 +31584,6 @@ func rewriteValueAMD64_OpAMD64VFMADD213PDMasked512(v *Value) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
// rewriteValueAMD64_OpAMD64VFMADD213PS128 applies the load-folding rewrite for
// VFMADD213PS128. When the third argument is a VMOVDQUload128 for which
// canMergeLoad(v, l) && clobber(l) holds, v is reset in place to
// VFMADD213PS128load carrying the load's sym/off aux and the arguments
// (x, y, ptr, mem). It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VFMADD213PS128(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VFMADD213PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem))
|
||||
// cond: canMergeLoad(v, l) && clobber(l)
|
||||
// result: (VFMADD213PS128load {sym} [off] x y ptr mem)
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
l := v_2
|
||||
if l.Op != OpAMD64VMOVDQUload128 {
|
||||
break
|
||||
}
|
||||
off := auxIntToInt32(l.AuxInt)
|
||||
sym := auxToSym(l.Aux)
|
||||
mem := l.Args[1]
|
||||
ptr := l.Args[0]
|
||||
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VFMADD213PS128load)
|
||||
v.AuxInt = int32ToAuxInt(off)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg4(x, y, ptr, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
// rewriteValueAMD64_OpAMD64VFMADD213PS256 applies the load-folding rewrite for
// VFMADD213PS256. When the third argument is a VMOVDQUload256 for which
// canMergeLoad(v, l) && clobber(l) holds, v is reset in place to
// VFMADD213PS256load carrying the load's sym/off aux and the arguments
// (x, y, ptr, mem). It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VFMADD213PS256(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VFMADD213PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem))
|
||||
// cond: canMergeLoad(v, l) && clobber(l)
|
||||
// result: (VFMADD213PS256load {sym} [off] x y ptr mem)
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
l := v_2
|
||||
if l.Op != OpAMD64VMOVDQUload256 {
|
||||
break
|
||||
}
|
||||
off := auxIntToInt32(l.AuxInt)
|
||||
sym := auxToSym(l.Aux)
|
||||
mem := l.Args[1]
|
||||
ptr := l.Args[0]
|
||||
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VFMADD213PS256load)
|
||||
v.AuxInt = int32ToAuxInt(off)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg4(x, y, ptr, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VFMADD213PS512(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
@@ -31846,64 +31706,6 @@ func rewriteValueAMD64_OpAMD64VFMADD213PSMasked512(v *Value) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
// rewriteValueAMD64_OpAMD64VFMADDSUB213PD128 applies the load-folding rewrite
// for VFMADDSUB213PD128. When the third argument is a VMOVDQUload128 for which
// canMergeLoad(v, l) && clobber(l) holds, v is reset in place to
// VFMADDSUB213PD128load carrying the load's sym/off aux and the arguments
// (x, y, ptr, mem). It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VFMADDSUB213PD128(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VFMADDSUB213PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem))
|
||||
// cond: canMergeLoad(v, l) && clobber(l)
|
||||
// result: (VFMADDSUB213PD128load {sym} [off] x y ptr mem)
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
l := v_2
|
||||
if l.Op != OpAMD64VMOVDQUload128 {
|
||||
break
|
||||
}
|
||||
off := auxIntToInt32(l.AuxInt)
|
||||
sym := auxToSym(l.Aux)
|
||||
mem := l.Args[1]
|
||||
ptr := l.Args[0]
|
||||
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VFMADDSUB213PD128load)
|
||||
v.AuxInt = int32ToAuxInt(off)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg4(x, y, ptr, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
// rewriteValueAMD64_OpAMD64VFMADDSUB213PD256 applies the load-folding rewrite
// for VFMADDSUB213PD256. When the third argument is a VMOVDQUload256 for which
// canMergeLoad(v, l) && clobber(l) holds, v is reset in place to
// VFMADDSUB213PD256load carrying the load's sym/off aux and the arguments
// (x, y, ptr, mem). It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VFMADDSUB213PD256(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VFMADDSUB213PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem))
|
||||
// cond: canMergeLoad(v, l) && clobber(l)
|
||||
// result: (VFMADDSUB213PD256load {sym} [off] x y ptr mem)
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
l := v_2
|
||||
if l.Op != OpAMD64VMOVDQUload256 {
|
||||
break
|
||||
}
|
||||
off := auxIntToInt32(l.AuxInt)
|
||||
sym := auxToSym(l.Aux)
|
||||
mem := l.Args[1]
|
||||
ptr := l.Args[0]
|
||||
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VFMADDSUB213PD256load)
|
||||
v.AuxInt = int32ToAuxInt(off)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg4(x, y, ptr, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VFMADDSUB213PD512(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
@@ -32026,64 +31828,6 @@ func rewriteValueAMD64_OpAMD64VFMADDSUB213PDMasked512(v *Value) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
// rewriteValueAMD64_OpAMD64VFMADDSUB213PS128 applies the load-folding rewrite
// for VFMADDSUB213PS128. When the third argument is a VMOVDQUload128 for which
// canMergeLoad(v, l) && clobber(l) holds, v is reset in place to
// VFMADDSUB213PS128load carrying the load's sym/off aux and the arguments
// (x, y, ptr, mem). It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VFMADDSUB213PS128(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VFMADDSUB213PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem))
|
||||
// cond: canMergeLoad(v, l) && clobber(l)
|
||||
// result: (VFMADDSUB213PS128load {sym} [off] x y ptr mem)
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
l := v_2
|
||||
if l.Op != OpAMD64VMOVDQUload128 {
|
||||
break
|
||||
}
|
||||
off := auxIntToInt32(l.AuxInt)
|
||||
sym := auxToSym(l.Aux)
|
||||
mem := l.Args[1]
|
||||
ptr := l.Args[0]
|
||||
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VFMADDSUB213PS128load)
|
||||
v.AuxInt = int32ToAuxInt(off)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg4(x, y, ptr, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
// rewriteValueAMD64_OpAMD64VFMADDSUB213PS256 applies the load-folding rewrite
// for VFMADDSUB213PS256. When the third argument is a VMOVDQUload256 for which
// canMergeLoad(v, l) && clobber(l) holds, v is reset in place to
// VFMADDSUB213PS256load carrying the load's sym/off aux and the arguments
// (x, y, ptr, mem). It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VFMADDSUB213PS256(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VFMADDSUB213PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem))
|
||||
// cond: canMergeLoad(v, l) && clobber(l)
|
||||
// result: (VFMADDSUB213PS256load {sym} [off] x y ptr mem)
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
l := v_2
|
||||
if l.Op != OpAMD64VMOVDQUload256 {
|
||||
break
|
||||
}
|
||||
off := auxIntToInt32(l.AuxInt)
|
||||
sym := auxToSym(l.Aux)
|
||||
mem := l.Args[1]
|
||||
ptr := l.Args[0]
|
||||
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VFMADDSUB213PS256load)
|
||||
v.AuxInt = int32ToAuxInt(off)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg4(x, y, ptr, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VFMADDSUB213PS512(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
@@ -32206,64 +31950,6 @@ func rewriteValueAMD64_OpAMD64VFMADDSUB213PSMasked512(v *Value) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
// rewriteValueAMD64_OpAMD64VFMSUBADD213PD128 applies the load-folding rewrite
// for VFMSUBADD213PD128. When the third argument is a VMOVDQUload128 for which
// canMergeLoad(v, l) && clobber(l) holds, v is reset in place to
// VFMSUBADD213PD128load carrying the load's sym/off aux and the arguments
// (x, y, ptr, mem). It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VFMSUBADD213PD128(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VFMSUBADD213PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem))
|
||||
// cond: canMergeLoad(v, l) && clobber(l)
|
||||
// result: (VFMSUBADD213PD128load {sym} [off] x y ptr mem)
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
l := v_2
|
||||
if l.Op != OpAMD64VMOVDQUload128 {
|
||||
break
|
||||
}
|
||||
off := auxIntToInt32(l.AuxInt)
|
||||
sym := auxToSym(l.Aux)
|
||||
mem := l.Args[1]
|
||||
ptr := l.Args[0]
|
||||
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VFMSUBADD213PD128load)
|
||||
v.AuxInt = int32ToAuxInt(off)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg4(x, y, ptr, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
// rewriteValueAMD64_OpAMD64VFMSUBADD213PD256 applies the load-folding rewrite
// for VFMSUBADD213PD256. When the third argument is a VMOVDQUload256 for which
// canMergeLoad(v, l) && clobber(l) holds, v is reset in place to
// VFMSUBADD213PD256load carrying the load's sym/off aux and the arguments
// (x, y, ptr, mem). It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VFMSUBADD213PD256(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VFMSUBADD213PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem))
|
||||
// cond: canMergeLoad(v, l) && clobber(l)
|
||||
// result: (VFMSUBADD213PD256load {sym} [off] x y ptr mem)
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
l := v_2
|
||||
if l.Op != OpAMD64VMOVDQUload256 {
|
||||
break
|
||||
}
|
||||
off := auxIntToInt32(l.AuxInt)
|
||||
sym := auxToSym(l.Aux)
|
||||
mem := l.Args[1]
|
||||
ptr := l.Args[0]
|
||||
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VFMSUBADD213PD256load)
|
||||
v.AuxInt = int32ToAuxInt(off)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg4(x, y, ptr, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VFMSUBADD213PD512(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
@@ -32386,64 +32072,6 @@ func rewriteValueAMD64_OpAMD64VFMSUBADD213PDMasked512(v *Value) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
// rewriteValueAMD64_OpAMD64VFMSUBADD213PS128 applies the load-folding rewrite
// for VFMSUBADD213PS128. When the third argument is a VMOVDQUload128 for which
// canMergeLoad(v, l) && clobber(l) holds, v is reset in place to
// VFMSUBADD213PS128load carrying the load's sym/off aux and the arguments
// (x, y, ptr, mem). It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VFMSUBADD213PS128(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VFMSUBADD213PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem))
|
||||
// cond: canMergeLoad(v, l) && clobber(l)
|
||||
// result: (VFMSUBADD213PS128load {sym} [off] x y ptr mem)
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
l := v_2
|
||||
if l.Op != OpAMD64VMOVDQUload128 {
|
||||
break
|
||||
}
|
||||
off := auxIntToInt32(l.AuxInt)
|
||||
sym := auxToSym(l.Aux)
|
||||
mem := l.Args[1]
|
||||
ptr := l.Args[0]
|
||||
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VFMSUBADD213PS128load)
|
||||
v.AuxInt = int32ToAuxInt(off)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg4(x, y, ptr, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
// rewriteValueAMD64_OpAMD64VFMSUBADD213PS256 applies the load-folding rewrite
// for VFMSUBADD213PS256. When the third argument is a VMOVDQUload256 for which
// canMergeLoad(v, l) && clobber(l) holds, v is reset in place to
// VFMSUBADD213PS256load carrying the load's sym/off aux and the arguments
// (x, y, ptr, mem). It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VFMSUBADD213PS256(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VFMSUBADD213PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem))
|
||||
// cond: canMergeLoad(v, l) && clobber(l)
|
||||
// result: (VFMSUBADD213PS256load {sym} [off] x y ptr mem)
|
||||
for {
|
||||
x := v_0
|
||||
y := v_1
|
||||
l := v_2
|
||||
if l.Op != OpAMD64VMOVDQUload256 {
|
||||
break
|
||||
}
|
||||
off := auxIntToInt32(l.AuxInt)
|
||||
sym := auxToSym(l.Aux)
|
||||
mem := l.Args[1]
|
||||
ptr := l.Args[0]
|
||||
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VFMSUBADD213PS256load)
|
||||
v.AuxInt = int32ToAuxInt(off)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg4(x, y, ptr, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VFMSUBADD213PS512(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
|
||||
@@ -84,6 +84,7 @@ func ParseGOEXPERIMENT(goos, goarch, goexp string) (*ExperimentFlags, error) {
|
||||
RegabiWrappers: regabiSupported,
|
||||
RegabiArgs: regabiSupported,
|
||||
Dwarf5: dwarf5Supported,
|
||||
SIMD: goarch == "amd64", // TODO: remove this (default to false) when dev.simd is merged
|
||||
RandomizedHeapBase64: true,
|
||||
SizeSpecializedMalloc: true,
|
||||
GreenTeaGC: true,
|
||||
|
||||
@@ -136,12 +136,6 @@ func doinit() {
|
||||
// e.g. setting the xsavedisable boot option on Windows 10.
|
||||
X86.HasOSXSAVE = isSet(ecx1, cpuid_OSXSAVE)
|
||||
|
||||
// The FMA instruction set extension only has VEX prefixed instructions.
|
||||
// VEX prefixed instructions require OSXSAVE to be enabled.
|
||||
// See Intel 64 and IA-32 Architectures Software Developer’s Manual Volume 2
|
||||
// Section 2.4 "AVX and SSE Instruction Exception Specification"
|
||||
X86.HasFMA = isSet(ecx1, cpuid_FMA) && X86.HasOSXSAVE
|
||||
|
||||
osSupportsAVX := false
|
||||
osSupportsAVX512 := false
|
||||
// For XGETBV, OSXSAVE bit is required and sufficient.
|
||||
@@ -159,6 +153,14 @@ func doinit() {
|
||||
|
||||
X86.HasAVX = isSet(ecx1, cpuid_AVX) && osSupportsAVX
|
||||
|
||||
// The FMA instruction set extension requires both the FMA and AVX flags.
|
||||
//
|
||||
// Furthermore, the FMA instructions are all VEX prefixed instructions.
|
||||
// VEX prefixed instructions require OSXSAVE to be enabled.
|
||||
// See Intel 64 and IA-32 Architectures Software Developer’s Manual Volume 2
|
||||
// Section 2.4 "AVX and SSE Instruction Exception Specification"
|
||||
X86.HasFMA = isSet(ecx1, cpuid_FMA) && X86.HasAVX && X86.HasOSXSAVE
|
||||
|
||||
if maxID < 7 {
|
||||
osInit()
|
||||
return
|
||||
|
||||
@@ -162,6 +162,7 @@ type X86Features struct {}
|
||||
var X86 X86Features
|
||||
|
||||
{{range .}}
|
||||
{{$f := .}}
|
||||
{{- if eq .Feature "AVX512"}}
|
||||
// {{.Feature}} returns whether the CPU supports the AVX512F+CD+BW+DQ+VL features.
|
||||
//
|
||||
@@ -172,11 +173,19 @@ var X86 X86Features
|
||||
{{- else -}}
|
||||
// {{.Feature}} returns whether the CPU supports the {{.Feature}} feature.
|
||||
{{- end}}
|
||||
{{- if ne .ImpliesAll ""}}
|
||||
//
|
||||
// If it returns true, then the CPU also supports {{.ImpliesAll}}.
|
||||
{{- end}}
|
||||
//
|
||||
// {{.Feature}} is defined on all GOARCHes, but will only return true on
|
||||
// GOARCH {{.GoArch}}.
|
||||
func (X86Features) {{.Feature}}() bool {
|
||||
return cpu.X86.Has{{.Feature}}
|
||||
func ({{.FeatureVar}}Features) {{.Feature}}() bool {
|
||||
{{- if .Virtual}}
|
||||
return {{range $i, $dep := .Implies}}{{if $i}} && {{end}}cpu.{{$f.FeatureVar}}.Has{{$dep}}{{end}}
|
||||
{{- else}}
|
||||
return cpu.{{.FeatureVar}}.Has{{.Feature}}
|
||||
{{- end}}
|
||||
}
|
||||
{{end}}
|
||||
`
|
||||
@@ -564,6 +573,65 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer {
|
||||
return buffer
|
||||
}
|
||||
|
||||
// goarchFeatures describes the CPU feature checks exposed for one GOARCH.
type goarchFeatures struct {
	// featureVar is the name of the exported feature-check variable for this
	// architecture.
	featureVar string

	// features records per-feature information.
	features map[string]featureInfo
}

// featureInfo holds additional information about a single CPU feature.
type featureInfo struct {
	// Implies is a list of other CPU features that are required for this
	// feature. These are allowed to chain.
	//
	// For example, if the Frob feature lists "Baz", then if X.Frob() returns
	// true, it must also be true that the CPU has feature Baz.
	Implies []string

	// Virtual means this feature is not represented directly in internal/cpu,
	// but is instead the logical AND of the features in Implies.
	Virtual bool
}

// goarchFeatureInfo maps from GOARCH to CPU feature to additional information
// about that feature. Not all features need to be in this map.
var goarchFeatureInfo = make(map[string]goarchFeatures)

// registerFeatureInfo records features as the feature information for goArch,
// replacing any information previously registered for that GOARCH.
func registerFeatureInfo(goArch string, features goarchFeatures) {
	goarchFeatureInfo[goArch] = features
}

// featureImplies returns a comment-ready, English list ("A", "A and B",
// "A, B, and C") of every feature transitively implied by base on goarch. The
// list excludes base itself and is empty if base implies nothing or is
// unknown.
//
// Each implied feature is listed exactly once, even if it is reachable through
// more than one path, and a feature always appears after the features it
// implies. Cycles in the Implies graph are tolerated rather than recursing
// forever.
func featureImplies(goarch string, base string) string {
	features := goarchFeatureInfo[goarch].features

	// Compute the transitive closure of base with a pre-order walk. A feature
	// may be appended more than once if it is reachable via several paths;
	// duplicates are removed below. onPath only guards against cycles.
	var list []string
	onPath := make(map[string]bool)
	var visit func(f string)
	visit = func(f string) {
		if onPath[f] {
			return
		}
		onPath[f] = true
		list = append(list, f)
		for _, dep := range features[f].Implies {
			visit(dep)
		}
		onPath[f] = false
	}
	visit(base)
	// Drop base
	list = list[1:]
	// Put in "nice" order: in the reversed walk, the first occurrence of a
	// feature precedes every feature that implies it.
	slices.Reverse(list)
	// Keep only the first occurrence of each feature. This filters in place;
	// the write position never passes the read position.
	seen := make(map[string]bool, len(list))
	uniq := list[:0]
	for _, f := range list {
		if seen[f] {
			continue
		}
		seen[f] = true
		uniq = append(uniq, f)
	}
	list = uniq
	// Combine into a comment-ready form
	switch len(list) {
	case 0:
		return ""
	case 1:
		return list[0]
	case 2:
		return list[0] + " and " + list[1]
	default:
		list[len(list)-1] = "and " + list[len(list)-1]
		return strings.Join(list, ", ")
	}
}
|
||||
|
||||
func writeSIMDFeatures(ops []Operation) *bytes.Buffer {
|
||||
// Gather all features
|
||||
type featureKey struct {
|
||||
@@ -579,13 +647,36 @@ func writeSIMDFeatures(ops []Operation) *bytes.Buffer {
|
||||
featureSet[featureKey{op.GoArch, feature}] = struct{}{}
|
||||
}
|
||||
}
|
||||
features := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int {
|
||||
featureKeys := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int {
|
||||
if c := cmp.Compare(a.GoArch, b.GoArch); c != 0 {
|
||||
return c
|
||||
}
|
||||
return compareNatural(a.Feature, b.Feature)
|
||||
})
|
||||
|
||||
// TODO: internal/cpu doesn't enforce these at all. You can even do
|
||||
// GODEBUG=cpu.avx=off and it will happily turn off AVX without turning off
|
||||
// AVX2. We need to push these dependencies into it somehow.
|
||||
type feature struct {
|
||||
featureKey
|
||||
FeatureVar string
|
||||
Virtual bool
|
||||
Implies []string
|
||||
ImpliesAll string
|
||||
}
|
||||
var features []feature
|
||||
for _, k := range featureKeys {
|
||||
featureVar := goarchFeatureInfo[k.GoArch].featureVar
|
||||
fi := goarchFeatureInfo[k.GoArch].features[k.Feature]
|
||||
features = append(features, feature{
|
||||
featureKey: k,
|
||||
FeatureVar: featureVar,
|
||||
Virtual: fi.Virtual,
|
||||
Implies: fi.Implies,
|
||||
ImpliesAll: featureImplies(k.GoArch, k.Feature),
|
||||
})
|
||||
}
|
||||
|
||||
// If we ever have the same feature name on more than one GOARCH, we'll have
|
||||
// to be more careful about this.
|
||||
t := templateOf(simdFeaturesTemplate, "features")
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"fmt"
|
||||
"log"
|
||||
"maps"
|
||||
@@ -78,7 +77,7 @@ func loadXED(xedPath string) []*unify.Value {
|
||||
switch {
|
||||
case inst.RealOpcode == "N":
|
||||
return // Skip unstable instructions
|
||||
case !(strings.HasPrefix(inst.Extension, "AVX") || strings.HasPrefix(inst.Extension, "SHA")):
|
||||
case !(strings.HasPrefix(inst.Extension, "AVX") || strings.HasPrefix(inst.Extension, "SHA") || inst.Extension == "FMA"):
|
||||
// We're only interested in AVX, SHA, and FMA instructions.
|
||||
return
|
||||
}
|
||||
@@ -210,16 +209,9 @@ func loadXED(xedPath string) []*unify.Value {
|
||||
}
|
||||
log.Printf("%d unhandled CPU features for %d instructions (use -v for details)", len(unknownFeatures), nInst)
|
||||
} else {
|
||||
keys := slices.SortedFunc(maps.Keys(unknownFeatures), func(a, b cpuFeatureKey) int {
|
||||
return cmp.Or(cmp.Compare(a.Extension, b.Extension),
|
||||
cmp.Compare(a.ISASet, b.ISASet))
|
||||
})
|
||||
keys := slices.Sorted(maps.Keys(unknownFeatures))
|
||||
for _, key := range keys {
|
||||
if key.ISASet == "" || key.ISASet == key.Extension {
|
||||
log.Printf("unhandled Extension %s", key.Extension)
|
||||
} else {
|
||||
log.Printf("unhandled Extension %s and ISASet %s", key.Extension, key.ISASet)
|
||||
}
|
||||
log.Printf("unhandled ISASet %s", key)
|
||||
log.Printf(" opcodes: %s", slices.Sorted(maps.Keys(unknownFeatures[key])))
|
||||
}
|
||||
}
|
||||
@@ -763,16 +755,24 @@ func instToUVal1(inst *xeddata.Inst, ops []operand, feature string, variant inst
|
||||
// decodeCPUFeature returns the CPU feature name required by inst. These match
|
||||
// the names of the "Has*" feature checks in the simd package.
|
||||
func decodeCPUFeature(inst *xeddata.Inst) (string, bool) {
|
||||
key := cpuFeatureKey{
|
||||
Extension: inst.Extension,
|
||||
ISASet: isaSetStrip.ReplaceAllLiteralString(inst.ISASet, ""),
|
||||
isaSet := inst.ISASet
|
||||
if isaSet == "" {
|
||||
// Older instructions don't have an ISA set. Use their "extension"
|
||||
// instead.
|
||||
isaSet = inst.Extension
|
||||
}
|
||||
feat, ok := cpuFeatureMap[key]
|
||||
// We require AVX512VL to use AVX512 at all, so strip off the vector length
|
||||
// suffixes.
|
||||
if strings.HasPrefix(isaSet, "AVX512") {
|
||||
isaSet = isaSetVL.ReplaceAllLiteralString(isaSet, "")
|
||||
}
|
||||
|
||||
feat, ok := cpuFeatureMap[isaSet]
|
||||
if !ok {
|
||||
imap := unknownFeatures[key]
|
||||
imap := unknownFeatures[isaSet]
|
||||
if imap == nil {
|
||||
imap = make(map[string]struct{})
|
||||
unknownFeatures[key] = imap
|
||||
unknownFeatures[isaSet] = imap
|
||||
}
|
||||
imap[inst.Opcode()] = struct{}{}
|
||||
return "", false
|
||||
@@ -783,45 +783,76 @@ func decodeCPUFeature(inst *xeddata.Inst) (string, bool) {
|
||||
return feat, true
|
||||
}
|
||||
|
||||
var isaSetStrip = regexp.MustCompile("_(128N?|256N?|512)$")
|
||||
var isaSetVL = regexp.MustCompile("_(128N?|256N?|512)$")
|
||||
|
||||
type cpuFeatureKey struct {
|
||||
Extension, ISASet string
|
||||
}
|
||||
|
||||
// cpuFeatureMap maps from XED's "EXTENSION" and "ISA_SET" to a CPU feature name
|
||||
// that can be used in the SIMD API.
|
||||
var cpuFeatureMap = map[cpuFeatureKey]string{
|
||||
{"SHA", "SHA"}: "SHA",
|
||||
|
||||
{"AVX", ""}: "AVX",
|
||||
{"AVX_VNNI", "AVX_VNNI"}: "AVXVNNI",
|
||||
{"AVX2", ""}: "AVX2",
|
||||
{"AVXAES", ""}: "AVX, AES",
|
||||
// cpuFeatureMap maps from XED's "ISA_SET" (or "EXTENSION") to a CPU feature
|
||||
// name to expose in the SIMD feature check API.
|
||||
//
|
||||
// See XED's datafiles/*/cpuid.xed.txt for how ISA set names map to CPUID flags.
|
||||
var cpuFeatureMap = map[string]string{
|
||||
"AVX": "AVX",
|
||||
"AVX_VNNI": "AVXVNNI",
|
||||
"AVX2": "AVX2",
|
||||
"AVXAES": "AVXAES",
|
||||
"SHA": "SHA",
|
||||
"FMA": "FMA",
|
||||
|
||||
// AVX-512 foundational features. We combine all of these into one "AVX512" feature.
|
||||
{"AVX512EVEX", "AVX512F"}: "AVX512",
|
||||
{"AVX512EVEX", "AVX512CD"}: "AVX512",
|
||||
{"AVX512EVEX", "AVX512BW"}: "AVX512",
|
||||
{"AVX512EVEX", "AVX512DQ"}: "AVX512",
|
||||
// AVX512VL doesn't appear explicitly in the ISASet. I guess it's implied by
|
||||
// the vector length suffix.
|
||||
"AVX512F": "AVX512",
|
||||
"AVX512BW": "AVX512",
|
||||
"AVX512CD": "AVX512",
|
||||
"AVX512DQ": "AVX512",
|
||||
// AVX512VL doesn't appear as its own ISASet; instead, the CPUID flag is
|
||||
// required by the *_128 and *_256 ISASets. We fold it into "AVX512" anyway.
|
||||
|
||||
// AVX-512 extension features
|
||||
{"AVX512EVEX", "AVX512_BITALG"}: "AVX512BITALG",
|
||||
{"AVX512EVEX", "AVX512_GFNI"}: "AVX512GFNI",
|
||||
{"AVX512EVEX", "AVX512_VBMI2"}: "AVX512VBMI2",
|
||||
{"AVX512EVEX", "AVX512_VBMI"}: "AVX512VBMI",
|
||||
{"AVX512EVEX", "AVX512_VNNI"}: "AVX512VNNI",
|
||||
{"AVX512EVEX", "AVX512_VPOPCNTDQ"}: "AVX512VPOPCNTDQ",
|
||||
{"AVX512EVEX", "AVX512_VAES"}: "AVX512VAES",
|
||||
{"AVX512EVEX", "AVX512_VPCLMULQDQ"}: "AVX512VPCLMULQDQ",
|
||||
"AVX512_BITALG": "AVX512BITALG",
|
||||
"AVX512_GFNI": "AVX512GFNI",
|
||||
"AVX512_VBMI": "AVX512VBMI",
|
||||
"AVX512_VBMI2": "AVX512VBMI2",
|
||||
"AVX512_VNNI": "AVX512VNNI",
|
||||
"AVX512_VPOPCNTDQ": "AVX512VPOPCNTDQ",
|
||||
"AVX512_VAES": "AVX512VAES",
|
||||
"AVX512_VPCLMULQDQ": "AVX512VPCLMULQDQ",
|
||||
|
||||
// AVX 10.2 (not yet supported)
|
||||
{"AVX512EVEX", "AVX10_2_RC"}: "ignore",
|
||||
"AVX10_2_RC": "ignore",
|
||||
}
|
||||
|
||||
var unknownFeatures = map[cpuFeatureKey]map[string]struct{}{}
|
||||
func init() {
|
||||
// TODO: In general, Intel doesn't make any guarantees about what flags are
|
||||
// set, so this means our feature checks need to ensure these, just to be
|
||||
// sure.
|
||||
var features = map[string]featureInfo{
|
||||
"AVX2": {Implies: []string{"AVX"}},
|
||||
"AVX512": {Implies: []string{"AVX2"}},
|
||||
|
||||
"AVXAES": {Virtual: true, Implies: []string{"AVX", "AES"}},
|
||||
"FMA": {Implies: []string{"AVX"}},
|
||||
|
||||
// AVX-512 subfeatures.
|
||||
"AVX512BITALG": {Implies: []string{"AVX512"}},
|
||||
"AVX512GFNI": {Implies: []string{"AVX512"}},
|
||||
"AVX512VBMI": {Implies: []string{"AVX512"}},
|
||||
"AVX512VBMI2": {Implies: []string{"AVX512"}},
|
||||
"AVX512VNNI": {Implies: []string{"AVX512"}},
|
||||
"AVX512VPOPCNTDQ": {Implies: []string{"AVX512"}},
|
||||
"AVX512VAES": {Implies: []string{"AVX512"}},
|
||||
|
||||
// AVX-VNNI and AVX-IFMA are "backports" of the AVX512-VNNI/IFMA
|
||||
// instructions to VEX encoding, limited to 256 bit vectors. They're
|
||||
// intended for lower end CPUs that want to support VNNI/IFMA without
|
||||
// supporting AVX-512. As such, they're built on AVX2's VEX encoding.
|
||||
"AVXVNNI": {Implies: []string{"AVX2"}},
|
||||
"AVXIFMA": {Implies: []string{"AVX2"}},
|
||||
}
|
||||
registerFeatureInfo("amd64", goarchFeatures{
|
||||
featureVar: "X86",
|
||||
features: features,
|
||||
})
|
||||
}
|
||||
|
||||
var unknownFeatures = map[string]map[string]struct{}{}
|
||||
|
||||
// hasOptionalMask returns whether there is an optional mask operand in ops.
|
||||
func hasOptionalMask(ops []operand) bool {
|
||||
|
||||
@@ -10,14 +10,6 @@ type X86Features struct{}
|
||||
|
||||
var X86 X86Features
|
||||
|
||||
// AES returns whether the CPU supports the AES feature.
|
||||
//
|
||||
// AES is defined on all GOARCHes, but will only return true on
|
||||
// GOARCH amd64.
|
||||
func (X86Features) AES() bool {
|
||||
return cpu.X86.HasAES
|
||||
}
|
||||
|
||||
// AVX returns whether the CPU supports the AVX feature.
|
||||
//
|
||||
// AVX is defined on all GOARCHes, but will only return true on
|
||||
@@ -28,6 +20,8 @@ func (X86Features) AVX() bool {
|
||||
|
||||
// AVX2 returns whether the CPU supports the AVX2 feature.
|
||||
//
|
||||
// If it returns true, then the CPU also supports AVX.
|
||||
//
|
||||
// AVX2 is defined on all GOARCHes, but will only return true on
|
||||
// GOARCH amd64.
|
||||
func (X86Features) AVX2() bool {
|
||||
@@ -41,6 +35,8 @@ func (X86Features) AVX2() bool {
|
||||
// Nearly every CPU that has shipped with any support for AVX-512 has
|
||||
// supported all five of these features.
|
||||
//
|
||||
// If it returns true, then the CPU also supports AVX and AVX2.
|
||||
//
|
||||
// AVX512 is defined on all GOARCHes, but will only return true on
|
||||
// GOARCH amd64.
|
||||
func (X86Features) AVX512() bool {
|
||||
@@ -49,6 +45,8 @@ func (X86Features) AVX512() bool {
|
||||
|
||||
// AVX512BITALG returns whether the CPU supports the AVX512BITALG feature.
|
||||
//
|
||||
// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
|
||||
//
|
||||
// AVX512BITALG is defined on all GOARCHes, but will only return true on
|
||||
// GOARCH amd64.
|
||||
func (X86Features) AVX512BITALG() bool {
|
||||
@@ -57,6 +55,8 @@ func (X86Features) AVX512BITALG() bool {
|
||||
|
||||
// AVX512GFNI returns whether the CPU supports the AVX512GFNI feature.
|
||||
//
|
||||
// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
|
||||
//
|
||||
// AVX512GFNI is defined on all GOARCHes, but will only return true on
|
||||
// GOARCH amd64.
|
||||
func (X86Features) AVX512GFNI() bool {
|
||||
@@ -65,6 +65,8 @@ func (X86Features) AVX512GFNI() bool {
|
||||
|
||||
// AVX512VAES returns whether the CPU supports the AVX512VAES feature.
|
||||
//
|
||||
// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
|
||||
//
|
||||
// AVX512VAES is defined on all GOARCHes, but will only return true on
|
||||
// GOARCH amd64.
|
||||
func (X86Features) AVX512VAES() bool {
|
||||
@@ -73,6 +75,8 @@ func (X86Features) AVX512VAES() bool {
|
||||
|
||||
// AVX512VBMI returns whether the CPU supports the AVX512VBMI feature.
|
||||
//
|
||||
// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
|
||||
//
|
||||
// AVX512VBMI is defined on all GOARCHes, but will only return true on
|
||||
// GOARCH amd64.
|
||||
func (X86Features) AVX512VBMI() bool {
|
||||
@@ -81,6 +85,8 @@ func (X86Features) AVX512VBMI() bool {
|
||||
|
||||
// AVX512VBMI2 returns whether the CPU supports the AVX512VBMI2 feature.
|
||||
//
|
||||
// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
|
||||
//
|
||||
// AVX512VBMI2 is defined on all GOARCHes, but will only return true on
|
||||
// GOARCH amd64.
|
||||
func (X86Features) AVX512VBMI2() bool {
|
||||
@@ -89,6 +95,8 @@ func (X86Features) AVX512VBMI2() bool {
|
||||
|
||||
// AVX512VNNI returns whether the CPU supports the AVX512VNNI feature.
|
||||
//
|
||||
// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
|
||||
//
|
||||
// AVX512VNNI is defined on all GOARCHes, but will only return true on
|
||||
// GOARCH amd64.
|
||||
func (X86Features) AVX512VNNI() bool {
|
||||
@@ -105,20 +113,44 @@ func (X86Features) AVX512VPCLMULQDQ() bool {
|
||||
|
||||
// AVX512VPOPCNTDQ returns whether the CPU supports the AVX512VPOPCNTDQ feature.
|
||||
//
|
||||
// If it returns true, then the CPU also supports AVX, AVX2, and AVX512.
|
||||
//
|
||||
// AVX512VPOPCNTDQ is defined on all GOARCHes, but will only return true on
|
||||
// GOARCH amd64.
|
||||
func (X86Features) AVX512VPOPCNTDQ() bool {
|
||||
return cpu.X86.HasAVX512VPOPCNTDQ
|
||||
}
|
||||
|
||||
// AVXAES returns whether the CPU supports the AVXAES feature.
|
||||
//
|
||||
// If it returns true, then the CPU also supports AES and AVX.
|
||||
//
|
||||
// AVXAES is defined on all GOARCHes, but will only return true on
|
||||
// GOARCH amd64.
|
||||
func (X86Features) AVXAES() bool {
|
||||
return cpu.X86.HasAVX && cpu.X86.HasAES
|
||||
}
|
||||
|
||||
// AVXVNNI returns whether the CPU supports the AVXVNNI feature.
|
||||
//
|
||||
// If it returns true, then the CPU also supports AVX and AVX2.
|
||||
//
|
||||
// AVXVNNI is defined on all GOARCHes, but will only return true on
|
||||
// GOARCH amd64.
|
||||
func (X86Features) AVXVNNI() bool {
|
||||
return cpu.X86.HasAVXVNNI
|
||||
}
|
||||
|
||||
// FMA returns whether the CPU supports the FMA feature.
|
||||
//
|
||||
// If it returns true, then the CPU also supports AVX.
|
||||
//
|
||||
// FMA is defined on all GOARCHes, but will only return true on
|
||||
// GOARCH amd64.
|
||||
func (X86Features) FMA() bool {
|
||||
return cpu.X86.HasFMA
|
||||
}
|
||||
|
||||
// SHA returns whether the CPU supports the SHA feature.
|
||||
//
|
||||
// SHA is defined on all GOARCHes, but will only return true on
|
||||
|
||||
@@ -11,7 +11,7 @@ package archsimd
|
||||
// y is the chunk of dw array in use.
|
||||
// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
|
||||
//
|
||||
// Asm: VAESDECLAST, CPU Feature: AVX, AES
|
||||
// Asm: VAESDECLAST, CPU Feature: AVXAES
|
||||
func (x Uint8x16) AESDecryptLastRound(y Uint32x4) Uint8x16
|
||||
|
||||
// AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
|
||||
@@ -37,7 +37,7 @@ func (x Uint8x64) AESDecryptLastRound(y Uint32x16) Uint8x64
|
||||
// y is the chunk of dw array in use.
|
||||
// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
|
||||
//
|
||||
// Asm: VAESDEC, CPU Feature: AVX, AES
|
||||
// Asm: VAESDEC, CPU Feature: AVXAES
|
||||
func (x Uint8x16) AESDecryptOneRound(y Uint32x4) Uint8x16
|
||||
|
||||
// AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
|
||||
@@ -63,7 +63,7 @@ func (x Uint8x64) AESDecryptOneRound(y Uint32x16) Uint8x64
|
||||
// y is the chunk of w array in use.
|
||||
// result = AddRoundKey((ShiftRows(SubBytes(x))), y)
|
||||
//
|
||||
// Asm: VAESENCLAST, CPU Feature: AVX, AES
|
||||
// Asm: VAESENCLAST, CPU Feature: AVXAES
|
||||
func (x Uint8x16) AESEncryptLastRound(y Uint32x4) Uint8x16
|
||||
|
||||
// AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
|
||||
@@ -89,7 +89,7 @@ func (x Uint8x64) AESEncryptLastRound(y Uint32x16) Uint8x64
|
||||
// y is the chunk of w array in use.
|
||||
// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
|
||||
//
|
||||
// Asm: VAESENC, CPU Feature: AVX, AES
|
||||
// Asm: VAESENC, CPU Feature: AVXAES
|
||||
func (x Uint8x16) AESEncryptOneRound(y Uint32x4) Uint8x16
|
||||
|
||||
// AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
|
||||
@@ -114,7 +114,7 @@ func (x Uint8x64) AESEncryptOneRound(y Uint32x16) Uint8x64
|
||||
// x is the chunk of w array in use.
|
||||
// result = InvMixColumns(x)
|
||||
//
|
||||
// Asm: VAESIMC, CPU Feature: AVX, AES
|
||||
// Asm: VAESIMC, CPU Feature: AVXAES
|
||||
func (x Uint32x4) AESInvMixColumns() Uint32x4
|
||||
|
||||
/* AESRoundKeyGenAssist */
|
||||
@@ -129,7 +129,7 @@ func (x Uint32x4) AESInvMixColumns() Uint32x4
|
||||
//
|
||||
// rconVal results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
//
|
||||
// Asm: VAESKEYGENASSIST, CPU Feature: AVX, AES
|
||||
// Asm: VAESKEYGENASSIST, CPU Feature: AVXAES
|
||||
func (x Uint32x4) AESRoundKeyGenAssist(rconVal uint8) Uint32x4
|
||||
|
||||
/* Abs */
|
||||
@@ -4082,12 +4082,12 @@ func (x Uint64x8) Mul(y Uint64x8) Uint64x8
|
||||
|
||||
// MulAdd performs a fused (x * y) + z.
|
||||
//
|
||||
// Asm: VFMADD213PS, CPU Feature: AVX512
|
||||
// Asm: VFMADD213PS, CPU Feature: FMA
|
||||
func (x Float32x4) MulAdd(y Float32x4, z Float32x4) Float32x4
|
||||
|
||||
// MulAdd performs a fused (x * y) + z.
|
||||
//
|
||||
// Asm: VFMADD213PS, CPU Feature: AVX512
|
||||
// Asm: VFMADD213PS, CPU Feature: FMA
|
||||
func (x Float32x8) MulAdd(y Float32x8, z Float32x8) Float32x8
|
||||
|
||||
// MulAdd performs a fused (x * y) + z.
|
||||
@@ -4097,12 +4097,12 @@ func (x Float32x16) MulAdd(y Float32x16, z Float32x16) Float32x16
|
||||
|
||||
// MulAdd performs a fused (x * y) + z.
|
||||
//
|
||||
// Asm: VFMADD213PD, CPU Feature: AVX512
|
||||
// Asm: VFMADD213PD, CPU Feature: FMA
|
||||
func (x Float64x2) MulAdd(y Float64x2, z Float64x2) Float64x2
|
||||
|
||||
// MulAdd performs a fused (x * y) + z.
|
||||
//
|
||||
// Asm: VFMADD213PD, CPU Feature: AVX512
|
||||
// Asm: VFMADD213PD, CPU Feature: FMA
|
||||
func (x Float64x4) MulAdd(y Float64x4, z Float64x4) Float64x4
|
||||
|
||||
// MulAdd performs a fused (x * y) + z.
|
||||
@@ -4114,12 +4114,12 @@ func (x Float64x8) MulAdd(y Float64x8, z Float64x8) Float64x8
|
||||
|
||||
// MulAddSub performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
|
||||
//
|
||||
// Asm: VFMADDSUB213PS, CPU Feature: AVX512
|
||||
// Asm: VFMADDSUB213PS, CPU Feature: FMA
|
||||
func (x Float32x4) MulAddSub(y Float32x4, z Float32x4) Float32x4
|
||||
|
||||
// MulAddSub performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
|
||||
//
|
||||
// Asm: VFMADDSUB213PS, CPU Feature: AVX512
|
||||
// Asm: VFMADDSUB213PS, CPU Feature: FMA
|
||||
func (x Float32x8) MulAddSub(y Float32x8, z Float32x8) Float32x8
|
||||
|
||||
// MulAddSub performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
|
||||
@@ -4129,12 +4129,12 @@ func (x Float32x16) MulAddSub(y Float32x16, z Float32x16) Float32x16
|
||||
|
||||
// MulAddSub performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
|
||||
//
|
||||
// Asm: VFMADDSUB213PD, CPU Feature: AVX512
|
||||
// Asm: VFMADDSUB213PD, CPU Feature: FMA
|
||||
func (x Float64x2) MulAddSub(y Float64x2, z Float64x2) Float64x2
|
||||
|
||||
// MulAddSub performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
|
||||
//
|
||||
// Asm: VFMADDSUB213PD, CPU Feature: AVX512
|
||||
// Asm: VFMADDSUB213PD, CPU Feature: FMA
|
||||
func (x Float64x4) MulAddSub(y Float64x4, z Float64x4) Float64x4
|
||||
|
||||
// MulAddSub performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
|
||||
@@ -4204,12 +4204,12 @@ func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32
|
||||
|
||||
// MulSubAdd performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
|
||||
//
|
||||
// Asm: VFMSUBADD213PS, CPU Feature: AVX512
|
||||
// Asm: VFMSUBADD213PS, CPU Feature: FMA
|
||||
func (x Float32x4) MulSubAdd(y Float32x4, z Float32x4) Float32x4
|
||||
|
||||
// MulSubAdd performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
|
||||
//
|
||||
// Asm: VFMSUBADD213PS, CPU Feature: AVX512
|
||||
// Asm: VFMSUBADD213PS, CPU Feature: FMA
|
||||
func (x Float32x8) MulSubAdd(y Float32x8, z Float32x8) Float32x8
|
||||
|
||||
// MulSubAdd performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
|
||||
@@ -4219,12 +4219,12 @@ func (x Float32x16) MulSubAdd(y Float32x16, z Float32x16) Float32x16
|
||||
|
||||
// MulSubAdd performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
|
||||
//
|
||||
// Asm: VFMSUBADD213PD, CPU Feature: AVX512
|
||||
// Asm: VFMSUBADD213PD, CPU Feature: FMA
|
||||
func (x Float64x2) MulSubAdd(y Float64x2, z Float64x2) Float64x2
|
||||
|
||||
// MulSubAdd performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
|
||||
//
|
||||
// Asm: VFMSUBADD213PD, CPU Feature: AVX512
|
||||
// Asm: VFMSUBADD213PD, CPU Feature: FMA
|
||||
func (x Float64x4) MulSubAdd(y Float64x4, z Float64x4) Float64x4
|
||||
|
||||
// MulSubAdd performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
|
||||
|
||||
Reference in New Issue
Block a user