mirror of
https://github.com/golang/go.git
synced 2026-01-29 15:12:08 +03:00
[dev.simd] simd, cmd/compile: generated code for Broadcast
Generated by simdgen CL 693599.

This turned out to require some additional work in other places, including filling in missing methods (use OverwriteBase to get FP versions). Also includes a test.

Change-Id: I2efe8967837834745f9cae661d4d4dcbb5390b6f
Reviewed-on: https://go-review.googlesource.com/c/go/+/693758
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
@@ -24,6 +24,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||
ssa.OpAMD64VPABSQ128,
|
||||
ssa.OpAMD64VPABSQ256,
|
||||
ssa.OpAMD64VPABSQ512,
|
||||
ssa.OpAMD64VBROADCASTSS128,
|
||||
ssa.OpAMD64VPBROADCASTQ128,
|
||||
ssa.OpAMD64VPBROADCASTB128,
|
||||
ssa.OpAMD64VPBROADCASTW128,
|
||||
ssa.OpAMD64VPBROADCASTD128,
|
||||
ssa.OpAMD64VBROADCASTSS256,
|
||||
ssa.OpAMD64VBROADCASTSD256,
|
||||
ssa.OpAMD64VPBROADCASTB256,
|
||||
ssa.OpAMD64VPBROADCASTW256,
|
||||
ssa.OpAMD64VPBROADCASTD256,
|
||||
ssa.OpAMD64VPBROADCASTQ256,
|
||||
ssa.OpAMD64VBROADCASTSS512,
|
||||
ssa.OpAMD64VBROADCASTSD512,
|
||||
ssa.OpAMD64VPBROADCASTB512,
|
||||
ssa.OpAMD64VPBROADCASTW512,
|
||||
ssa.OpAMD64VPBROADCASTD512,
|
||||
ssa.OpAMD64VPBROADCASTQ512,
|
||||
ssa.OpAMD64VCVTTPS2DQ128,
|
||||
ssa.OpAMD64VCVTTPS2DQ256,
|
||||
ssa.OpAMD64VCVTTPS2DQ512,
|
||||
@@ -624,6 +641,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||
ssa.OpAMD64VPABSQMasked128,
|
||||
ssa.OpAMD64VPABSQMasked256,
|
||||
ssa.OpAMD64VPABSQMasked512,
|
||||
ssa.OpAMD64VBROADCASTSSMasked128,
|
||||
ssa.OpAMD64VPBROADCASTQMasked128,
|
||||
ssa.OpAMD64VPBROADCASTBMasked128,
|
||||
ssa.OpAMD64VPBROADCASTWMasked128,
|
||||
ssa.OpAMD64VPBROADCASTDMasked128,
|
||||
ssa.OpAMD64VBROADCASTSSMasked256,
|
||||
ssa.OpAMD64VBROADCASTSDMasked256,
|
||||
ssa.OpAMD64VPBROADCASTBMasked256,
|
||||
ssa.OpAMD64VPBROADCASTWMasked256,
|
||||
ssa.OpAMD64VPBROADCASTDMasked256,
|
||||
ssa.OpAMD64VPBROADCASTQMasked256,
|
||||
ssa.OpAMD64VBROADCASTSSMasked512,
|
||||
ssa.OpAMD64VBROADCASTSDMasked512,
|
||||
ssa.OpAMD64VPBROADCASTBMasked512,
|
||||
ssa.OpAMD64VPBROADCASTWMasked512,
|
||||
ssa.OpAMD64VPBROADCASTDMasked512,
|
||||
ssa.OpAMD64VPBROADCASTQMasked512,
|
||||
ssa.OpAMD64VCOMPRESSPSMasked128,
|
||||
ssa.OpAMD64VCOMPRESSPSMasked256,
|
||||
ssa.OpAMD64VCOMPRESSPSMasked512,
|
||||
@@ -1104,10 +1138,10 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||
ssa.OpAMD64VPSRLQMasked512:
|
||||
p = simdVfpkv(s, v)
|
||||
|
||||
case ssa.OpAMD64VPINSRB128,
|
||||
ssa.OpAMD64VPINSRW128,
|
||||
ssa.OpAMD64VPINSRD128,
|
||||
ssa.OpAMD64VPINSRQ128:
|
||||
case ssa.OpAMD64VPINSRD128,
|
||||
ssa.OpAMD64VPINSRQ128,
|
||||
ssa.OpAMD64VPINSRB128,
|
||||
ssa.OpAMD64VPINSRW128:
|
||||
p = simdVgpvImm8(s, v)
|
||||
|
||||
case ssa.OpAMD64VPEXTRB128,
|
||||
@@ -1221,6 +1255,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||
ssa.OpAMD64VPAVGWMasked128,
|
||||
ssa.OpAMD64VPAVGWMasked256,
|
||||
ssa.OpAMD64VPAVGWMasked512,
|
||||
ssa.OpAMD64VBROADCASTSSMasked128,
|
||||
ssa.OpAMD64VPBROADCASTQMasked128,
|
||||
ssa.OpAMD64VPBROADCASTBMasked128,
|
||||
ssa.OpAMD64VPBROADCASTWMasked128,
|
||||
ssa.OpAMD64VPBROADCASTDMasked128,
|
||||
ssa.OpAMD64VBROADCASTSSMasked256,
|
||||
ssa.OpAMD64VBROADCASTSDMasked256,
|
||||
ssa.OpAMD64VPBROADCASTBMasked256,
|
||||
ssa.OpAMD64VPBROADCASTWMasked256,
|
||||
ssa.OpAMD64VPBROADCASTDMasked256,
|
||||
ssa.OpAMD64VPBROADCASTQMasked256,
|
||||
ssa.OpAMD64VBROADCASTSSMasked512,
|
||||
ssa.OpAMD64VBROADCASTSDMasked512,
|
||||
ssa.OpAMD64VPBROADCASTBMasked512,
|
||||
ssa.OpAMD64VPBROADCASTWMasked512,
|
||||
ssa.OpAMD64VPBROADCASTDMasked512,
|
||||
ssa.OpAMD64VPBROADCASTQMasked512,
|
||||
ssa.OpAMD64VRNDSCALEPSMasked128,
|
||||
ssa.OpAMD64VRNDSCALEPSMasked256,
|
||||
ssa.OpAMD64VRNDSCALEPSMasked512,
|
||||
|
||||
@@ -228,6 +228,66 @@
|
||||
(AverageMaskedUint16x8 x y mask) => (VPAVGWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(AverageMaskedUint16x16 x y mask) => (VPAVGWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(AverageMaskedUint16x32 x y mask) => (VPAVGWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
|
||||
(Broadcast128Float32x4 ...) => (VBROADCASTSS128 ...)
|
||||
(Broadcast128Float64x2 ...) => (VPBROADCASTQ128 ...)
|
||||
(Broadcast128Int8x16 ...) => (VPBROADCASTB128 ...)
|
||||
(Broadcast128Int16x8 ...) => (VPBROADCASTW128 ...)
|
||||
(Broadcast128Int32x4 ...) => (VPBROADCASTD128 ...)
|
||||
(Broadcast128Int64x2 ...) => (VPBROADCASTQ128 ...)
|
||||
(Broadcast128Uint8x16 ...) => (VPBROADCASTB128 ...)
|
||||
(Broadcast128Uint16x8 ...) => (VPBROADCASTW128 ...)
|
||||
(Broadcast128Uint32x4 ...) => (VPBROADCASTD128 ...)
|
||||
(Broadcast128Uint64x2 ...) => (VPBROADCASTQ128 ...)
|
||||
(Broadcast128MaskedFloat32x4 x mask) => (VBROADCASTSSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(Broadcast128MaskedFloat64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(Broadcast128MaskedInt8x16 x mask) => (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(Broadcast128MaskedInt16x8 x mask) => (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(Broadcast128MaskedInt32x4 x mask) => (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(Broadcast128MaskedInt64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(Broadcast128MaskedUint8x16 x mask) => (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(Broadcast128MaskedUint16x8 x mask) => (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(Broadcast128MaskedUint32x4 x mask) => (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(Broadcast128MaskedUint64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(Broadcast256Float32x4 ...) => (VBROADCASTSS256 ...)
|
||||
(Broadcast256Float64x2 ...) => (VBROADCASTSD256 ...)
|
||||
(Broadcast256Int8x16 ...) => (VPBROADCASTB256 ...)
|
||||
(Broadcast256Int16x8 ...) => (VPBROADCASTW256 ...)
|
||||
(Broadcast256Int32x4 ...) => (VPBROADCASTD256 ...)
|
||||
(Broadcast256Int64x2 ...) => (VPBROADCASTQ256 ...)
|
||||
(Broadcast256Uint8x16 ...) => (VPBROADCASTB256 ...)
|
||||
(Broadcast256Uint16x8 ...) => (VPBROADCASTW256 ...)
|
||||
(Broadcast256Uint32x4 ...) => (VPBROADCASTD256 ...)
|
||||
(Broadcast256Uint64x2 ...) => (VPBROADCASTQ256 ...)
|
||||
(Broadcast256MaskedFloat32x4 x mask) => (VBROADCASTSSMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(Broadcast256MaskedFloat64x2 x mask) => (VBROADCASTSDMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(Broadcast256MaskedInt8x16 x mask) => (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(Broadcast256MaskedInt16x8 x mask) => (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(Broadcast256MaskedInt32x4 x mask) => (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(Broadcast256MaskedInt64x2 x mask) => (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(Broadcast256MaskedUint8x16 x mask) => (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(Broadcast256MaskedUint16x8 x mask) => (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(Broadcast256MaskedUint32x4 x mask) => (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(Broadcast256MaskedUint64x2 x mask) => (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(Broadcast512Float32x4 ...) => (VBROADCASTSS512 ...)
|
||||
(Broadcast512Float64x2 ...) => (VBROADCASTSD512 ...)
|
||||
(Broadcast512Int8x16 ...) => (VPBROADCASTB512 ...)
|
||||
(Broadcast512Int16x8 ...) => (VPBROADCASTW512 ...)
|
||||
(Broadcast512Int32x4 ...) => (VPBROADCASTD512 ...)
|
||||
(Broadcast512Int64x2 ...) => (VPBROADCASTQ512 ...)
|
||||
(Broadcast512Uint8x16 ...) => (VPBROADCASTB512 ...)
|
||||
(Broadcast512Uint16x8 ...) => (VPBROADCASTW512 ...)
|
||||
(Broadcast512Uint32x4 ...) => (VPBROADCASTD512 ...)
|
||||
(Broadcast512Uint64x2 ...) => (VPBROADCASTQ512 ...)
|
||||
(Broadcast512MaskedFloat32x4 x mask) => (VBROADCASTSSMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(Broadcast512MaskedFloat64x2 x mask) => (VBROADCASTSDMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(Broadcast512MaskedInt8x16 x mask) => (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(Broadcast512MaskedInt16x8 x mask) => (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(Broadcast512MaskedInt32x4 x mask) => (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(Broadcast512MaskedInt64x2 x mask) => (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(Broadcast512MaskedUint8x16 x mask) => (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(Broadcast512MaskedUint16x8 x mask) => (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(Broadcast512MaskedUint32x4 x mask) => (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(Broadcast512MaskedUint64x2 x mask) => (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(CeilFloat32x4 x) => (VROUNDPS128 [2] x)
|
||||
(CeilFloat32x8 x) => (VROUNDPS256 [2] x)
|
||||
(CeilFloat64x2 x) => (VROUNDPD128 [2] x)
|
||||
@@ -1396,6 +1456,8 @@
|
||||
(ScaleMaskedFloat64x2 x y mask) => (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(ScaleMaskedFloat64x4 x y mask) => (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(ScaleMaskedFloat64x8 x y mask) => (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
|
||||
(SetElemFloat32x4 ...) => (VPINSRD128 ...)
|
||||
(SetElemFloat64x2 ...) => (VPINSRQ128 ...)
|
||||
(SetElemInt8x16 ...) => (VPINSRB128 ...)
|
||||
(SetElemInt16x8 ...) => (VPINSRW128 ...)
|
||||
(SetElemInt32x4 ...) => (VPINSRD128 ...)
|
||||
|
||||
@@ -20,6 +20,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
||||
{name: "VADDSUBPD256", argLength: 2, reg: v21, asm: "VADDSUBPD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VADDSUBPS128", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VADDSUBPS256", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VBROADCASTSD256", argLength: 1, reg: v11, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VBROADCASTSD512", argLength: 1, reg: w11, asm: "VBROADCASTSD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VBROADCASTSDMasked256", argLength: 2, reg: wkw, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VBROADCASTSDMasked512", argLength: 2, reg: wkw, asm: "VBROADCASTSD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VBROADCASTSS128", argLength: 1, reg: v11, asm: "VBROADCASTSS", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VBROADCASTSS256", argLength: 1, reg: v11, asm: "VBROADCASTSS", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VBROADCASTSS512", argLength: 1, reg: w11, asm: "VBROADCASTSS", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VBROADCASTSSMasked128", argLength: 2, reg: wkw, asm: "VBROADCASTSS", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VBROADCASTSSMasked256", argLength: 2, reg: wkw, asm: "VBROADCASTSS", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VBROADCASTSSMasked512", argLength: 2, reg: wkw, asm: "VBROADCASTSS", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VCOMPRESSPDMasked128", argLength: 2, reg: wkw, asm: "VCOMPRESSPD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VCOMPRESSPDMasked256", argLength: 2, reg: wkw, asm: "VCOMPRESSPD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VCOMPRESSPDMasked512", argLength: 2, reg: wkw, asm: "VCOMPRESSPD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
@@ -252,6 +262,30 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
||||
{name: "VPBLENDMWMasked512", argLength: 3, reg: w2kw, asm: "VPBLENDMW", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPBLENDVB128", argLength: 3, reg: v31, asm: "VPBLENDVB", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPBLENDVB256", argLength: 3, reg: v31, asm: "VPBLENDVB", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPBROADCASTB128", argLength: 1, reg: v11, asm: "VPBROADCASTB", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPBROADCASTB256", argLength: 1, reg: v11, asm: "VPBROADCASTB", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPBROADCASTB512", argLength: 1, reg: w11, asm: "VPBROADCASTB", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPBROADCASTBMasked128", argLength: 2, reg: wkw, asm: "VPBROADCASTB", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPBROADCASTBMasked256", argLength: 2, reg: wkw, asm: "VPBROADCASTB", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPBROADCASTBMasked512", argLength: 2, reg: wkw, asm: "VPBROADCASTB", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPBROADCASTD128", argLength: 1, reg: v11, asm: "VPBROADCASTD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPBROADCASTD256", argLength: 1, reg: v11, asm: "VPBROADCASTD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPBROADCASTD512", argLength: 1, reg: w11, asm: "VPBROADCASTD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPBROADCASTDMasked128", argLength: 2, reg: wkw, asm: "VPBROADCASTD", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPBROADCASTDMasked256", argLength: 2, reg: wkw, asm: "VPBROADCASTD", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPBROADCASTDMasked512", argLength: 2, reg: wkw, asm: "VPBROADCASTD", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPBROADCASTQ128", argLength: 1, reg: v11, asm: "VPBROADCASTQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPBROADCASTQ256", argLength: 1, reg: v11, asm: "VPBROADCASTQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPBROADCASTQ512", argLength: 1, reg: w11, asm: "VPBROADCASTQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPBROADCASTQMasked128", argLength: 2, reg: wkw, asm: "VPBROADCASTQ", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPBROADCASTQMasked256", argLength: 2, reg: wkw, asm: "VPBROADCASTQ", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPBROADCASTQMasked512", argLength: 2, reg: wkw, asm: "VPBROADCASTQ", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPBROADCASTW128", argLength: 1, reg: v11, asm: "VPBROADCASTW", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPBROADCASTW256", argLength: 1, reg: v11, asm: "VPBROADCASTW", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPBROADCASTW512", argLength: 1, reg: w11, asm: "VPBROADCASTW", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPBROADCASTWMasked128", argLength: 2, reg: wkw, asm: "VPBROADCASTW", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPBROADCASTWMasked256", argLength: 2, reg: wkw, asm: "VPBROADCASTW", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPBROADCASTWMasked512", argLength: 2, reg: wkw, asm: "VPBROADCASTW", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPCMPEQB128", argLength: 2, reg: v21, asm: "VPCMPEQB", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPCMPEQB256", argLength: 2, reg: v21, asm: "VPCMPEQB", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPCMPEQB512", argLength: 2, reg: w2k, asm: "VPCMPEQB", commutative: true, typ: "Mask", resultInArg0: false},
|
||||
@@ -1000,10 +1034,10 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
||||
{name: "VPRORQMasked128", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPRORQMasked256", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPRORQMasked512", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPINSRB128", argLength: 2, reg: vgpv, asm: "VPINSRB", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPINSRW128", argLength: 2, reg: vgpv, asm: "VPINSRW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPINSRD128", argLength: 2, reg: vgpv, asm: "VPINSRD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPINSRQ128", argLength: 2, reg: vgpv, asm: "VPINSRQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPINSRB128", argLength: 2, reg: vgpv, asm: "VPINSRB", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPINSRW128", argLength: 2, reg: vgpv, asm: "VPINSRW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VINSERTF128256", argLength: 2, reg: v21, asm: "VINSERTF128", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VINSERTF64X4512", argLength: 2, reg: w21, asm: "VINSERTF64X4", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VINSERTI128256", argLength: 2, reg: v21, asm: "VINSERTI128", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
|
||||
@@ -232,6 +232,66 @@ func simdGenericOps() []opData {
|
||||
{name: "AverageUint16x8", argLength: 2, commutative: true},
|
||||
{name: "AverageUint16x16", argLength: 2, commutative: true},
|
||||
{name: "AverageUint16x32", argLength: 2, commutative: true},
|
||||
{name: "Broadcast128Float32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast128Float64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast128Int8x16", argLength: 1, commutative: false},
|
||||
{name: "Broadcast128Int16x8", argLength: 1, commutative: false},
|
||||
{name: "Broadcast128Int32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast128Int64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast128MaskedFloat32x4", argLength: 2, commutative: false},
|
||||
{name: "Broadcast128MaskedFloat64x2", argLength: 2, commutative: false},
|
||||
{name: "Broadcast128MaskedInt8x16", argLength: 2, commutative: false},
|
||||
{name: "Broadcast128MaskedInt16x8", argLength: 2, commutative: false},
|
||||
{name: "Broadcast128MaskedInt32x4", argLength: 2, commutative: false},
|
||||
{name: "Broadcast128MaskedInt64x2", argLength: 2, commutative: false},
|
||||
{name: "Broadcast128MaskedUint8x16", argLength: 2, commutative: false},
|
||||
{name: "Broadcast128MaskedUint16x8", argLength: 2, commutative: false},
|
||||
{name: "Broadcast128MaskedUint32x4", argLength: 2, commutative: false},
|
||||
{name: "Broadcast128MaskedUint64x2", argLength: 2, commutative: false},
|
||||
{name: "Broadcast128Uint8x16", argLength: 1, commutative: false},
|
||||
{name: "Broadcast128Uint16x8", argLength: 1, commutative: false},
|
||||
{name: "Broadcast128Uint32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast128Uint64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast256Float32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast256Float64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast256Int8x16", argLength: 1, commutative: false},
|
||||
{name: "Broadcast256Int16x8", argLength: 1, commutative: false},
|
||||
{name: "Broadcast256Int32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast256Int64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast256MaskedFloat32x4", argLength: 2, commutative: false},
|
||||
{name: "Broadcast256MaskedFloat64x2", argLength: 2, commutative: false},
|
||||
{name: "Broadcast256MaskedInt8x16", argLength: 2, commutative: false},
|
||||
{name: "Broadcast256MaskedInt16x8", argLength: 2, commutative: false},
|
||||
{name: "Broadcast256MaskedInt32x4", argLength: 2, commutative: false},
|
||||
{name: "Broadcast256MaskedInt64x2", argLength: 2, commutative: false},
|
||||
{name: "Broadcast256MaskedUint8x16", argLength: 2, commutative: false},
|
||||
{name: "Broadcast256MaskedUint16x8", argLength: 2, commutative: false},
|
||||
{name: "Broadcast256MaskedUint32x4", argLength: 2, commutative: false},
|
||||
{name: "Broadcast256MaskedUint64x2", argLength: 2, commutative: false},
|
||||
{name: "Broadcast256Uint8x16", argLength: 1, commutative: false},
|
||||
{name: "Broadcast256Uint16x8", argLength: 1, commutative: false},
|
||||
{name: "Broadcast256Uint32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast256Uint64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast512Float32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast512Float64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast512Int8x16", argLength: 1, commutative: false},
|
||||
{name: "Broadcast512Int16x8", argLength: 1, commutative: false},
|
||||
{name: "Broadcast512Int32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast512Int64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast512MaskedFloat32x4", argLength: 2, commutative: false},
|
||||
{name: "Broadcast512MaskedFloat64x2", argLength: 2, commutative: false},
|
||||
{name: "Broadcast512MaskedInt8x16", argLength: 2, commutative: false},
|
||||
{name: "Broadcast512MaskedInt16x8", argLength: 2, commutative: false},
|
||||
{name: "Broadcast512MaskedInt32x4", argLength: 2, commutative: false},
|
||||
{name: "Broadcast512MaskedInt64x2", argLength: 2, commutative: false},
|
||||
{name: "Broadcast512MaskedUint8x16", argLength: 2, commutative: false},
|
||||
{name: "Broadcast512MaskedUint16x8", argLength: 2, commutative: false},
|
||||
{name: "Broadcast512MaskedUint32x4", argLength: 2, commutative: false},
|
||||
{name: "Broadcast512MaskedUint64x2", argLength: 2, commutative: false},
|
||||
{name: "Broadcast512Uint8x16", argLength: 1, commutative: false},
|
||||
{name: "Broadcast512Uint16x8", argLength: 1, commutative: false},
|
||||
{name: "Broadcast512Uint32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast512Uint64x2", argLength: 1, commutative: false},
|
||||
{name: "CeilFloat32x4", argLength: 1, commutative: false},
|
||||
{name: "CeilFloat32x8", argLength: 1, commutative: false},
|
||||
{name: "CeilFloat64x2", argLength: 1, commutative: false},
|
||||
@@ -1812,6 +1872,8 @@ func simdGenericOps() []opData {
|
||||
{name: "RoundToEvenScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
{name: "RoundToEvenScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
{name: "RoundToEvenScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
{name: "SetElemFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
{name: "SetElemFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
{name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
{name: "SetElemInt16x8", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
{name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
|
||||
@@ -1242,6 +1242,16 @@ const (
|
||||
OpAMD64VADDSUBPD256
|
||||
OpAMD64VADDSUBPS128
|
||||
OpAMD64VADDSUBPS256
|
||||
OpAMD64VBROADCASTSD256
|
||||
OpAMD64VBROADCASTSD512
|
||||
OpAMD64VBROADCASTSDMasked256
|
||||
OpAMD64VBROADCASTSDMasked512
|
||||
OpAMD64VBROADCASTSS128
|
||||
OpAMD64VBROADCASTSS256
|
||||
OpAMD64VBROADCASTSS512
|
||||
OpAMD64VBROADCASTSSMasked128
|
||||
OpAMD64VBROADCASTSSMasked256
|
||||
OpAMD64VBROADCASTSSMasked512
|
||||
OpAMD64VCOMPRESSPDMasked128
|
||||
OpAMD64VCOMPRESSPDMasked256
|
||||
OpAMD64VCOMPRESSPDMasked512
|
||||
@@ -1474,6 +1484,30 @@ const (
|
||||
OpAMD64VPBLENDMWMasked512
|
||||
OpAMD64VPBLENDVB128
|
||||
OpAMD64VPBLENDVB256
|
||||
OpAMD64VPBROADCASTB128
|
||||
OpAMD64VPBROADCASTB256
|
||||
OpAMD64VPBROADCASTB512
|
||||
OpAMD64VPBROADCASTBMasked128
|
||||
OpAMD64VPBROADCASTBMasked256
|
||||
OpAMD64VPBROADCASTBMasked512
|
||||
OpAMD64VPBROADCASTD128
|
||||
OpAMD64VPBROADCASTD256
|
||||
OpAMD64VPBROADCASTD512
|
||||
OpAMD64VPBROADCASTDMasked128
|
||||
OpAMD64VPBROADCASTDMasked256
|
||||
OpAMD64VPBROADCASTDMasked512
|
||||
OpAMD64VPBROADCASTQ128
|
||||
OpAMD64VPBROADCASTQ256
|
||||
OpAMD64VPBROADCASTQ512
|
||||
OpAMD64VPBROADCASTQMasked128
|
||||
OpAMD64VPBROADCASTQMasked256
|
||||
OpAMD64VPBROADCASTQMasked512
|
||||
OpAMD64VPBROADCASTW128
|
||||
OpAMD64VPBROADCASTW256
|
||||
OpAMD64VPBROADCASTW512
|
||||
OpAMD64VPBROADCASTWMasked128
|
||||
OpAMD64VPBROADCASTWMasked256
|
||||
OpAMD64VPBROADCASTWMasked512
|
||||
OpAMD64VPCMPEQB128
|
||||
OpAMD64VPCMPEQB256
|
||||
OpAMD64VPCMPEQB512
|
||||
@@ -2222,10 +2256,10 @@ const (
|
||||
OpAMD64VPRORQMasked128
|
||||
OpAMD64VPRORQMasked256
|
||||
OpAMD64VPRORQMasked512
|
||||
OpAMD64VPINSRB128
|
||||
OpAMD64VPINSRW128
|
||||
OpAMD64VPINSRD128
|
||||
OpAMD64VPINSRQ128
|
||||
OpAMD64VPINSRB128
|
||||
OpAMD64VPINSRW128
|
||||
OpAMD64VINSERTF128256
|
||||
OpAMD64VINSERTF64X4512
|
||||
OpAMD64VINSERTI128256
|
||||
@@ -4839,6 +4873,66 @@ const (
|
||||
OpAverageUint16x8
|
||||
OpAverageUint16x16
|
||||
OpAverageUint16x32
|
||||
OpBroadcast128Float32x4
|
||||
OpBroadcast128Float64x2
|
||||
OpBroadcast128Int8x16
|
||||
OpBroadcast128Int16x8
|
||||
OpBroadcast128Int32x4
|
||||
OpBroadcast128Int64x2
|
||||
OpBroadcast128MaskedFloat32x4
|
||||
OpBroadcast128MaskedFloat64x2
|
||||
OpBroadcast128MaskedInt8x16
|
||||
OpBroadcast128MaskedInt16x8
|
||||
OpBroadcast128MaskedInt32x4
|
||||
OpBroadcast128MaskedInt64x2
|
||||
OpBroadcast128MaskedUint8x16
|
||||
OpBroadcast128MaskedUint16x8
|
||||
OpBroadcast128MaskedUint32x4
|
||||
OpBroadcast128MaskedUint64x2
|
||||
OpBroadcast128Uint8x16
|
||||
OpBroadcast128Uint16x8
|
||||
OpBroadcast128Uint32x4
|
||||
OpBroadcast128Uint64x2
|
||||
OpBroadcast256Float32x4
|
||||
OpBroadcast256Float64x2
|
||||
OpBroadcast256Int8x16
|
||||
OpBroadcast256Int16x8
|
||||
OpBroadcast256Int32x4
|
||||
OpBroadcast256Int64x2
|
||||
OpBroadcast256MaskedFloat32x4
|
||||
OpBroadcast256MaskedFloat64x2
|
||||
OpBroadcast256MaskedInt8x16
|
||||
OpBroadcast256MaskedInt16x8
|
||||
OpBroadcast256MaskedInt32x4
|
||||
OpBroadcast256MaskedInt64x2
|
||||
OpBroadcast256MaskedUint8x16
|
||||
OpBroadcast256MaskedUint16x8
|
||||
OpBroadcast256MaskedUint32x4
|
||||
OpBroadcast256MaskedUint64x2
|
||||
OpBroadcast256Uint8x16
|
||||
OpBroadcast256Uint16x8
|
||||
OpBroadcast256Uint32x4
|
||||
OpBroadcast256Uint64x2
|
||||
OpBroadcast512Float32x4
|
||||
OpBroadcast512Float64x2
|
||||
OpBroadcast512Int8x16
|
||||
OpBroadcast512Int16x8
|
||||
OpBroadcast512Int32x4
|
||||
OpBroadcast512Int64x2
|
||||
OpBroadcast512MaskedFloat32x4
|
||||
OpBroadcast512MaskedFloat64x2
|
||||
OpBroadcast512MaskedInt8x16
|
||||
OpBroadcast512MaskedInt16x8
|
||||
OpBroadcast512MaskedInt32x4
|
||||
OpBroadcast512MaskedInt64x2
|
||||
OpBroadcast512MaskedUint8x16
|
||||
OpBroadcast512MaskedUint16x8
|
||||
OpBroadcast512MaskedUint32x4
|
||||
OpBroadcast512MaskedUint64x2
|
||||
OpBroadcast512Uint8x16
|
||||
OpBroadcast512Uint16x8
|
||||
OpBroadcast512Uint32x4
|
||||
OpBroadcast512Uint64x2
|
||||
OpCeilFloat32x4
|
||||
OpCeilFloat32x8
|
||||
OpCeilFloat64x2
|
||||
@@ -6419,6 +6513,8 @@ const (
|
||||
OpRoundToEvenScaledResidueMaskedFloat64x2
|
||||
OpRoundToEvenScaledResidueMaskedFloat64x4
|
||||
OpRoundToEvenScaledResidueMaskedFloat64x8
|
||||
OpSetElemFloat32x4
|
||||
OpSetElemFloat64x2
|
||||
OpSetElemInt8x16
|
||||
OpSetElemInt16x8
|
||||
OpSetElemInt32x4
|
||||
@@ -19771,6 +19867,141 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VBROADCASTSD256",
|
||||
argLen: 1,
|
||||
asm: x86.AVBROADCASTSD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VBROADCASTSD512",
|
||||
argLen: 1,
|
||||
asm: x86.AVBROADCASTSD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VBROADCASTSDMasked256",
|
||||
argLen: 2,
|
||||
asm: x86.AVBROADCASTSD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VBROADCASTSDMasked512",
|
||||
argLen: 2,
|
||||
asm: x86.AVBROADCASTSD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VBROADCASTSS128",
|
||||
argLen: 1,
|
||||
asm: x86.AVBROADCASTSS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VBROADCASTSS256",
|
||||
argLen: 1,
|
||||
asm: x86.AVBROADCASTSS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VBROADCASTSS512",
|
||||
argLen: 1,
|
||||
asm: x86.AVBROADCASTSS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VBROADCASTSSMasked128",
|
||||
argLen: 2,
|
||||
asm: x86.AVBROADCASTSS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VBROADCASTSSMasked256",
|
||||
argLen: 2,
|
||||
asm: x86.AVBROADCASTSS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VBROADCASTSSMasked512",
|
||||
argLen: 2,
|
||||
asm: x86.AVBROADCASTSS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VCOMPRESSPDMasked128",
|
||||
argLen: 2,
|
||||
@@ -23272,6 +23503,330 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTB128",
|
||||
argLen: 1,
|
||||
asm: x86.AVPBROADCASTB,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTB256",
|
||||
argLen: 1,
|
||||
asm: x86.AVPBROADCASTB,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTB512",
|
||||
argLen: 1,
|
||||
asm: x86.AVPBROADCASTB,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTBMasked128",
|
||||
argLen: 2,
|
||||
asm: x86.AVPBROADCASTB,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTBMasked256",
|
||||
argLen: 2,
|
||||
asm: x86.AVPBROADCASTB,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTBMasked512",
|
||||
argLen: 2,
|
||||
asm: x86.AVPBROADCASTB,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTD128",
|
||||
argLen: 1,
|
||||
asm: x86.AVPBROADCASTD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTD256",
|
||||
argLen: 1,
|
||||
asm: x86.AVPBROADCASTD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTD512",
|
||||
argLen: 1,
|
||||
asm: x86.AVPBROADCASTD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTDMasked128",
|
||||
argLen: 2,
|
||||
asm: x86.AVPBROADCASTD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTDMasked256",
|
||||
argLen: 2,
|
||||
asm: x86.AVPBROADCASTD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTDMasked512",
|
||||
argLen: 2,
|
||||
asm: x86.AVPBROADCASTD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTQ128",
|
||||
argLen: 1,
|
||||
asm: x86.AVPBROADCASTQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTQ256",
|
||||
argLen: 1,
|
||||
asm: x86.AVPBROADCASTQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTQ512",
|
||||
argLen: 1,
|
||||
asm: x86.AVPBROADCASTQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTQMasked128",
|
||||
argLen: 2,
|
||||
asm: x86.AVPBROADCASTQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTQMasked256",
|
||||
argLen: 2,
|
||||
asm: x86.AVPBROADCASTQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTQMasked512",
|
||||
argLen: 2,
|
||||
asm: x86.AVPBROADCASTQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTW128",
|
||||
argLen: 1,
|
||||
asm: x86.AVPBROADCASTW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTW256",
|
||||
argLen: 1,
|
||||
asm: x86.AVPBROADCASTW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTW512",
|
||||
argLen: 1,
|
||||
asm: x86.AVPBROADCASTW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTWMasked128",
|
||||
argLen: 2,
|
||||
asm: x86.AVPBROADCASTW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTWMasked256",
|
||||
argLen: 2,
|
||||
asm: x86.AVPBROADCASTW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPBROADCASTWMasked512",
|
||||
argLen: 2,
|
||||
asm: x86.AVPBROADCASTW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPCMPEQB128",
|
||||
argLen: 2,
|
||||
@@ -34481,36 +35036,6 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPINSRB128",
|
||||
auxType: auxUInt8,
|
||||
argLen: 2,
|
||||
asm: x86.AVPINSRB,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPINSRW128",
|
||||
auxType: auxUInt8,
|
||||
argLen: 2,
|
||||
asm: x86.AVPINSRW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPINSRD128",
|
||||
auxType: auxUInt8,
|
||||
@@ -34541,6 +35066,36 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPINSRB128",
|
||||
auxType: auxUInt8,
|
||||
argLen: 2,
|
||||
asm: x86.AVPINSRB,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPINSRW128",
|
||||
auxType: auxUInt8,
|
||||
argLen: 2,
|
||||
asm: x86.AVPINSRW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VINSERTF128256",
|
||||
auxType: auxUInt8,
|
||||
@@ -64725,6 +65280,306 @@ var opcodeTable = [...]opInfo{
|
||||
commutative: true,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128Float32x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128Float64x2",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128Int8x16",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128Int16x8",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128Int32x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128Int64x2",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128MaskedFloat32x4",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128MaskedFloat64x2",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128MaskedInt8x16",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128MaskedInt16x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128MaskedInt32x4",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128MaskedInt64x2",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128MaskedUint8x16",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128MaskedUint16x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128MaskedUint32x4",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128MaskedUint64x2",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128Uint8x16",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128Uint16x8",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128Uint32x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128Uint64x2",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256Float32x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256Float64x2",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256Int8x16",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256Int16x8",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256Int32x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256Int64x2",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256MaskedFloat32x4",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256MaskedFloat64x2",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256MaskedInt8x16",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256MaskedInt16x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256MaskedInt32x4",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256MaskedInt64x2",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256MaskedUint8x16",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256MaskedUint16x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256MaskedUint32x4",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256MaskedUint64x2",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256Uint8x16",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256Uint16x8",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256Uint32x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256Uint64x2",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512Float32x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512Float64x2",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512Int8x16",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512Int16x8",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512Int32x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512Int64x2",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512MaskedFloat32x4",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512MaskedFloat64x2",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512MaskedInt8x16",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512MaskedInt16x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512MaskedInt32x4",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512MaskedInt64x2",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512MaskedUint8x16",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512MaskedUint16x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512MaskedUint32x4",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512MaskedUint64x2",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512Uint8x16",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512Uint16x8",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512Uint32x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512Uint64x2",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "CeilFloat32x4",
|
||||
argLen: 1,
|
||||
@@ -73153,6 +74008,18 @@ var opcodeTable = [...]opInfo{
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "SetElemFloat32x4",
|
||||
auxType: auxUInt8,
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "SetElemFloat64x2",
|
||||
auxType: auxUInt8,
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "SetElemInt8x16",
|
||||
auxType: auxUInt8,
|
||||
|
||||
@@ -1317,6 +1317,156 @@ func rewriteValueAMD64(v *Value) bool {
|
||||
return rewriteValueAMD64_OpBitLen64(v)
|
||||
case OpBitLen8:
|
||||
return rewriteValueAMD64_OpBitLen8(v)
|
||||
case OpBroadcast128Float32x4:
|
||||
v.Op = OpAMD64VBROADCASTSS128
|
||||
return true
|
||||
case OpBroadcast128Float64x2:
|
||||
v.Op = OpAMD64VPBROADCASTQ128
|
||||
return true
|
||||
case OpBroadcast128Int16x8:
|
||||
v.Op = OpAMD64VPBROADCASTW128
|
||||
return true
|
||||
case OpBroadcast128Int32x4:
|
||||
v.Op = OpAMD64VPBROADCASTD128
|
||||
return true
|
||||
case OpBroadcast128Int64x2:
|
||||
v.Op = OpAMD64VPBROADCASTQ128
|
||||
return true
|
||||
case OpBroadcast128Int8x16:
|
||||
v.Op = OpAMD64VPBROADCASTB128
|
||||
return true
|
||||
case OpBroadcast128MaskedFloat32x4:
|
||||
return rewriteValueAMD64_OpBroadcast128MaskedFloat32x4(v)
|
||||
case OpBroadcast128MaskedFloat64x2:
|
||||
return rewriteValueAMD64_OpBroadcast128MaskedFloat64x2(v)
|
||||
case OpBroadcast128MaskedInt16x8:
|
||||
return rewriteValueAMD64_OpBroadcast128MaskedInt16x8(v)
|
||||
case OpBroadcast128MaskedInt32x4:
|
||||
return rewriteValueAMD64_OpBroadcast128MaskedInt32x4(v)
|
||||
case OpBroadcast128MaskedInt64x2:
|
||||
return rewriteValueAMD64_OpBroadcast128MaskedInt64x2(v)
|
||||
case OpBroadcast128MaskedInt8x16:
|
||||
return rewriteValueAMD64_OpBroadcast128MaskedInt8x16(v)
|
||||
case OpBroadcast128MaskedUint16x8:
|
||||
return rewriteValueAMD64_OpBroadcast128MaskedUint16x8(v)
|
||||
case OpBroadcast128MaskedUint32x4:
|
||||
return rewriteValueAMD64_OpBroadcast128MaskedUint32x4(v)
|
||||
case OpBroadcast128MaskedUint64x2:
|
||||
return rewriteValueAMD64_OpBroadcast128MaskedUint64x2(v)
|
||||
case OpBroadcast128MaskedUint8x16:
|
||||
return rewriteValueAMD64_OpBroadcast128MaskedUint8x16(v)
|
||||
case OpBroadcast128Uint16x8:
|
||||
v.Op = OpAMD64VPBROADCASTW128
|
||||
return true
|
||||
case OpBroadcast128Uint32x4:
|
||||
v.Op = OpAMD64VPBROADCASTD128
|
||||
return true
|
||||
case OpBroadcast128Uint64x2:
|
||||
v.Op = OpAMD64VPBROADCASTQ128
|
||||
return true
|
||||
case OpBroadcast128Uint8x16:
|
||||
v.Op = OpAMD64VPBROADCASTB128
|
||||
return true
|
||||
case OpBroadcast256Float32x4:
|
||||
v.Op = OpAMD64VBROADCASTSS256
|
||||
return true
|
||||
case OpBroadcast256Float64x2:
|
||||
v.Op = OpAMD64VBROADCASTSD256
|
||||
return true
|
||||
case OpBroadcast256Int16x8:
|
||||
v.Op = OpAMD64VPBROADCASTW256
|
||||
return true
|
||||
case OpBroadcast256Int32x4:
|
||||
v.Op = OpAMD64VPBROADCASTD256
|
||||
return true
|
||||
case OpBroadcast256Int64x2:
|
||||
v.Op = OpAMD64VPBROADCASTQ256
|
||||
return true
|
||||
case OpBroadcast256Int8x16:
|
||||
v.Op = OpAMD64VPBROADCASTB256
|
||||
return true
|
||||
case OpBroadcast256MaskedFloat32x4:
|
||||
return rewriteValueAMD64_OpBroadcast256MaskedFloat32x4(v)
|
||||
case OpBroadcast256MaskedFloat64x2:
|
||||
return rewriteValueAMD64_OpBroadcast256MaskedFloat64x2(v)
|
||||
case OpBroadcast256MaskedInt16x8:
|
||||
return rewriteValueAMD64_OpBroadcast256MaskedInt16x8(v)
|
||||
case OpBroadcast256MaskedInt32x4:
|
||||
return rewriteValueAMD64_OpBroadcast256MaskedInt32x4(v)
|
||||
case OpBroadcast256MaskedInt64x2:
|
||||
return rewriteValueAMD64_OpBroadcast256MaskedInt64x2(v)
|
||||
case OpBroadcast256MaskedInt8x16:
|
||||
return rewriteValueAMD64_OpBroadcast256MaskedInt8x16(v)
|
||||
case OpBroadcast256MaskedUint16x8:
|
||||
return rewriteValueAMD64_OpBroadcast256MaskedUint16x8(v)
|
||||
case OpBroadcast256MaskedUint32x4:
|
||||
return rewriteValueAMD64_OpBroadcast256MaskedUint32x4(v)
|
||||
case OpBroadcast256MaskedUint64x2:
|
||||
return rewriteValueAMD64_OpBroadcast256MaskedUint64x2(v)
|
||||
case OpBroadcast256MaskedUint8x16:
|
||||
return rewriteValueAMD64_OpBroadcast256MaskedUint8x16(v)
|
||||
case OpBroadcast256Uint16x8:
|
||||
v.Op = OpAMD64VPBROADCASTW256
|
||||
return true
|
||||
case OpBroadcast256Uint32x4:
|
||||
v.Op = OpAMD64VPBROADCASTD256
|
||||
return true
|
||||
case OpBroadcast256Uint64x2:
|
||||
v.Op = OpAMD64VPBROADCASTQ256
|
||||
return true
|
||||
case OpBroadcast256Uint8x16:
|
||||
v.Op = OpAMD64VPBROADCASTB256
|
||||
return true
|
||||
case OpBroadcast512Float32x4:
|
||||
v.Op = OpAMD64VBROADCASTSS512
|
||||
return true
|
||||
case OpBroadcast512Float64x2:
|
||||
v.Op = OpAMD64VBROADCASTSD512
|
||||
return true
|
||||
case OpBroadcast512Int16x8:
|
||||
v.Op = OpAMD64VPBROADCASTW512
|
||||
return true
|
||||
case OpBroadcast512Int32x4:
|
||||
v.Op = OpAMD64VPBROADCASTD512
|
||||
return true
|
||||
case OpBroadcast512Int64x2:
|
||||
v.Op = OpAMD64VPBROADCASTQ512
|
||||
return true
|
||||
case OpBroadcast512Int8x16:
|
||||
v.Op = OpAMD64VPBROADCASTB512
|
||||
return true
|
||||
case OpBroadcast512MaskedFloat32x4:
|
||||
return rewriteValueAMD64_OpBroadcast512MaskedFloat32x4(v)
|
||||
case OpBroadcast512MaskedFloat64x2:
|
||||
return rewriteValueAMD64_OpBroadcast512MaskedFloat64x2(v)
|
||||
case OpBroadcast512MaskedInt16x8:
|
||||
return rewriteValueAMD64_OpBroadcast512MaskedInt16x8(v)
|
||||
case OpBroadcast512MaskedInt32x4:
|
||||
return rewriteValueAMD64_OpBroadcast512MaskedInt32x4(v)
|
||||
case OpBroadcast512MaskedInt64x2:
|
||||
return rewriteValueAMD64_OpBroadcast512MaskedInt64x2(v)
|
||||
case OpBroadcast512MaskedInt8x16:
|
||||
return rewriteValueAMD64_OpBroadcast512MaskedInt8x16(v)
|
||||
case OpBroadcast512MaskedUint16x8:
|
||||
return rewriteValueAMD64_OpBroadcast512MaskedUint16x8(v)
|
||||
case OpBroadcast512MaskedUint32x4:
|
||||
return rewriteValueAMD64_OpBroadcast512MaskedUint32x4(v)
|
||||
case OpBroadcast512MaskedUint64x2:
|
||||
return rewriteValueAMD64_OpBroadcast512MaskedUint64x2(v)
|
||||
case OpBroadcast512MaskedUint8x16:
|
||||
return rewriteValueAMD64_OpBroadcast512MaskedUint8x16(v)
|
||||
case OpBroadcast512Uint16x8:
|
||||
v.Op = OpAMD64VPBROADCASTW512
|
||||
return true
|
||||
case OpBroadcast512Uint32x4:
|
||||
v.Op = OpAMD64VPBROADCASTD512
|
||||
return true
|
||||
case OpBroadcast512Uint64x2:
|
||||
v.Op = OpAMD64VPBROADCASTQ512
|
||||
return true
|
||||
case OpBroadcast512Uint8x16:
|
||||
v.Op = OpAMD64VPBROADCASTB512
|
||||
return true
|
||||
case OpBswap16:
|
||||
return rewriteValueAMD64_OpBswap16(v)
|
||||
case OpBswap32:
|
||||
@@ -4539,6 +4689,12 @@ func rewriteValueAMD64(v *Value) bool {
|
||||
return rewriteValueAMD64_OpSelect1(v)
|
||||
case OpSelectN:
|
||||
return rewriteValueAMD64_OpSelectN(v)
|
||||
case OpSetElemFloat32x4:
|
||||
v.Op = OpAMD64VPINSRD128
|
||||
return true
|
||||
case OpSetElemFloat64x2:
|
||||
v.Op = OpAMD64VPINSRQ128
|
||||
return true
|
||||
case OpSetElemInt16x8:
|
||||
v.Op = OpAMD64VPINSRW128
|
||||
return true
|
||||
@@ -31628,6 +31784,486 @@ func rewriteValueAMD64_OpBitLen8(v *Value) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast128MaskedFloat32x4(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast128MaskedFloat32x4 x mask)
|
||||
// result: (VBROADCASTSSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VBROADCASTSSMasked128)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast128MaskedFloat64x2(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast128MaskedFloat64x2 x mask)
|
||||
// result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTQMasked128)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast128MaskedInt16x8(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast128MaskedInt16x8 x mask)
|
||||
// result: (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTWMasked128)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast128MaskedInt32x4(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast128MaskedInt32x4 x mask)
|
||||
// result: (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTDMasked128)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast128MaskedInt64x2(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast128MaskedInt64x2 x mask)
|
||||
// result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTQMasked128)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast128MaskedInt8x16(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast128MaskedInt8x16 x mask)
|
||||
// result: (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTBMasked128)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast128MaskedUint16x8(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast128MaskedUint16x8 x mask)
|
||||
// result: (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTWMasked128)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast128MaskedUint32x4(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast128MaskedUint32x4 x mask)
|
||||
// result: (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTDMasked128)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast128MaskedUint64x2(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast128MaskedUint64x2 x mask)
|
||||
// result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTQMasked128)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast128MaskedUint8x16(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast128MaskedUint8x16 x mask)
|
||||
// result: (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTBMasked128)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast256MaskedFloat32x4(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast256MaskedFloat32x4 x mask)
|
||||
// result: (VBROADCASTSSMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VBROADCASTSSMasked256)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast256MaskedFloat64x2(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast256MaskedFloat64x2 x mask)
|
||||
// result: (VBROADCASTSDMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VBROADCASTSDMasked256)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast256MaskedInt16x8(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast256MaskedInt16x8 x mask)
|
||||
// result: (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTWMasked256)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast256MaskedInt32x4(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast256MaskedInt32x4 x mask)
|
||||
// result: (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTDMasked256)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast256MaskedInt64x2(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast256MaskedInt64x2 x mask)
|
||||
// result: (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTQMasked256)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast256MaskedInt8x16(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast256MaskedInt8x16 x mask)
|
||||
// result: (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTBMasked256)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast256MaskedUint16x8(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast256MaskedUint16x8 x mask)
|
||||
// result: (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTWMasked256)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast256MaskedUint32x4(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast256MaskedUint32x4 x mask)
|
||||
// result: (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTDMasked256)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast256MaskedUint64x2(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast256MaskedUint64x2 x mask)
|
||||
// result: (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTQMasked256)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast256MaskedUint8x16(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast256MaskedUint8x16 x mask)
|
||||
// result: (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTBMasked256)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast512MaskedFloat32x4(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast512MaskedFloat32x4 x mask)
|
||||
// result: (VBROADCASTSSMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VBROADCASTSSMasked512)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast512MaskedFloat64x2(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast512MaskedFloat64x2 x mask)
|
||||
// result: (VBROADCASTSDMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VBROADCASTSDMasked512)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast512MaskedInt16x8(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast512MaskedInt16x8 x mask)
|
||||
// result: (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTWMasked512)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast512MaskedInt32x4(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast512MaskedInt32x4 x mask)
|
||||
// result: (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTDMasked512)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast512MaskedInt64x2(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast512MaskedInt64x2 x mask)
|
||||
// result: (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTQMasked512)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast512MaskedInt8x16(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast512MaskedInt8x16 x mask)
|
||||
// result: (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTBMasked512)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast512MaskedUint16x8(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast512MaskedUint16x8 x mask)
|
||||
// result: (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTWMasked512)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast512MaskedUint32x4(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast512MaskedUint32x4 x mask)
|
||||
// result: (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTDMasked512)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast512MaskedUint64x2(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast512MaskedUint64x2 x mask)
|
||||
// result: (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTQMasked512)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBroadcast512MaskedUint8x16(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
// match: (Broadcast512MaskedUint8x16 x mask)
|
||||
// result: (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
for {
|
||||
x := v_0
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPBROADCASTBMasked512)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg2(x, v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueAMD64_OpBswap16(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (Bswap16 x)
|
||||
|
||||
@@ -240,6 +240,66 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||
addF(simdPackage, "Uint16x8.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x32.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Broadcast128", opLen1(ssa.OpBroadcast128Float32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.Broadcast128", opLen1(ssa.OpBroadcast128Float64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.Broadcast128", opLen1(ssa.OpBroadcast128Int8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.Broadcast128", opLen1(ssa.OpBroadcast128Int16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.Broadcast128", opLen1(ssa.OpBroadcast128Int32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.Broadcast128", opLen1(ssa.OpBroadcast128Int64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.Broadcast128", opLen1(ssa.OpBroadcast128Uint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.Broadcast128", opLen1(ssa.OpBroadcast128Uint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.Broadcast128", opLen1(ssa.OpBroadcast128Uint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.Broadcast128", opLen1(ssa.OpBroadcast128Uint64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Broadcast256", opLen1(ssa.OpBroadcast256Float32x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.Broadcast256", opLen1(ssa.OpBroadcast256Float64x2, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.Broadcast256", opLen1(ssa.OpBroadcast256Int8x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.Broadcast256", opLen1(ssa.OpBroadcast256Int16x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.Broadcast256", opLen1(ssa.OpBroadcast256Int32x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.Broadcast256", opLen1(ssa.OpBroadcast256Int64x2, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.Broadcast256", opLen1(ssa.OpBroadcast256Uint8x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.Broadcast256", opLen1(ssa.OpBroadcast256Uint16x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.Broadcast256", opLen1(ssa.OpBroadcast256Uint32x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.Broadcast256", opLen1(ssa.OpBroadcast256Uint64x2, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedFloat32x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedFloat64x2, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt8x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt16x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt32x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt64x2, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint8x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint16x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint32x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint64x2, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Broadcast512", opLen1(ssa.OpBroadcast512Float32x4, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.Broadcast512", opLen1(ssa.OpBroadcast512Float64x2, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.Broadcast512", opLen1(ssa.OpBroadcast512Int8x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.Broadcast512", opLen1(ssa.OpBroadcast512Int16x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.Broadcast512", opLen1(ssa.OpBroadcast512Int32x4, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.Broadcast512", opLen1(ssa.OpBroadcast512Int64x2, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.Broadcast512", opLen1(ssa.OpBroadcast512Uint8x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.Broadcast512", opLen1(ssa.OpBroadcast512Uint16x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.Broadcast512", opLen1(ssa.OpBroadcast512Uint32x4, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.Broadcast512", opLen1(ssa.OpBroadcast512Uint64x2, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedFloat32x4, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedFloat64x2, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt8x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt16x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt32x4, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt64x2, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint8x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint16x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint32x4, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint64x2, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Ceil", opLen1(ssa.OpCeilFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.Ceil", opLen1(ssa.OpCeilFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.Ceil", opLen1(ssa.OpCeilFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
@@ -1408,6 +1468,8 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||
addF(simdPackage, "Float64x2.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.SetElem", opLen2Imm8(ssa.OpSetElemFloat32x4, types.TypeVec128, 0), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.SetElem", opLen2Imm8(ssa.OpSetElemFloat64x2, types.TypeVec128, 0), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.SetElem", opLen2Imm8(ssa.OpSetElemInt8x16, types.TypeVec128, 0), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.SetElem", opLen2Imm8(ssa.OpSetElemInt16x8, types.TypeVec128, 0), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.SetElem", opLen2Imm8(ssa.OpSetElemInt32x4, types.TypeVec128, 0), sys.AMD64)
|
||||
|
||||
@@ -87,6 +87,23 @@ var ternaryFlaky = &shapes{ // for tests that support flaky equality
|
||||
floats: []int{32},
|
||||
}
|
||||
|
||||
// templateData carries the per-vector-shape values that the templates in
// this file are executed with.
type templateData struct {
	Vec    string // name of the vector type, e.g. "Float32x4"
	AOrAn  string // article used in generated documentation: "a" or "an"
	Width  int    // element width in bits, e.g. 32
	Vwidth int    // vector width in bits, e.g. 128
	Count  int    // number of elements, e.g. 4
	WxC    string // width-by-count string, e.g. "32x4"
	BxC    string // the same vector viewed as bytes, e.g. "8x16" (identical to WxC when Width is 8)
	Base   string // capitalized base type of the vector, e.g. "Float"
	Type   string // element type, e.g. "float32"
	OxFF   string // hex literal of a mask with the lowest Count bits set
}

// As128BitVec returns the name of the 128-bit vector type that shares t's
// base type and element width, e.g. "Float32x4" for Base "Float" and
// Width 32.
func (t templateData) As128BitVec() string {
	lanes := 128 / t.Width
	return fmt.Sprintf("%s%dx%d", t.Base, t.Width, lanes)
}
|
||||
|
||||
func oneTemplate(t *template.Template, baseType string, width, count int, out io.Writer) {
|
||||
b := width * count
|
||||
if b < 128 || b > 512 {
|
||||
@@ -102,26 +119,17 @@ func oneTemplate(t *template.Template, baseType string, width, count int, out io
|
||||
aOrAn = "an"
|
||||
}
|
||||
oxFF := fmt.Sprintf("0x%x", uint64((1<<count)-1))
|
||||
t.Execute(out, struct {
|
||||
Vec string // the type of the vector, e.g. Float32x4
|
||||
AOrAn string // for documentation, the article "a" or "an"
|
||||
Width int // the bit width of the element type, e.g. 32
|
||||
Count int // the number of elements, e.g. 4
|
||||
WxC string // the width-by-type string, e.g., "32x4"
|
||||
BxC string // as if bytes, in the proper count, e.g., "8x16" (W==8)
|
||||
Base string // the capitalized Base Type of the vector, e.g., "Float"
|
||||
Type string // the element type, e.g. "float32"
|
||||
OxFF string // a mask for the lowest 'count' bits
|
||||
}{
|
||||
Vec: vType,
|
||||
AOrAn: aOrAn,
|
||||
Width: width,
|
||||
Count: count,
|
||||
WxC: wxc,
|
||||
BxC: bxc,
|
||||
Base: BaseType,
|
||||
Type: eType,
|
||||
OxFF: oxFF,
|
||||
t.Execute(out, templateData{
|
||||
Vec: vType,
|
||||
AOrAn: aOrAn,
|
||||
Width: width,
|
||||
Vwidth: b,
|
||||
Count: count,
|
||||
WxC: wxc,
|
||||
BxC: bxc,
|
||||
Base: BaseType,
|
||||
Type: eType,
|
||||
OxFF: oxFF,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -480,7 +488,7 @@ func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) {
|
||||
|
||||
var unsafePATemplate = templateOf("unsafe PA helper", `
|
||||
// pa{{.Vec}} returns a type-unsafe pointer to array that can
|
||||
// only be used with partial load/store operations that only
|
||||
// only be used with partial load/store operations that only
|
||||
// access the known-safe portions of the array.
|
||||
func pa{{.Vec}}(s []{{.Type}}) *[{{.Count}}]{{.Type}} {
|
||||
return (*[{{.Count}}]{{.Type}})(unsafe.Pointer(&s[0]))
|
||||
@@ -500,7 +508,7 @@ func (x {{.Vec}}) Masked(mask Mask{{.WxC}}) {{.Vec}} {
|
||||
|
||||
// Merge returns x but with elements set to y where mask is false.
|
||||
func (x {{.Vec}}) Merge(y {{.Vec}}, mask Mask{{.WxC}}) {{.Vec}} {
|
||||
{{- if eq .BxC .WxC }}
|
||||
{{- if eq .BxC .WxC -}}
|
||||
im := mask.AsInt{{.BxC}}()
|
||||
{{- else}}
|
||||
im := mask.AsInt{{.WxC}}().AsInt{{.BxC}}()
|
||||
@@ -539,6 +547,32 @@ func (x {{.Vec}}) Merge(y {{.Vec}}, mask Mask{{.WxC}}) {{.Vec}} {
|
||||
}
|
||||
`)
|
||||
|
||||
func (t templateData) CPUfeatureBC() string {
|
||||
switch t.Vwidth {
|
||||
case 128:
|
||||
return "AVX2"
|
||||
case 256:
|
||||
return "AVX2"
|
||||
case 512:
|
||||
if t.Width <= 16 {
|
||||
return "AVX512BW"
|
||||
}
|
||||
return "AVX512F"
|
||||
}
|
||||
panic(fmt.Errorf("unexpected vector width %d", t.Vwidth))
|
||||
}
|
||||
|
||||
// broadcastTemplate is the template for the generated package-level
// Broadcast{{.Vec}} functions. Each generated function declares a zero value
// of the 128-bit vector type named by As128BitVec, stores x in element 0 with
// SetElem, and calls the Broadcast{{.Vwidth}} method on the result. The
// generated doc comment reports the CPU feature returned by CPUfeatureBC.
var broadcastTemplate = templateOf("Broadcast functions", `
|
||||
// Broadcast{{.Vec}} returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature {{.CPUfeatureBC}}
|
||||
func Broadcast{{.Vec}}(x {{.Type}}) {{.Vec}} {
|
||||
var z {{.As128BitVec }}
|
||||
return z.SetElem(0, x).Broadcast{{.Vwidth}}()
|
||||
}
|
||||
`)
|
||||
|
||||
func main() {
|
||||
sl := flag.String("sl", "slice_amd64.go", "file name for slice operations")
|
||||
ush := flag.String("ush", "unsafe_helpers.go", "file name for unsafe helpers")
|
||||
@@ -557,6 +591,7 @@ func main() {
|
||||
avx2SmallLoadSlicePartTemplate,
|
||||
avx2MaskedTemplate,
|
||||
avx512MaskedTemplate,
|
||||
broadcastTemplate,
|
||||
)
|
||||
}
|
||||
if *ush != "" {
|
||||
|
||||
@@ -1386,6 +1386,438 @@ func (x Uint16x16) AverageMasked(y Uint16x16, mask Mask16x16) Uint16x16
|
||||
// Asm: VPAVGW, CPU Feature: AVX512
|
||||
func (x Uint16x32) AverageMasked(y Uint16x32, mask Mask16x32) Uint16x32
|
||||
|
||||
/* Broadcast128 */
|
||||
|
||||
// Broadcast128 copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// Asm: VBROADCASTSS, CPU Feature: AVX2
|
||||
func (x Float32x4) Broadcast128() Float32x4
|
||||
|
||||
// Broadcast128 copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX2
|
||||
func (x Float64x2) Broadcast128() Float64x2
|
||||
|
||||
// Broadcast128 copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX2
|
||||
func (x Int8x16) Broadcast128() Int8x16
|
||||
|
||||
// Broadcast128 copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX2
|
||||
func (x Int16x8) Broadcast128() Int16x8
|
||||
|
||||
// Broadcast128 copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX2
|
||||
func (x Int32x4) Broadcast128() Int32x4
|
||||
|
||||
// Broadcast128 copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX2
|
||||
func (x Int64x2) Broadcast128() Int64x2
|
||||
|
||||
// Broadcast128 copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX2
|
||||
func (x Uint8x16) Broadcast128() Uint8x16
|
||||
|
||||
// Broadcast128 copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX2
|
||||
func (x Uint16x8) Broadcast128() Uint16x8
|
||||
|
||||
// Broadcast128 copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX2
|
||||
func (x Uint32x4) Broadcast128() Uint32x4
|
||||
|
||||
// Broadcast128 copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX2
|
||||
func (x Uint64x2) Broadcast128() Uint64x2
|
||||
|
||||
/* Broadcast128Masked */
|
||||
|
||||
// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// This operation is applied selectively under a write mask.
|
||||
//
|
||||
// Asm: VBROADCASTSS, CPU Feature: AVX512
|
||||
func (x Float32x4) Broadcast128Masked(mask Mask32x4) Float32x4
|
||||
|
||||
// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// This operation is applied selectively under a write mask.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX512
|
||||
func (x Float64x2) Broadcast128Masked(mask Mask64x2) Float64x2
|
||||
|
||||
// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// This operation is applied selectively under a write mask.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX512
|
||||
func (x Int8x16) Broadcast128Masked(mask Mask8x16) Int8x16
|
||||
|
||||
// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// This operation is applied selectively under a write mask.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX512
|
||||
func (x Int16x8) Broadcast128Masked(mask Mask16x8) Int16x8
|
||||
|
||||
// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// This operation is applied selectively under a write mask.
|
||||
//
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX512
|
||||
func (x Int32x4) Broadcast128Masked(mask Mask32x4) Int32x4
|
||||
|
||||
// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// This operation is applied selectively under a write mask.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX512
|
||||
func (x Int64x2) Broadcast128Masked(mask Mask64x2) Int64x2
|
||||
|
||||
// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// This operation is applied selectively under a write mask.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX512
|
||||
func (x Uint8x16) Broadcast128Masked(mask Mask8x16) Uint8x16
|
||||
|
||||
// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// This operation is applied selectively under a write mask.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX512
|
||||
func (x Uint16x8) Broadcast128Masked(mask Mask16x8) Uint16x8
|
||||
|
||||
// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// This operation is applied selectively under a write mask.
|
||||
//
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX512
|
||||
func (x Uint32x4) Broadcast128Masked(mask Mask32x4) Uint32x4
|
||||
|
||||
// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// This operation is applied selectively under a write mask.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX512
|
||||
func (x Uint64x2) Broadcast128Masked(mask Mask64x2) Uint64x2
|
||||
|
||||
/* Broadcast256 */
|
||||
|
||||
// Broadcast256 copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
//
|
||||
// Asm: VBROADCASTSS, CPU Feature: AVX2
|
||||
func (x Float32x4) Broadcast256() Float32x8
|
||||
|
||||
// Broadcast256 copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
//
|
||||
// Asm: VBROADCASTSD, CPU Feature: AVX2
|
||||
func (x Float64x2) Broadcast256() Float64x4
|
||||
|
||||
// Broadcast256 copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX2
|
||||
func (x Int8x16) Broadcast256() Int8x32
|
||||
|
||||
// Broadcast256 copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX2
|
||||
func (x Int16x8) Broadcast256() Int16x16
|
||||
|
||||
// Broadcast256 copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX2
|
||||
func (x Int32x4) Broadcast256() Int32x8
|
||||
|
||||
// Broadcast256 copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX2
|
||||
func (x Int64x2) Broadcast256() Int64x4
|
||||
|
||||
// Broadcast256 copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX2
|
||||
func (x Uint8x16) Broadcast256() Uint8x32
|
||||
|
||||
// Broadcast256 copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX2
|
||||
func (x Uint16x8) Broadcast256() Uint16x16
|
||||
|
||||
// Broadcast256 copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX2
|
||||
func (x Uint32x4) Broadcast256() Uint32x8
|
||||
|
||||
// Broadcast256 copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX2
|
||||
func (x Uint64x2) Broadcast256() Uint64x4
|
||||
|
||||
/* Broadcast256Masked */
|
||||
|
||||
// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
//
|
||||
// This operation is applied selectively under a write mask.
|
||||
//
|
||||
// Asm: VBROADCASTSS, CPU Feature: AVX512
|
||||
func (x Float32x4) Broadcast256Masked(mask Mask32x4) Float32x8
|
||||
|
||||
// Broadcast256Masked copies element zero of x, a 128-bit Float64x2, to all
|
||||
// elements of the returned 256-bit Float64x4.
|
||||
//
|
||||
// This operation is applied selectively under the write mask given by mask.
// NOTE(review): mask is a Mask64x2 while the result is a Float64x4; confirm
// how the mask is applied across the wider result.
|
||||
//
|
||||
// Asm: VBROADCASTSD, CPU Feature: AVX512
|
||||
func (x Float64x2) Broadcast256Masked(mask Mask64x2) Float64x4
|
||||
|
||||
// Broadcast256Masked copies element zero of x, a 128-bit Int8x16, to all
|
||||
// elements of the returned 256-bit Int8x32.
|
||||
//
|
||||
// This operation is applied selectively under the write mask given by mask.
// NOTE(review): mask is a Mask8x16 while the result is an Int8x32; confirm
// how the mask is applied across the wider result.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX512
|
||||
func (x Int8x16) Broadcast256Masked(mask Mask8x16) Int8x32
|
||||
|
||||
// Broadcast256Masked copies element zero of x, a 128-bit Int16x8, to all
|
||||
// elements of the returned 256-bit Int16x16.
|
||||
//
|
||||
// This operation is applied selectively under the write mask given by mask.
// NOTE(review): mask is a Mask16x8 while the result is an Int16x16; confirm
// how the mask is applied across the wider result.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX512
|
||||
func (x Int16x8) Broadcast256Masked(mask Mask16x8) Int16x16
|
||||
|
||||
// Broadcast256Masked copies element zero of x, a 128-bit Int32x4, to all
|
||||
// elements of the returned 256-bit Int32x8.
|
||||
//
|
||||
// This operation is applied selectively under the write mask given by mask.
// NOTE(review): mask is a Mask32x4 while the result is an Int32x8; confirm
// how the mask is applied across the wider result.
|
||||
//
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX512
|
||||
func (x Int32x4) Broadcast256Masked(mask Mask32x4) Int32x8
|
||||
|
||||
// Broadcast256Masked copies element zero of x, a 128-bit Int64x2, to all
|
||||
// elements of the returned 256-bit Int64x4.
|
||||
//
|
||||
// This operation is applied selectively under the write mask given by mask.
// NOTE(review): mask is a Mask64x2 while the result is an Int64x4; confirm
// how the mask is applied across the wider result.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX512
|
||||
func (x Int64x2) Broadcast256Masked(mask Mask64x2) Int64x4
|
||||
|
||||
// Broadcast256Masked copies element zero of x, a 128-bit Uint8x16, to all
|
||||
// elements of the returned 256-bit Uint8x32.
|
||||
//
|
||||
// This operation is applied selectively under the write mask given by mask.
// NOTE(review): mask is a Mask8x16 while the result is a Uint8x32; confirm
// how the mask is applied across the wider result.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX512
|
||||
func (x Uint8x16) Broadcast256Masked(mask Mask8x16) Uint8x32
|
||||
|
||||
// Broadcast256Masked copies element zero of x, a 128-bit Uint16x8, to all
|
||||
// elements of the returned 256-bit Uint16x16.
|
||||
//
|
||||
// This operation is applied selectively under the write mask given by mask.
// NOTE(review): mask is a Mask16x8 while the result is a Uint16x16; confirm
// how the mask is applied across the wider result.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX512
|
||||
func (x Uint16x8) Broadcast256Masked(mask Mask16x8) Uint16x16
|
||||
|
||||
// Broadcast256Masked copies element zero of x, a 128-bit Uint32x4, to all
|
||||
// elements of the returned 256-bit Uint32x8.
|
||||
//
|
||||
// This operation is applied selectively under the write mask given by mask.
// NOTE(review): mask is a Mask32x4 while the result is a Uint32x8; confirm
// how the mask is applied across the wider result.
|
||||
//
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX512
|
||||
func (x Uint32x4) Broadcast256Masked(mask Mask32x4) Uint32x8
|
||||
|
||||
// Broadcast256Masked copies element zero of x, a 128-bit Uint64x2, to all
|
||||
// elements of the returned 256-bit Uint64x4.
|
||||
//
|
||||
// This operation is applied selectively under the write mask given by mask.
// NOTE(review): mask is a Mask64x2 while the result is a Uint64x4; confirm
// how the mask is applied across the wider result.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX512
|
||||
func (x Uint64x2) Broadcast256Masked(mask Mask64x2) Uint64x4
|
||||
|
||||
/* Broadcast512 */
|
||||
|
||||
// Broadcast512 copies element zero of x, a 128-bit Float32x4, to all elements
|
||||
// of the returned 512-bit Float32x16.
|
||||
//
|
||||
// Asm: VBROADCASTSS, CPU Feature: AVX512
|
||||
func (x Float32x4) Broadcast512() Float32x16
|
||||
|
||||
// Broadcast512 copies element zero of x, a 128-bit Float64x2, to all elements
|
||||
// of the returned 512-bit Float64x8.
|
||||
//
|
||||
// Asm: VBROADCASTSD, CPU Feature: AVX512
|
||||
func (x Float64x2) Broadcast512() Float64x8
|
||||
|
||||
// Broadcast512 copies element zero of x, a 128-bit Int8x16, to all elements
|
||||
// of the returned 512-bit Int8x64.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX512
|
||||
func (x Int8x16) Broadcast512() Int8x64
|
||||
|
||||
// Broadcast512 copies element zero of x, a 128-bit Int16x8, to all elements
|
||||
// of the returned 512-bit Int16x32.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX512
|
||||
func (x Int16x8) Broadcast512() Int16x32
|
||||
|
||||
// Broadcast512 copies element zero of x, a 128-bit Int32x4, to all elements
|
||||
// of the returned 512-bit Int32x16.
|
||||
//
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX512
|
||||
func (x Int32x4) Broadcast512() Int32x16
|
||||
|
||||
// Broadcast512 copies element zero of x, a 128-bit Int64x2, to all elements
|
||||
// of the returned 512-bit Int64x8.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX512
|
||||
func (x Int64x2) Broadcast512() Int64x8
|
||||
|
||||
// Broadcast512 copies element zero of x, a 128-bit Uint8x16, to all elements
|
||||
// of the returned 512-bit Uint8x64.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX512
|
||||
func (x Uint8x16) Broadcast512() Uint8x64
|
||||
|
||||
// Broadcast512 copies element zero of x, a 128-bit Uint16x8, to all elements
|
||||
// of the returned 512-bit Uint16x32.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX512
|
||||
func (x Uint16x8) Broadcast512() Uint16x32
|
||||
|
||||
// Broadcast512 copies element zero of x, a 128-bit Uint32x4, to all elements
|
||||
// of the returned 512-bit Uint32x16.
|
||||
//
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX512
|
||||
func (x Uint32x4) Broadcast512() Uint32x16
|
||||
|
||||
// Broadcast512 copies element zero of x, a 128-bit Uint64x2, to all elements
|
||||
// of the returned 512-bit Uint64x8.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX512
|
||||
func (x Uint64x2) Broadcast512() Uint64x8
|
||||
|
||||
/* Broadcast512Masked */
|
||||
|
||||
// Broadcast512Masked copies element zero of x, a 128-bit Float32x4, to all
|
||||
// elements of the returned 512-bit Float32x16.
|
||||
//
|
||||
// This operation is applied selectively under the write mask given by mask.
// NOTE(review): mask is a Mask32x4 while the result is a Float32x16; confirm
// how the mask is applied across the wider result.
|
||||
//
|
||||
// Asm: VBROADCASTSS, CPU Feature: AVX512
|
||||
func (x Float32x4) Broadcast512Masked(mask Mask32x4) Float32x16
|
||||
|
||||
// Broadcast512Masked copies element zero of x, a 128-bit Float64x2, to all
|
||||
// elements of the returned 512-bit Float64x8.
|
||||
//
|
||||
// This operation is applied selectively under the write mask given by mask.
// NOTE(review): mask is a Mask64x2 while the result is a Float64x8; confirm
// how the mask is applied across the wider result.
|
||||
//
|
||||
// Asm: VBROADCASTSD, CPU Feature: AVX512
|
||||
func (x Float64x2) Broadcast512Masked(mask Mask64x2) Float64x8
|
||||
|
||||
// Broadcast512Masked copies element zero of x, a 128-bit Int8x16, to all
|
||||
// elements of the returned 512-bit Int8x64.
|
||||
//
|
||||
// This operation is applied selectively under the write mask given by mask.
// NOTE(review): mask is a Mask8x16 while the result is an Int8x64; confirm
// how the mask is applied across the wider result.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX512
|
||||
func (x Int8x16) Broadcast512Masked(mask Mask8x16) Int8x64
|
||||
|
||||
// Broadcast512Masked copies element zero of x, a 128-bit Int16x8, to all
|
||||
// elements of the returned 512-bit Int16x32.
|
||||
//
|
||||
// This operation is applied selectively under the write mask given by mask.
// NOTE(review): mask is a Mask16x8 while the result is an Int16x32; confirm
// how the mask is applied across the wider result.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX512
|
||||
func (x Int16x8) Broadcast512Masked(mask Mask16x8) Int16x32
|
||||
|
||||
// Broadcast512Masked copies element zero of x, a 128-bit Int32x4, to all
|
||||
// elements of the returned 512-bit Int32x16.
|
||||
//
|
||||
// This operation is applied selectively under the write mask given by mask.
// NOTE(review): mask is a Mask32x4 while the result is an Int32x16; confirm
// how the mask is applied across the wider result.
|
||||
//
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX512
|
||||
func (x Int32x4) Broadcast512Masked(mask Mask32x4) Int32x16
|
||||
|
||||
// Broadcast512Masked copies element zero of x, a 128-bit Int64x2, to all
|
||||
// elements of the returned 512-bit Int64x8.
|
||||
//
|
||||
// This operation is applied selectively under the write mask given by mask.
// NOTE(review): mask is a Mask64x2 while the result is an Int64x8; confirm
// how the mask is applied across the wider result.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX512
|
||||
func (x Int64x2) Broadcast512Masked(mask Mask64x2) Int64x8
|
||||
|
||||
// Broadcast512Masked copies element zero of x, a 128-bit Uint8x16, to all
|
||||
// elements of the returned 512-bit Uint8x64.
|
||||
//
|
||||
// This operation is applied selectively under the write mask given by mask.
// NOTE(review): mask is a Mask8x16 while the result is a Uint8x64; confirm
// how the mask is applied across the wider result.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX512
|
||||
func (x Uint8x16) Broadcast512Masked(mask Mask8x16) Uint8x64
|
||||
|
||||
// Broadcast512Masked copies element zero of x, a 128-bit Uint16x8, to all
|
||||
// elements of the returned 512-bit Uint16x32.
|
||||
//
|
||||
// This operation is applied selectively under the write mask given by mask.
// NOTE(review): mask is a Mask16x8 while the result is a Uint16x32; confirm
// how the mask is applied across the wider result.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX512
|
||||
func (x Uint16x8) Broadcast512Masked(mask Mask16x8) Uint16x32
|
||||
|
||||
// Broadcast512Masked copies element zero of x, a 128-bit Uint32x4, to all
|
||||
// elements of the returned 512-bit Uint32x16.
|
||||
//
|
||||
// This operation is applied selectively under the write mask given by mask.
// NOTE(review): mask is a Mask32x4 while the result is a Uint32x16; confirm
// how the mask is applied across the wider result.
|
||||
//
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX512
|
||||
func (x Uint32x4) Broadcast512Masked(mask Mask32x4) Uint32x16
|
||||
|
||||
// Broadcast512Masked copies element zero of x, a 128-bit Uint64x2, to all
|
||||
// elements of the returned 512-bit Uint64x8.
|
||||
//
|
||||
// This operation is applied selectively under the write mask given by mask.
// NOTE(review): mask is a Mask64x2 while the result is a Uint64x8; confirm
// how the mask is applied across the wider result.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX512
|
||||
func (x Uint64x2) Broadcast512Masked(mask Mask64x2) Uint64x8
|
||||
|
||||
/* Ceil */
|
||||
|
||||
// Ceil rounds elements up to the nearest integer.
|
||||
@@ -9116,6 +9548,20 @@ func (x Float64x8) ScaleMasked(y Float64x8, mask Mask64x8) Float64x8
|
||||
|
||||
/* SetElem */
|
||||
|
||||
// SetElem sets the value of the single element selected by index to y and
// returns the resulting Float32x4.
|
||||
//
|
||||
// Performance is better when index is a constant; a non-constant index is
// translated into a jump table.
|
||||
//
|
||||
// Asm: VPINSRD, CPU Feature: AVX
|
||||
func (x Float32x4) SetElem(index uint8, y float32) Float32x4
|
||||
|
||||
// SetElem sets the value of the single element selected by index to y and
// returns the resulting Float64x2.
|
||||
//
|
||||
// Performance is better when index is a constant; a non-constant index is
// translated into a jump table.
|
||||
//
|
||||
// Asm: VPINSRQ, CPU Feature: AVX
|
||||
func (x Float64x2) SetElem(index uint8, y float64) Float64x2
|
||||
|
||||
// SetElem sets a single constant-indexed element's value.
|
||||
//
|
||||
// index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||
|
||||
@@ -412,3 +412,15 @@ func TestRotateAllVariable(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBroadcastUint32x4(t *testing.T) {
|
||||
s := make([]uint32, 4, 4)
|
||||
simd.BroadcastUint32x4(123456789).StoreSlice(s)
|
||||
checkSlices(t, s, []uint32{123456789, 123456789, 123456789, 123456789})
|
||||
}
|
||||
|
||||
func TestBroadcastFloat32x8(t *testing.T) {
|
||||
s := make([]float32, 8, 8)
|
||||
simd.BroadcastFloat32x8(123456789).StoreSlice(s)
|
||||
checkSlices(t, s, []float32{123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789})
|
||||
}
|
||||
|
||||
@@ -1499,3 +1499,273 @@ func (x Float64x8) Merge(y Float64x8, mask Mask64x8) Float64x8 {
|
||||
iy := y.AsInt64x8()
|
||||
return iy.blendMasked(ix, mask).AsFloat64x8()
|
||||
}
|
||||
|
||||
// BroadcastInt8x16 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func BroadcastInt8x16(x int8) Int8x16 {
|
||||
var z Int8x16
|
||||
return z.SetElem(0, x).Broadcast128()
|
||||
}
|
||||
|
||||
// BroadcastInt16x8 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func BroadcastInt16x8(x int16) Int16x8 {
|
||||
var z Int16x8
|
||||
return z.SetElem(0, x).Broadcast128()
|
||||
}
|
||||
|
||||
// BroadcastInt32x4 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func BroadcastInt32x4(x int32) Int32x4 {
|
||||
var z Int32x4
|
||||
return z.SetElem(0, x).Broadcast128()
|
||||
}
|
||||
|
||||
// BroadcastInt64x2 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func BroadcastInt64x2(x int64) Int64x2 {
|
||||
var z Int64x2
|
||||
return z.SetElem(0, x).Broadcast128()
|
||||
}
|
||||
|
||||
// BroadcastUint8x16 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func BroadcastUint8x16(x uint8) Uint8x16 {
|
||||
var z Uint8x16
|
||||
return z.SetElem(0, x).Broadcast128()
|
||||
}
|
||||
|
||||
// BroadcastUint16x8 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func BroadcastUint16x8(x uint16) Uint16x8 {
|
||||
var z Uint16x8
|
||||
return z.SetElem(0, x).Broadcast128()
|
||||
}
|
||||
|
||||
// BroadcastUint32x4 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func BroadcastUint32x4(x uint32) Uint32x4 {
|
||||
var z Uint32x4
|
||||
return z.SetElem(0, x).Broadcast128()
|
||||
}
|
||||
|
||||
// BroadcastUint64x2 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func BroadcastUint64x2(x uint64) Uint64x2 {
|
||||
var z Uint64x2
|
||||
return z.SetElem(0, x).Broadcast128()
|
||||
}
|
||||
|
||||
// BroadcastFloat32x4 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func BroadcastFloat32x4(x float32) Float32x4 {
|
||||
var z Float32x4
|
||||
return z.SetElem(0, x).Broadcast128()
|
||||
}
|
||||
|
||||
// BroadcastFloat64x2 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func BroadcastFloat64x2(x float64) Float64x2 {
|
||||
var z Float64x2
|
||||
return z.SetElem(0, x).Broadcast128()
|
||||
}
|
||||
|
||||
// BroadcastInt8x32 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func BroadcastInt8x32(x int8) Int8x32 {
|
||||
var z Int8x16
|
||||
return z.SetElem(0, x).Broadcast256()
|
||||
}
|
||||
|
||||
// BroadcastInt16x16 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func BroadcastInt16x16(x int16) Int16x16 {
|
||||
var z Int16x8
|
||||
return z.SetElem(0, x).Broadcast256()
|
||||
}
|
||||
|
||||
// BroadcastInt32x8 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func BroadcastInt32x8(x int32) Int32x8 {
|
||||
var z Int32x4
|
||||
return z.SetElem(0, x).Broadcast256()
|
||||
}
|
||||
|
||||
// BroadcastInt64x4 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func BroadcastInt64x4(x int64) Int64x4 {
|
||||
var z Int64x2
|
||||
return z.SetElem(0, x).Broadcast256()
|
||||
}
|
||||
|
||||
// BroadcastUint8x32 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func BroadcastUint8x32(x uint8) Uint8x32 {
|
||||
var z Uint8x16
|
||||
return z.SetElem(0, x).Broadcast256()
|
||||
}
|
||||
|
||||
// BroadcastUint16x16 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func BroadcastUint16x16(x uint16) Uint16x16 {
|
||||
var z Uint16x8
|
||||
return z.SetElem(0, x).Broadcast256()
|
||||
}
|
||||
|
||||
// BroadcastUint32x8 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func BroadcastUint32x8(x uint32) Uint32x8 {
|
||||
var z Uint32x4
|
||||
return z.SetElem(0, x).Broadcast256()
|
||||
}
|
||||
|
||||
// BroadcastUint64x4 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func BroadcastUint64x4(x uint64) Uint64x4 {
|
||||
var z Uint64x2
|
||||
return z.SetElem(0, x).Broadcast256()
|
||||
}
|
||||
|
||||
// BroadcastFloat32x8 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func BroadcastFloat32x8(x float32) Float32x8 {
|
||||
var z Float32x4
|
||||
return z.SetElem(0, x).Broadcast256()
|
||||
}
|
||||
|
||||
// BroadcastFloat64x4 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX2
|
||||
func BroadcastFloat64x4(x float64) Float64x4 {
|
||||
var z Float64x2
|
||||
return z.SetElem(0, x).Broadcast256()
|
||||
}
|
||||
|
||||
// BroadcastInt8x64 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX512BW
|
||||
func BroadcastInt8x64(x int8) Int8x64 {
|
||||
var z Int8x16
|
||||
return z.SetElem(0, x).Broadcast512()
|
||||
}
|
||||
|
||||
// BroadcastInt16x32 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX512BW
|
||||
func BroadcastInt16x32(x int16) Int16x32 {
|
||||
var z Int16x8
|
||||
return z.SetElem(0, x).Broadcast512()
|
||||
}
|
||||
|
||||
// BroadcastInt32x16 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX512F
|
||||
func BroadcastInt32x16(x int32) Int32x16 {
|
||||
var z Int32x4
|
||||
return z.SetElem(0, x).Broadcast512()
|
||||
}
|
||||
|
||||
// BroadcastInt64x8 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX512F
|
||||
func BroadcastInt64x8(x int64) Int64x8 {
|
||||
var z Int64x2
|
||||
return z.SetElem(0, x).Broadcast512()
|
||||
}
|
||||
|
||||
// BroadcastUint8x64 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX512BW
|
||||
func BroadcastUint8x64(x uint8) Uint8x64 {
|
||||
var z Uint8x16
|
||||
return z.SetElem(0, x).Broadcast512()
|
||||
}
|
||||
|
||||
// BroadcastUint16x32 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX512BW
|
||||
func BroadcastUint16x32(x uint16) Uint16x32 {
|
||||
var z Uint16x8
|
||||
return z.SetElem(0, x).Broadcast512()
|
||||
}
|
||||
|
||||
// BroadcastUint32x16 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX512F
|
||||
func BroadcastUint32x16(x uint32) Uint32x16 {
|
||||
var z Uint32x4
|
||||
return z.SetElem(0, x).Broadcast512()
|
||||
}
|
||||
|
||||
// BroadcastUint64x8 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX512F
|
||||
func BroadcastUint64x8(x uint64) Uint64x8 {
|
||||
var z Uint64x2
|
||||
return z.SetElem(0, x).Broadcast512()
|
||||
}
|
||||
|
||||
// BroadcastFloat32x16 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX512F
|
||||
func BroadcastFloat32x16(x float32) Float32x16 {
|
||||
var z Float32x4
|
||||
return z.SetElem(0, x).Broadcast512()
|
||||
}
|
||||
|
||||
// BroadcastFloat64x8 returns a vector with the input
|
||||
// x assigned to all elements of the output.
|
||||
//
|
||||
// Emulated, CPU Feature AVX512F
|
||||
func BroadcastFloat64x8(x float64) Float64x8 {
|
||||
var z Float64x2
|
||||
return z.SetElem(0, x).Broadcast512()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user