[dev.simd] simd, cmd/compile: generated code for Broadcast

Generated by simdgen CL 693599

Supporting Broadcast turned out to require some additional work in
other places, including filling in missing methods such as
SetElem for Float32x4 and Float64x2 (OverwriteBase is used to
get the FP versions).

Also includes a test.

Change-Id: I2efe8967837834745f9cae661d4d4dcbb5390b6f
Reviewed-on: https://go-review.googlesource.com/c/go/+/693758
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
David Chase
2025-08-05 17:34:05 -04:00
parent e001300cf2
commit ddb689c7bb
11 changed files with 2597 additions and 60 deletions

View File

@@ -24,6 +24,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPABSQ128,
ssa.OpAMD64VPABSQ256,
ssa.OpAMD64VPABSQ512,
ssa.OpAMD64VBROADCASTSS128,
ssa.OpAMD64VPBROADCASTQ128,
ssa.OpAMD64VPBROADCASTB128,
ssa.OpAMD64VPBROADCASTW128,
ssa.OpAMD64VPBROADCASTD128,
ssa.OpAMD64VBROADCASTSS256,
ssa.OpAMD64VBROADCASTSD256,
ssa.OpAMD64VPBROADCASTB256,
ssa.OpAMD64VPBROADCASTW256,
ssa.OpAMD64VPBROADCASTD256,
ssa.OpAMD64VPBROADCASTQ256,
ssa.OpAMD64VBROADCASTSS512,
ssa.OpAMD64VBROADCASTSD512,
ssa.OpAMD64VPBROADCASTB512,
ssa.OpAMD64VPBROADCASTW512,
ssa.OpAMD64VPBROADCASTD512,
ssa.OpAMD64VPBROADCASTQ512,
ssa.OpAMD64VCVTTPS2DQ128,
ssa.OpAMD64VCVTTPS2DQ256,
ssa.OpAMD64VCVTTPS2DQ512,
@@ -624,6 +641,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPABSQMasked128,
ssa.OpAMD64VPABSQMasked256,
ssa.OpAMD64VPABSQMasked512,
ssa.OpAMD64VBROADCASTSSMasked128,
ssa.OpAMD64VPBROADCASTQMasked128,
ssa.OpAMD64VPBROADCASTBMasked128,
ssa.OpAMD64VPBROADCASTWMasked128,
ssa.OpAMD64VPBROADCASTDMasked128,
ssa.OpAMD64VBROADCASTSSMasked256,
ssa.OpAMD64VBROADCASTSDMasked256,
ssa.OpAMD64VPBROADCASTBMasked256,
ssa.OpAMD64VPBROADCASTWMasked256,
ssa.OpAMD64VPBROADCASTDMasked256,
ssa.OpAMD64VPBROADCASTQMasked256,
ssa.OpAMD64VBROADCASTSSMasked512,
ssa.OpAMD64VBROADCASTSDMasked512,
ssa.OpAMD64VPBROADCASTBMasked512,
ssa.OpAMD64VPBROADCASTWMasked512,
ssa.OpAMD64VPBROADCASTDMasked512,
ssa.OpAMD64VPBROADCASTQMasked512,
ssa.OpAMD64VCOMPRESSPSMasked128,
ssa.OpAMD64VCOMPRESSPSMasked256,
ssa.OpAMD64VCOMPRESSPSMasked512,
@@ -1104,10 +1138,10 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPSRLQMasked512:
p = simdVfpkv(s, v)
case ssa.OpAMD64VPINSRB128,
ssa.OpAMD64VPINSRW128,
ssa.OpAMD64VPINSRD128,
ssa.OpAMD64VPINSRQ128:
case ssa.OpAMD64VPINSRD128,
ssa.OpAMD64VPINSRQ128,
ssa.OpAMD64VPINSRB128,
ssa.OpAMD64VPINSRW128:
p = simdVgpvImm8(s, v)
case ssa.OpAMD64VPEXTRB128,
@@ -1221,6 +1255,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPAVGWMasked128,
ssa.OpAMD64VPAVGWMasked256,
ssa.OpAMD64VPAVGWMasked512,
ssa.OpAMD64VBROADCASTSSMasked128,
ssa.OpAMD64VPBROADCASTQMasked128,
ssa.OpAMD64VPBROADCASTBMasked128,
ssa.OpAMD64VPBROADCASTWMasked128,
ssa.OpAMD64VPBROADCASTDMasked128,
ssa.OpAMD64VBROADCASTSSMasked256,
ssa.OpAMD64VBROADCASTSDMasked256,
ssa.OpAMD64VPBROADCASTBMasked256,
ssa.OpAMD64VPBROADCASTWMasked256,
ssa.OpAMD64VPBROADCASTDMasked256,
ssa.OpAMD64VPBROADCASTQMasked256,
ssa.OpAMD64VBROADCASTSSMasked512,
ssa.OpAMD64VBROADCASTSDMasked512,
ssa.OpAMD64VPBROADCASTBMasked512,
ssa.OpAMD64VPBROADCASTWMasked512,
ssa.OpAMD64VPBROADCASTDMasked512,
ssa.OpAMD64VPBROADCASTQMasked512,
ssa.OpAMD64VRNDSCALEPSMasked128,
ssa.OpAMD64VRNDSCALEPSMasked256,
ssa.OpAMD64VRNDSCALEPSMasked512,

View File

@@ -228,6 +228,66 @@
(AverageMaskedUint16x8 x y mask) => (VPAVGWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(AverageMaskedUint16x16 x y mask) => (VPAVGWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(AverageMaskedUint16x32 x y mask) => (VPAVGWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
(Broadcast128Float32x4 ...) => (VBROADCASTSS128 ...)
(Broadcast128Float64x2 ...) => (VPBROADCASTQ128 ...)
(Broadcast128Int8x16 ...) => (VPBROADCASTB128 ...)
(Broadcast128Int16x8 ...) => (VPBROADCASTW128 ...)
(Broadcast128Int32x4 ...) => (VPBROADCASTD128 ...)
(Broadcast128Int64x2 ...) => (VPBROADCASTQ128 ...)
(Broadcast128Uint8x16 ...) => (VPBROADCASTB128 ...)
(Broadcast128Uint16x8 ...) => (VPBROADCASTW128 ...)
(Broadcast128Uint32x4 ...) => (VPBROADCASTD128 ...)
(Broadcast128Uint64x2 ...) => (VPBROADCASTQ128 ...)
(Broadcast128MaskedFloat32x4 x mask) => (VBROADCASTSSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(Broadcast128MaskedFloat64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(Broadcast128MaskedInt8x16 x mask) => (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
(Broadcast128MaskedInt16x8 x mask) => (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
(Broadcast128MaskedInt32x4 x mask) => (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(Broadcast128MaskedInt64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(Broadcast128MaskedUint8x16 x mask) => (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
(Broadcast128MaskedUint16x8 x mask) => (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
(Broadcast128MaskedUint32x4 x mask) => (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(Broadcast128MaskedUint64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(Broadcast256Float32x4 ...) => (VBROADCASTSS256 ...)
(Broadcast256Float64x2 ...) => (VBROADCASTSD256 ...)
(Broadcast256Int8x16 ...) => (VPBROADCASTB256 ...)
(Broadcast256Int16x8 ...) => (VPBROADCASTW256 ...)
(Broadcast256Int32x4 ...) => (VPBROADCASTD256 ...)
(Broadcast256Int64x2 ...) => (VPBROADCASTQ256 ...)
(Broadcast256Uint8x16 ...) => (VPBROADCASTB256 ...)
(Broadcast256Uint16x8 ...) => (VPBROADCASTW256 ...)
(Broadcast256Uint32x4 ...) => (VPBROADCASTD256 ...)
(Broadcast256Uint64x2 ...) => (VPBROADCASTQ256 ...)
(Broadcast256MaskedFloat32x4 x mask) => (VBROADCASTSSMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(Broadcast256MaskedFloat64x2 x mask) => (VBROADCASTSDMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(Broadcast256MaskedInt8x16 x mask) => (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM <types.TypeMask> mask))
(Broadcast256MaskedInt16x8 x mask) => (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM <types.TypeMask> mask))
(Broadcast256MaskedInt32x4 x mask) => (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(Broadcast256MaskedInt64x2 x mask) => (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(Broadcast256MaskedUint8x16 x mask) => (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM <types.TypeMask> mask))
(Broadcast256MaskedUint16x8 x mask) => (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM <types.TypeMask> mask))
(Broadcast256MaskedUint32x4 x mask) => (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(Broadcast256MaskedUint64x2 x mask) => (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(Broadcast512Float32x4 ...) => (VBROADCASTSS512 ...)
(Broadcast512Float64x2 ...) => (VBROADCASTSD512 ...)
(Broadcast512Int8x16 ...) => (VPBROADCASTB512 ...)
(Broadcast512Int16x8 ...) => (VPBROADCASTW512 ...)
(Broadcast512Int32x4 ...) => (VPBROADCASTD512 ...)
(Broadcast512Int64x2 ...) => (VPBROADCASTQ512 ...)
(Broadcast512Uint8x16 ...) => (VPBROADCASTB512 ...)
(Broadcast512Uint16x8 ...) => (VPBROADCASTW512 ...)
(Broadcast512Uint32x4 ...) => (VPBROADCASTD512 ...)
(Broadcast512Uint64x2 ...) => (VPBROADCASTQ512 ...)
(Broadcast512MaskedFloat32x4 x mask) => (VBROADCASTSSMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(Broadcast512MaskedFloat64x2 x mask) => (VBROADCASTSDMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(Broadcast512MaskedInt8x16 x mask) => (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM <types.TypeMask> mask))
(Broadcast512MaskedInt16x8 x mask) => (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM <types.TypeMask> mask))
(Broadcast512MaskedInt32x4 x mask) => (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(Broadcast512MaskedInt64x2 x mask) => (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(Broadcast512MaskedUint8x16 x mask) => (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM <types.TypeMask> mask))
(Broadcast512MaskedUint16x8 x mask) => (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM <types.TypeMask> mask))
(Broadcast512MaskedUint32x4 x mask) => (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
(Broadcast512MaskedUint64x2 x mask) => (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(CeilFloat32x4 x) => (VROUNDPS128 [2] x)
(CeilFloat32x8 x) => (VROUNDPS256 [2] x)
(CeilFloat64x2 x) => (VROUNDPD128 [2] x)
@@ -1396,6 +1456,8 @@
(ScaleMaskedFloat64x2 x y mask) => (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(ScaleMaskedFloat64x4 x y mask) => (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(ScaleMaskedFloat64x8 x y mask) => (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
(SetElemFloat32x4 ...) => (VPINSRD128 ...)
(SetElemFloat64x2 ...) => (VPINSRQ128 ...)
(SetElemInt8x16 ...) => (VPINSRB128 ...)
(SetElemInt16x8 ...) => (VPINSRW128 ...)
(SetElemInt32x4 ...) => (VPINSRD128 ...)

View File

@@ -20,6 +20,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VADDSUBPD256", argLength: 2, reg: v21, asm: "VADDSUBPD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VADDSUBPS128", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VADDSUBPS256", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VBROADCASTSD256", argLength: 1, reg: v11, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VBROADCASTSD512", argLength: 1, reg: w11, asm: "VBROADCASTSD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VBROADCASTSDMasked256", argLength: 2, reg: wkw, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VBROADCASTSDMasked512", argLength: 2, reg: wkw, asm: "VBROADCASTSD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VBROADCASTSS128", argLength: 1, reg: v11, asm: "VBROADCASTSS", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VBROADCASTSS256", argLength: 1, reg: v11, asm: "VBROADCASTSS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VBROADCASTSS512", argLength: 1, reg: w11, asm: "VBROADCASTSS", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VBROADCASTSSMasked128", argLength: 2, reg: wkw, asm: "VBROADCASTSS", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VBROADCASTSSMasked256", argLength: 2, reg: wkw, asm: "VBROADCASTSS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VBROADCASTSSMasked512", argLength: 2, reg: wkw, asm: "VBROADCASTSS", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VCOMPRESSPDMasked128", argLength: 2, reg: wkw, asm: "VCOMPRESSPD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VCOMPRESSPDMasked256", argLength: 2, reg: wkw, asm: "VCOMPRESSPD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VCOMPRESSPDMasked512", argLength: 2, reg: wkw, asm: "VCOMPRESSPD", commutative: false, typ: "Vec512", resultInArg0: false},
@@ -252,6 +262,30 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPBLENDMWMasked512", argLength: 3, reg: w2kw, asm: "VPBLENDMW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPBLENDVB128", argLength: 3, reg: v31, asm: "VPBLENDVB", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPBLENDVB256", argLength: 3, reg: v31, asm: "VPBLENDVB", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPBROADCASTB128", argLength: 1, reg: v11, asm: "VPBROADCASTB", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPBROADCASTB256", argLength: 1, reg: v11, asm: "VPBROADCASTB", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPBROADCASTB512", argLength: 1, reg: w11, asm: "VPBROADCASTB", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPBROADCASTBMasked128", argLength: 2, reg: wkw, asm: "VPBROADCASTB", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPBROADCASTBMasked256", argLength: 2, reg: wkw, asm: "VPBROADCASTB", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPBROADCASTBMasked512", argLength: 2, reg: wkw, asm: "VPBROADCASTB", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPBROADCASTD128", argLength: 1, reg: v11, asm: "VPBROADCASTD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPBROADCASTD256", argLength: 1, reg: v11, asm: "VPBROADCASTD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPBROADCASTD512", argLength: 1, reg: w11, asm: "VPBROADCASTD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPBROADCASTDMasked128", argLength: 2, reg: wkw, asm: "VPBROADCASTD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPBROADCASTDMasked256", argLength: 2, reg: wkw, asm: "VPBROADCASTD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPBROADCASTDMasked512", argLength: 2, reg: wkw, asm: "VPBROADCASTD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPBROADCASTQ128", argLength: 1, reg: v11, asm: "VPBROADCASTQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPBROADCASTQ256", argLength: 1, reg: v11, asm: "VPBROADCASTQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPBROADCASTQ512", argLength: 1, reg: w11, asm: "VPBROADCASTQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPBROADCASTQMasked128", argLength: 2, reg: wkw, asm: "VPBROADCASTQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPBROADCASTQMasked256", argLength: 2, reg: wkw, asm: "VPBROADCASTQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPBROADCASTQMasked512", argLength: 2, reg: wkw, asm: "VPBROADCASTQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPBROADCASTW128", argLength: 1, reg: v11, asm: "VPBROADCASTW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPBROADCASTW256", argLength: 1, reg: v11, asm: "VPBROADCASTW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPBROADCASTW512", argLength: 1, reg: w11, asm: "VPBROADCASTW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPBROADCASTWMasked128", argLength: 2, reg: wkw, asm: "VPBROADCASTW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPBROADCASTWMasked256", argLength: 2, reg: wkw, asm: "VPBROADCASTW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPBROADCASTWMasked512", argLength: 2, reg: wkw, asm: "VPBROADCASTW", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPCMPEQB128", argLength: 2, reg: v21, asm: "VPCMPEQB", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPCMPEQB256", argLength: 2, reg: v21, asm: "VPCMPEQB", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPCMPEQB512", argLength: 2, reg: w2k, asm: "VPCMPEQB", commutative: true, typ: "Mask", resultInArg0: false},
@@ -1000,10 +1034,10 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPRORQMasked128", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPRORQMasked256", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPRORQMasked512", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPINSRB128", argLength: 2, reg: vgpv, asm: "VPINSRB", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPINSRW128", argLength: 2, reg: vgpv, asm: "VPINSRW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPINSRD128", argLength: 2, reg: vgpv, asm: "VPINSRD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPINSRQ128", argLength: 2, reg: vgpv, asm: "VPINSRQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPINSRB128", argLength: 2, reg: vgpv, asm: "VPINSRB", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPINSRW128", argLength: 2, reg: vgpv, asm: "VPINSRW", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VINSERTF128256", argLength: 2, reg: v21, asm: "VINSERTF128", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VINSERTF64X4512", argLength: 2, reg: w21, asm: "VINSERTF64X4", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VINSERTI128256", argLength: 2, reg: v21, asm: "VINSERTI128", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},

View File

@@ -232,6 +232,66 @@ func simdGenericOps() []opData {
{name: "AverageUint16x8", argLength: 2, commutative: true},
{name: "AverageUint16x16", argLength: 2, commutative: true},
{name: "AverageUint16x32", argLength: 2, commutative: true},
{name: "Broadcast128Float32x4", argLength: 1, commutative: false},
{name: "Broadcast128Float64x2", argLength: 1, commutative: false},
{name: "Broadcast128Int8x16", argLength: 1, commutative: false},
{name: "Broadcast128Int16x8", argLength: 1, commutative: false},
{name: "Broadcast128Int32x4", argLength: 1, commutative: false},
{name: "Broadcast128Int64x2", argLength: 1, commutative: false},
{name: "Broadcast128MaskedFloat32x4", argLength: 2, commutative: false},
{name: "Broadcast128MaskedFloat64x2", argLength: 2, commutative: false},
{name: "Broadcast128MaskedInt8x16", argLength: 2, commutative: false},
{name: "Broadcast128MaskedInt16x8", argLength: 2, commutative: false},
{name: "Broadcast128MaskedInt32x4", argLength: 2, commutative: false},
{name: "Broadcast128MaskedInt64x2", argLength: 2, commutative: false},
{name: "Broadcast128MaskedUint8x16", argLength: 2, commutative: false},
{name: "Broadcast128MaskedUint16x8", argLength: 2, commutative: false},
{name: "Broadcast128MaskedUint32x4", argLength: 2, commutative: false},
{name: "Broadcast128MaskedUint64x2", argLength: 2, commutative: false},
{name: "Broadcast128Uint8x16", argLength: 1, commutative: false},
{name: "Broadcast128Uint16x8", argLength: 1, commutative: false},
{name: "Broadcast128Uint32x4", argLength: 1, commutative: false},
{name: "Broadcast128Uint64x2", argLength: 1, commutative: false},
{name: "Broadcast256Float32x4", argLength: 1, commutative: false},
{name: "Broadcast256Float64x2", argLength: 1, commutative: false},
{name: "Broadcast256Int8x16", argLength: 1, commutative: false},
{name: "Broadcast256Int16x8", argLength: 1, commutative: false},
{name: "Broadcast256Int32x4", argLength: 1, commutative: false},
{name: "Broadcast256Int64x2", argLength: 1, commutative: false},
{name: "Broadcast256MaskedFloat32x4", argLength: 2, commutative: false},
{name: "Broadcast256MaskedFloat64x2", argLength: 2, commutative: false},
{name: "Broadcast256MaskedInt8x16", argLength: 2, commutative: false},
{name: "Broadcast256MaskedInt16x8", argLength: 2, commutative: false},
{name: "Broadcast256MaskedInt32x4", argLength: 2, commutative: false},
{name: "Broadcast256MaskedInt64x2", argLength: 2, commutative: false},
{name: "Broadcast256MaskedUint8x16", argLength: 2, commutative: false},
{name: "Broadcast256MaskedUint16x8", argLength: 2, commutative: false},
{name: "Broadcast256MaskedUint32x4", argLength: 2, commutative: false},
{name: "Broadcast256MaskedUint64x2", argLength: 2, commutative: false},
{name: "Broadcast256Uint8x16", argLength: 1, commutative: false},
{name: "Broadcast256Uint16x8", argLength: 1, commutative: false},
{name: "Broadcast256Uint32x4", argLength: 1, commutative: false},
{name: "Broadcast256Uint64x2", argLength: 1, commutative: false},
{name: "Broadcast512Float32x4", argLength: 1, commutative: false},
{name: "Broadcast512Float64x2", argLength: 1, commutative: false},
{name: "Broadcast512Int8x16", argLength: 1, commutative: false},
{name: "Broadcast512Int16x8", argLength: 1, commutative: false},
{name: "Broadcast512Int32x4", argLength: 1, commutative: false},
{name: "Broadcast512Int64x2", argLength: 1, commutative: false},
{name: "Broadcast512MaskedFloat32x4", argLength: 2, commutative: false},
{name: "Broadcast512MaskedFloat64x2", argLength: 2, commutative: false},
{name: "Broadcast512MaskedInt8x16", argLength: 2, commutative: false},
{name: "Broadcast512MaskedInt16x8", argLength: 2, commutative: false},
{name: "Broadcast512MaskedInt32x4", argLength: 2, commutative: false},
{name: "Broadcast512MaskedInt64x2", argLength: 2, commutative: false},
{name: "Broadcast512MaskedUint8x16", argLength: 2, commutative: false},
{name: "Broadcast512MaskedUint16x8", argLength: 2, commutative: false},
{name: "Broadcast512MaskedUint32x4", argLength: 2, commutative: false},
{name: "Broadcast512MaskedUint64x2", argLength: 2, commutative: false},
{name: "Broadcast512Uint8x16", argLength: 1, commutative: false},
{name: "Broadcast512Uint16x8", argLength: 1, commutative: false},
{name: "Broadcast512Uint32x4", argLength: 1, commutative: false},
{name: "Broadcast512Uint64x2", argLength: 1, commutative: false},
{name: "CeilFloat32x4", argLength: 1, commutative: false},
{name: "CeilFloat32x8", argLength: 1, commutative: false},
{name: "CeilFloat64x2", argLength: 1, commutative: false},
@@ -1812,6 +1872,8 @@ func simdGenericOps() []opData {
{name: "RoundToEvenScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
{name: "RoundToEvenScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
{name: "RoundToEvenScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"},
{name: "SetElemFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
{name: "SetElemFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
{name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "UInt8"},
{name: "SetElemInt16x8", argLength: 2, commutative: false, aux: "UInt8"},
{name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "UInt8"},

View File

@@ -1242,6 +1242,16 @@ const (
OpAMD64VADDSUBPD256
OpAMD64VADDSUBPS128
OpAMD64VADDSUBPS256
OpAMD64VBROADCASTSD256
OpAMD64VBROADCASTSD512
OpAMD64VBROADCASTSDMasked256
OpAMD64VBROADCASTSDMasked512
OpAMD64VBROADCASTSS128
OpAMD64VBROADCASTSS256
OpAMD64VBROADCASTSS512
OpAMD64VBROADCASTSSMasked128
OpAMD64VBROADCASTSSMasked256
OpAMD64VBROADCASTSSMasked512
OpAMD64VCOMPRESSPDMasked128
OpAMD64VCOMPRESSPDMasked256
OpAMD64VCOMPRESSPDMasked512
@@ -1474,6 +1484,30 @@ const (
OpAMD64VPBLENDMWMasked512
OpAMD64VPBLENDVB128
OpAMD64VPBLENDVB256
OpAMD64VPBROADCASTB128
OpAMD64VPBROADCASTB256
OpAMD64VPBROADCASTB512
OpAMD64VPBROADCASTBMasked128
OpAMD64VPBROADCASTBMasked256
OpAMD64VPBROADCASTBMasked512
OpAMD64VPBROADCASTD128
OpAMD64VPBROADCASTD256
OpAMD64VPBROADCASTD512
OpAMD64VPBROADCASTDMasked128
OpAMD64VPBROADCASTDMasked256
OpAMD64VPBROADCASTDMasked512
OpAMD64VPBROADCASTQ128
OpAMD64VPBROADCASTQ256
OpAMD64VPBROADCASTQ512
OpAMD64VPBROADCASTQMasked128
OpAMD64VPBROADCASTQMasked256
OpAMD64VPBROADCASTQMasked512
OpAMD64VPBROADCASTW128
OpAMD64VPBROADCASTW256
OpAMD64VPBROADCASTW512
OpAMD64VPBROADCASTWMasked128
OpAMD64VPBROADCASTWMasked256
OpAMD64VPBROADCASTWMasked512
OpAMD64VPCMPEQB128
OpAMD64VPCMPEQB256
OpAMD64VPCMPEQB512
@@ -2222,10 +2256,10 @@ const (
OpAMD64VPRORQMasked128
OpAMD64VPRORQMasked256
OpAMD64VPRORQMasked512
OpAMD64VPINSRB128
OpAMD64VPINSRW128
OpAMD64VPINSRD128
OpAMD64VPINSRQ128
OpAMD64VPINSRB128
OpAMD64VPINSRW128
OpAMD64VINSERTF128256
OpAMD64VINSERTF64X4512
OpAMD64VINSERTI128256
@@ -4839,6 +4873,66 @@ const (
OpAverageUint16x8
OpAverageUint16x16
OpAverageUint16x32
OpBroadcast128Float32x4
OpBroadcast128Float64x2
OpBroadcast128Int8x16
OpBroadcast128Int16x8
OpBroadcast128Int32x4
OpBroadcast128Int64x2
OpBroadcast128MaskedFloat32x4
OpBroadcast128MaskedFloat64x2
OpBroadcast128MaskedInt8x16
OpBroadcast128MaskedInt16x8
OpBroadcast128MaskedInt32x4
OpBroadcast128MaskedInt64x2
OpBroadcast128MaskedUint8x16
OpBroadcast128MaskedUint16x8
OpBroadcast128MaskedUint32x4
OpBroadcast128MaskedUint64x2
OpBroadcast128Uint8x16
OpBroadcast128Uint16x8
OpBroadcast128Uint32x4
OpBroadcast128Uint64x2
OpBroadcast256Float32x4
OpBroadcast256Float64x2
OpBroadcast256Int8x16
OpBroadcast256Int16x8
OpBroadcast256Int32x4
OpBroadcast256Int64x2
OpBroadcast256MaskedFloat32x4
OpBroadcast256MaskedFloat64x2
OpBroadcast256MaskedInt8x16
OpBroadcast256MaskedInt16x8
OpBroadcast256MaskedInt32x4
OpBroadcast256MaskedInt64x2
OpBroadcast256MaskedUint8x16
OpBroadcast256MaskedUint16x8
OpBroadcast256MaskedUint32x4
OpBroadcast256MaskedUint64x2
OpBroadcast256Uint8x16
OpBroadcast256Uint16x8
OpBroadcast256Uint32x4
OpBroadcast256Uint64x2
OpBroadcast512Float32x4
OpBroadcast512Float64x2
OpBroadcast512Int8x16
OpBroadcast512Int16x8
OpBroadcast512Int32x4
OpBroadcast512Int64x2
OpBroadcast512MaskedFloat32x4
OpBroadcast512MaskedFloat64x2
OpBroadcast512MaskedInt8x16
OpBroadcast512MaskedInt16x8
OpBroadcast512MaskedInt32x4
OpBroadcast512MaskedInt64x2
OpBroadcast512MaskedUint8x16
OpBroadcast512MaskedUint16x8
OpBroadcast512MaskedUint32x4
OpBroadcast512MaskedUint64x2
OpBroadcast512Uint8x16
OpBroadcast512Uint16x8
OpBroadcast512Uint32x4
OpBroadcast512Uint64x2
OpCeilFloat32x4
OpCeilFloat32x8
OpCeilFloat64x2
@@ -6419,6 +6513,8 @@ const (
OpRoundToEvenScaledResidueMaskedFloat64x2
OpRoundToEvenScaledResidueMaskedFloat64x4
OpRoundToEvenScaledResidueMaskedFloat64x8
OpSetElemFloat32x4
OpSetElemFloat64x2
OpSetElemInt8x16
OpSetElemInt16x8
OpSetElemInt32x4
@@ -19771,6 +19867,141 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "VBROADCASTSD256",
argLen: 1,
asm: x86.AVBROADCASTSD,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VBROADCASTSD512",
argLen: 1,
asm: x86.AVBROADCASTSD,
reg: regInfo{
inputs: []inputInfo{
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
},
outputs: []outputInfo{
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
},
},
},
{
name: "VBROADCASTSDMasked256",
argLen: 2,
asm: x86.AVBROADCASTSD,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VBROADCASTSDMasked512",
argLen: 2,
asm: x86.AVBROADCASTSD,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VBROADCASTSS128",
argLen: 1,
asm: x86.AVBROADCASTSS,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VBROADCASTSS256",
argLen: 1,
asm: x86.AVBROADCASTSS,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VBROADCASTSS512",
argLen: 1,
asm: x86.AVBROADCASTSS,
reg: regInfo{
inputs: []inputInfo{
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
},
outputs: []outputInfo{
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
},
},
},
{
name: "VBROADCASTSSMasked128",
argLen: 2,
asm: x86.AVBROADCASTSS,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VBROADCASTSSMasked256",
argLen: 2,
asm: x86.AVBROADCASTSS,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VBROADCASTSSMasked512",
argLen: 2,
asm: x86.AVBROADCASTSS,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VCOMPRESSPDMasked128",
argLen: 2,
@@ -23272,6 +23503,330 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "VPBROADCASTB128",
argLen: 1,
asm: x86.AVPBROADCASTB,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPBROADCASTB256",
argLen: 1,
asm: x86.AVPBROADCASTB,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPBROADCASTB512",
argLen: 1,
asm: x86.AVPBROADCASTB,
reg: regInfo{
inputs: []inputInfo{
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
},
outputs: []outputInfo{
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
},
},
},
{
name: "VPBROADCASTBMasked128",
argLen: 2,
asm: x86.AVPBROADCASTB,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPBROADCASTBMasked256",
argLen: 2,
asm: x86.AVPBROADCASTB,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPBROADCASTBMasked512",
argLen: 2,
asm: x86.AVPBROADCASTB,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPBROADCASTD128",
argLen: 1,
asm: x86.AVPBROADCASTD,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPBROADCASTD256",
argLen: 1,
asm: x86.AVPBROADCASTD,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPBROADCASTD512",
argLen: 1,
asm: x86.AVPBROADCASTD,
reg: regInfo{
inputs: []inputInfo{
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
},
outputs: []outputInfo{
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
},
},
},
{
name: "VPBROADCASTDMasked128",
argLen: 2,
asm: x86.AVPBROADCASTD,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPBROADCASTDMasked256",
argLen: 2,
asm: x86.AVPBROADCASTD,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPBROADCASTDMasked512",
argLen: 2,
asm: x86.AVPBROADCASTD,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPBROADCASTQ128",
argLen: 1,
asm: x86.AVPBROADCASTQ,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPBROADCASTQ256",
argLen: 1,
asm: x86.AVPBROADCASTQ,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPBROADCASTQ512",
argLen: 1,
asm: x86.AVPBROADCASTQ,
reg: regInfo{
inputs: []inputInfo{
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
},
outputs: []outputInfo{
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
},
},
},
{
name: "VPBROADCASTQMasked128",
argLen: 2,
asm: x86.AVPBROADCASTQ,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPBROADCASTQMasked256",
argLen: 2,
asm: x86.AVPBROADCASTQ,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPBROADCASTQMasked512",
argLen: 2,
asm: x86.AVPBROADCASTQ,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPBROADCASTW128",
argLen: 1,
asm: x86.AVPBROADCASTW,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPBROADCASTW256",
argLen: 1,
asm: x86.AVPBROADCASTW,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPBROADCASTW512",
argLen: 1,
asm: x86.AVPBROADCASTW,
reg: regInfo{
inputs: []inputInfo{
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
},
outputs: []outputInfo{
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
},
},
},
{
name: "VPBROADCASTWMasked128",
argLen: 2,
asm: x86.AVPBROADCASTW,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPBROADCASTWMasked256",
argLen: 2,
asm: x86.AVPBROADCASTW,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPBROADCASTWMasked512",
argLen: 2,
asm: x86.AVPBROADCASTW,
reg: regInfo{
inputs: []inputInfo{
{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPCMPEQB128",
argLen: 2,
@@ -34481,36 +35036,6 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "VPINSRB128",
auxType: auxUInt8,
argLen: 2,
asm: x86.AVPINSRB,
reg: regInfo{
inputs: []inputInfo{
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPINSRW128",
auxType: auxUInt8,
argLen: 2,
asm: x86.AVPINSRW,
reg: regInfo{
inputs: []inputInfo{
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPINSRD128",
auxType: auxUInt8,
@@ -34541,6 +35066,36 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "VPINSRB128",
auxType: auxUInt8,
argLen: 2,
asm: x86.AVPINSRB,
reg: regInfo{
inputs: []inputInfo{
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPINSRW128",
auxType: auxUInt8,
argLen: 2,
asm: x86.AVPINSRW,
reg: regInfo{
inputs: []inputInfo{
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VINSERTF128256",
auxType: auxUInt8,
@@ -64725,6 +65280,306 @@ var opcodeTable = [...]opInfo{
commutative: true,
generic: true,
},
{
name: "Broadcast128Float32x4",
argLen: 1,
generic: true,
},
{
name: "Broadcast128Float64x2",
argLen: 1,
generic: true,
},
{
name: "Broadcast128Int8x16",
argLen: 1,
generic: true,
},
{
name: "Broadcast128Int16x8",
argLen: 1,
generic: true,
},
{
name: "Broadcast128Int32x4",
argLen: 1,
generic: true,
},
{
name: "Broadcast128Int64x2",
argLen: 1,
generic: true,
},
{
name: "Broadcast128MaskedFloat32x4",
argLen: 2,
generic: true,
},
{
name: "Broadcast128MaskedFloat64x2",
argLen: 2,
generic: true,
},
{
name: "Broadcast128MaskedInt8x16",
argLen: 2,
generic: true,
},
{
name: "Broadcast128MaskedInt16x8",
argLen: 2,
generic: true,
},
{
name: "Broadcast128MaskedInt32x4",
argLen: 2,
generic: true,
},
{
name: "Broadcast128MaskedInt64x2",
argLen: 2,
generic: true,
},
{
name: "Broadcast128MaskedUint8x16",
argLen: 2,
generic: true,
},
{
name: "Broadcast128MaskedUint16x8",
argLen: 2,
generic: true,
},
{
name: "Broadcast128MaskedUint32x4",
argLen: 2,
generic: true,
},
{
name: "Broadcast128MaskedUint64x2",
argLen: 2,
generic: true,
},
{
name: "Broadcast128Uint8x16",
argLen: 1,
generic: true,
},
{
name: "Broadcast128Uint16x8",
argLen: 1,
generic: true,
},
{
name: "Broadcast128Uint32x4",
argLen: 1,
generic: true,
},
{
name: "Broadcast128Uint64x2",
argLen: 1,
generic: true,
},
{
name: "Broadcast256Float32x4",
argLen: 1,
generic: true,
},
{
name: "Broadcast256Float64x2",
argLen: 1,
generic: true,
},
{
name: "Broadcast256Int8x16",
argLen: 1,
generic: true,
},
{
name: "Broadcast256Int16x8",
argLen: 1,
generic: true,
},
{
name: "Broadcast256Int32x4",
argLen: 1,
generic: true,
},
{
name: "Broadcast256Int64x2",
argLen: 1,
generic: true,
},
{
name: "Broadcast256MaskedFloat32x4",
argLen: 2,
generic: true,
},
{
name: "Broadcast256MaskedFloat64x2",
argLen: 2,
generic: true,
},
{
name: "Broadcast256MaskedInt8x16",
argLen: 2,
generic: true,
},
{
name: "Broadcast256MaskedInt16x8",
argLen: 2,
generic: true,
},
{
name: "Broadcast256MaskedInt32x4",
argLen: 2,
generic: true,
},
{
name: "Broadcast256MaskedInt64x2",
argLen: 2,
generic: true,
},
{
name: "Broadcast256MaskedUint8x16",
argLen: 2,
generic: true,
},
{
name: "Broadcast256MaskedUint16x8",
argLen: 2,
generic: true,
},
{
name: "Broadcast256MaskedUint32x4",
argLen: 2,
generic: true,
},
{
name: "Broadcast256MaskedUint64x2",
argLen: 2,
generic: true,
},
{
name: "Broadcast256Uint8x16",
argLen: 1,
generic: true,
},
{
name: "Broadcast256Uint16x8",
argLen: 1,
generic: true,
},
{
name: "Broadcast256Uint32x4",
argLen: 1,
generic: true,
},
{
name: "Broadcast256Uint64x2",
argLen: 1,
generic: true,
},
{
name: "Broadcast512Float32x4",
argLen: 1,
generic: true,
},
{
name: "Broadcast512Float64x2",
argLen: 1,
generic: true,
},
{
name: "Broadcast512Int8x16",
argLen: 1,
generic: true,
},
{
name: "Broadcast512Int16x8",
argLen: 1,
generic: true,
},
{
name: "Broadcast512Int32x4",
argLen: 1,
generic: true,
},
{
name: "Broadcast512Int64x2",
argLen: 1,
generic: true,
},
{
name: "Broadcast512MaskedFloat32x4",
argLen: 2,
generic: true,
},
{
name: "Broadcast512MaskedFloat64x2",
argLen: 2,
generic: true,
},
{
name: "Broadcast512MaskedInt8x16",
argLen: 2,
generic: true,
},
{
name: "Broadcast512MaskedInt16x8",
argLen: 2,
generic: true,
},
{
name: "Broadcast512MaskedInt32x4",
argLen: 2,
generic: true,
},
{
name: "Broadcast512MaskedInt64x2",
argLen: 2,
generic: true,
},
{
name: "Broadcast512MaskedUint8x16",
argLen: 2,
generic: true,
},
{
name: "Broadcast512MaskedUint16x8",
argLen: 2,
generic: true,
},
{
name: "Broadcast512MaskedUint32x4",
argLen: 2,
generic: true,
},
{
name: "Broadcast512MaskedUint64x2",
argLen: 2,
generic: true,
},
{
name: "Broadcast512Uint8x16",
argLen: 1,
generic: true,
},
{
name: "Broadcast512Uint16x8",
argLen: 1,
generic: true,
},
{
name: "Broadcast512Uint32x4",
argLen: 1,
generic: true,
},
{
name: "Broadcast512Uint64x2",
argLen: 1,
generic: true,
},
{
name: "CeilFloat32x4",
argLen: 1,
@@ -73153,6 +74008,18 @@ var opcodeTable = [...]opInfo{
argLen: 2,
generic: true,
},
{
name: "SetElemFloat32x4",
auxType: auxUInt8,
argLen: 2,
generic: true,
},
{
name: "SetElemFloat64x2",
auxType: auxUInt8,
argLen: 2,
generic: true,
},
{
name: "SetElemInt8x16",
auxType: auxUInt8,

View File

@@ -1317,6 +1317,156 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpBitLen64(v)
case OpBitLen8:
return rewriteValueAMD64_OpBitLen8(v)
case OpBroadcast128Float32x4:
v.Op = OpAMD64VBROADCASTSS128
return true
case OpBroadcast128Float64x2:
v.Op = OpAMD64VPBROADCASTQ128
return true
case OpBroadcast128Int16x8:
v.Op = OpAMD64VPBROADCASTW128
return true
case OpBroadcast128Int32x4:
v.Op = OpAMD64VPBROADCASTD128
return true
case OpBroadcast128Int64x2:
v.Op = OpAMD64VPBROADCASTQ128
return true
case OpBroadcast128Int8x16:
v.Op = OpAMD64VPBROADCASTB128
return true
case OpBroadcast128MaskedFloat32x4:
return rewriteValueAMD64_OpBroadcast128MaskedFloat32x4(v)
case OpBroadcast128MaskedFloat64x2:
return rewriteValueAMD64_OpBroadcast128MaskedFloat64x2(v)
case OpBroadcast128MaskedInt16x8:
return rewriteValueAMD64_OpBroadcast128MaskedInt16x8(v)
case OpBroadcast128MaskedInt32x4:
return rewriteValueAMD64_OpBroadcast128MaskedInt32x4(v)
case OpBroadcast128MaskedInt64x2:
return rewriteValueAMD64_OpBroadcast128MaskedInt64x2(v)
case OpBroadcast128MaskedInt8x16:
return rewriteValueAMD64_OpBroadcast128MaskedInt8x16(v)
case OpBroadcast128MaskedUint16x8:
return rewriteValueAMD64_OpBroadcast128MaskedUint16x8(v)
case OpBroadcast128MaskedUint32x4:
return rewriteValueAMD64_OpBroadcast128MaskedUint32x4(v)
case OpBroadcast128MaskedUint64x2:
return rewriteValueAMD64_OpBroadcast128MaskedUint64x2(v)
case OpBroadcast128MaskedUint8x16:
return rewriteValueAMD64_OpBroadcast128MaskedUint8x16(v)
case OpBroadcast128Uint16x8:
v.Op = OpAMD64VPBROADCASTW128
return true
case OpBroadcast128Uint32x4:
v.Op = OpAMD64VPBROADCASTD128
return true
case OpBroadcast128Uint64x2:
v.Op = OpAMD64VPBROADCASTQ128
return true
case OpBroadcast128Uint8x16:
v.Op = OpAMD64VPBROADCASTB128
return true
case OpBroadcast256Float32x4:
v.Op = OpAMD64VBROADCASTSS256
return true
case OpBroadcast256Float64x2:
v.Op = OpAMD64VBROADCASTSD256
return true
case OpBroadcast256Int16x8:
v.Op = OpAMD64VPBROADCASTW256
return true
case OpBroadcast256Int32x4:
v.Op = OpAMD64VPBROADCASTD256
return true
case OpBroadcast256Int64x2:
v.Op = OpAMD64VPBROADCASTQ256
return true
case OpBroadcast256Int8x16:
v.Op = OpAMD64VPBROADCASTB256
return true
case OpBroadcast256MaskedFloat32x4:
return rewriteValueAMD64_OpBroadcast256MaskedFloat32x4(v)
case OpBroadcast256MaskedFloat64x2:
return rewriteValueAMD64_OpBroadcast256MaskedFloat64x2(v)
case OpBroadcast256MaskedInt16x8:
return rewriteValueAMD64_OpBroadcast256MaskedInt16x8(v)
case OpBroadcast256MaskedInt32x4:
return rewriteValueAMD64_OpBroadcast256MaskedInt32x4(v)
case OpBroadcast256MaskedInt64x2:
return rewriteValueAMD64_OpBroadcast256MaskedInt64x2(v)
case OpBroadcast256MaskedInt8x16:
return rewriteValueAMD64_OpBroadcast256MaskedInt8x16(v)
case OpBroadcast256MaskedUint16x8:
return rewriteValueAMD64_OpBroadcast256MaskedUint16x8(v)
case OpBroadcast256MaskedUint32x4:
return rewriteValueAMD64_OpBroadcast256MaskedUint32x4(v)
case OpBroadcast256MaskedUint64x2:
return rewriteValueAMD64_OpBroadcast256MaskedUint64x2(v)
case OpBroadcast256MaskedUint8x16:
return rewriteValueAMD64_OpBroadcast256MaskedUint8x16(v)
case OpBroadcast256Uint16x8:
v.Op = OpAMD64VPBROADCASTW256
return true
case OpBroadcast256Uint32x4:
v.Op = OpAMD64VPBROADCASTD256
return true
case OpBroadcast256Uint64x2:
v.Op = OpAMD64VPBROADCASTQ256
return true
case OpBroadcast256Uint8x16:
v.Op = OpAMD64VPBROADCASTB256
return true
case OpBroadcast512Float32x4:
v.Op = OpAMD64VBROADCASTSS512
return true
case OpBroadcast512Float64x2:
v.Op = OpAMD64VBROADCASTSD512
return true
case OpBroadcast512Int16x8:
v.Op = OpAMD64VPBROADCASTW512
return true
case OpBroadcast512Int32x4:
v.Op = OpAMD64VPBROADCASTD512
return true
case OpBroadcast512Int64x2:
v.Op = OpAMD64VPBROADCASTQ512
return true
case OpBroadcast512Int8x16:
v.Op = OpAMD64VPBROADCASTB512
return true
case OpBroadcast512MaskedFloat32x4:
return rewriteValueAMD64_OpBroadcast512MaskedFloat32x4(v)
case OpBroadcast512MaskedFloat64x2:
return rewriteValueAMD64_OpBroadcast512MaskedFloat64x2(v)
case OpBroadcast512MaskedInt16x8:
return rewriteValueAMD64_OpBroadcast512MaskedInt16x8(v)
case OpBroadcast512MaskedInt32x4:
return rewriteValueAMD64_OpBroadcast512MaskedInt32x4(v)
case OpBroadcast512MaskedInt64x2:
return rewriteValueAMD64_OpBroadcast512MaskedInt64x2(v)
case OpBroadcast512MaskedInt8x16:
return rewriteValueAMD64_OpBroadcast512MaskedInt8x16(v)
case OpBroadcast512MaskedUint16x8:
return rewriteValueAMD64_OpBroadcast512MaskedUint16x8(v)
case OpBroadcast512MaskedUint32x4:
return rewriteValueAMD64_OpBroadcast512MaskedUint32x4(v)
case OpBroadcast512MaskedUint64x2:
return rewriteValueAMD64_OpBroadcast512MaskedUint64x2(v)
case OpBroadcast512MaskedUint8x16:
return rewriteValueAMD64_OpBroadcast512MaskedUint8x16(v)
case OpBroadcast512Uint16x8:
v.Op = OpAMD64VPBROADCASTW512
return true
case OpBroadcast512Uint32x4:
v.Op = OpAMD64VPBROADCASTD512
return true
case OpBroadcast512Uint64x2:
v.Op = OpAMD64VPBROADCASTQ512
return true
case OpBroadcast512Uint8x16:
v.Op = OpAMD64VPBROADCASTB512
return true
case OpBswap16:
return rewriteValueAMD64_OpBswap16(v)
case OpBswap32:
@@ -4539,6 +4689,12 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpSelect1(v)
case OpSelectN:
return rewriteValueAMD64_OpSelectN(v)
case OpSetElemFloat32x4:
v.Op = OpAMD64VPINSRD128
return true
case OpSetElemFloat64x2:
v.Op = OpAMD64VPINSRQ128
return true
case OpSetElemInt16x8:
v.Op = OpAMD64VPINSRW128
return true
@@ -31628,6 +31784,486 @@ func rewriteValueAMD64_OpBitLen8(v *Value) bool {
}
return false
}
// rewriteValueAMD64_OpBroadcast128MaskedFloat32x4 rewrites v in place from
// Broadcast128MaskedFloat32x4 to VBROADCASTSSMasked128. The second argument is
// wrapped in a new VPMOVVec32x4ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast128MaskedFloat32x4(v *Value) bool {
	// match: (Broadcast128MaskedFloat32x4 x mask)
	// result: (VBROADCASTSSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VBROADCASTSSMasked128)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast128MaskedFloat64x2 rewrites v in place from
// Broadcast128MaskedFloat64x2 to VPBROADCASTQMasked128. The second argument is
// wrapped in a new VPMOVVec64x2ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast128MaskedFloat64x2(v *Value) bool {
	// match: (Broadcast128MaskedFloat64x2 x mask)
	// result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTQMasked128)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast128MaskedInt16x8 rewrites v in place from
// Broadcast128MaskedInt16x8 to VPBROADCASTWMasked128. The second argument is
// wrapped in a new VPMOVVec16x8ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast128MaskedInt16x8(v *Value) bool {
	// match: (Broadcast128MaskedInt16x8 x mask)
	// result: (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTWMasked128)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast128MaskedInt32x4 rewrites v in place from
// Broadcast128MaskedInt32x4 to VPBROADCASTDMasked128. The second argument is
// wrapped in a new VPMOVVec32x4ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast128MaskedInt32x4(v *Value) bool {
	// match: (Broadcast128MaskedInt32x4 x mask)
	// result: (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTDMasked128)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast128MaskedInt64x2 rewrites v in place from
// Broadcast128MaskedInt64x2 to VPBROADCASTQMasked128. The second argument is
// wrapped in a new VPMOVVec64x2ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast128MaskedInt64x2(v *Value) bool {
	// match: (Broadcast128MaskedInt64x2 x mask)
	// result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTQMasked128)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast128MaskedInt8x16 rewrites v in place from
// Broadcast128MaskedInt8x16 to VPBROADCASTBMasked128. The second argument is
// wrapped in a new VPMOVVec8x16ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast128MaskedInt8x16(v *Value) bool {
	// match: (Broadcast128MaskedInt8x16 x mask)
	// result: (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTBMasked128)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast128MaskedUint16x8 rewrites v in place from
// Broadcast128MaskedUint16x8 to VPBROADCASTWMasked128. The second argument is
// wrapped in a new VPMOVVec16x8ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast128MaskedUint16x8(v *Value) bool {
	// match: (Broadcast128MaskedUint16x8 x mask)
	// result: (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTWMasked128)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast128MaskedUint32x4 rewrites v in place from
// Broadcast128MaskedUint32x4 to VPBROADCASTDMasked128. The second argument is
// wrapped in a new VPMOVVec32x4ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast128MaskedUint32x4(v *Value) bool {
	// match: (Broadcast128MaskedUint32x4 x mask)
	// result: (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTDMasked128)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast128MaskedUint64x2 rewrites v in place from
// Broadcast128MaskedUint64x2 to VPBROADCASTQMasked128. The second argument is
// wrapped in a new VPMOVVec64x2ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast128MaskedUint64x2(v *Value) bool {
	// match: (Broadcast128MaskedUint64x2 x mask)
	// result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTQMasked128)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast128MaskedUint8x16 rewrites v in place from
// Broadcast128MaskedUint8x16 to VPBROADCASTBMasked128. The second argument is
// wrapped in a new VPMOVVec8x16ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast128MaskedUint8x16(v *Value) bool {
	// match: (Broadcast128MaskedUint8x16 x mask)
	// result: (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTBMasked128)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast256MaskedFloat32x4 rewrites v in place from
// Broadcast256MaskedFloat32x4 to VBROADCASTSSMasked256. The second argument is
// wrapped in a new VPMOVVec32x4ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast256MaskedFloat32x4(v *Value) bool {
	// match: (Broadcast256MaskedFloat32x4 x mask)
	// result: (VBROADCASTSSMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VBROADCASTSSMasked256)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast256MaskedFloat64x2 rewrites v in place from
// Broadcast256MaskedFloat64x2 to VBROADCASTSDMasked256. The second argument is
// wrapped in a new VPMOVVec64x2ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast256MaskedFloat64x2(v *Value) bool {
	// match: (Broadcast256MaskedFloat64x2 x mask)
	// result: (VBROADCASTSDMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VBROADCASTSDMasked256)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast256MaskedInt16x8 rewrites v in place from
// Broadcast256MaskedInt16x8 to VPBROADCASTWMasked256. The second argument is
// wrapped in a new VPMOVVec16x8ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast256MaskedInt16x8(v *Value) bool {
	// match: (Broadcast256MaskedInt16x8 x mask)
	// result: (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTWMasked256)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast256MaskedInt32x4 rewrites v in place from
// Broadcast256MaskedInt32x4 to VPBROADCASTDMasked256. The second argument is
// wrapped in a new VPMOVVec32x4ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast256MaskedInt32x4(v *Value) bool {
	// match: (Broadcast256MaskedInt32x4 x mask)
	// result: (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTDMasked256)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast256MaskedInt64x2 rewrites v in place from
// Broadcast256MaskedInt64x2 to VPBROADCASTQMasked256. The second argument is
// wrapped in a new VPMOVVec64x2ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast256MaskedInt64x2(v *Value) bool {
	// match: (Broadcast256MaskedInt64x2 x mask)
	// result: (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTQMasked256)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast256MaskedInt8x16 rewrites v in place from
// Broadcast256MaskedInt8x16 to VPBROADCASTBMasked256. The second argument is
// wrapped in a new VPMOVVec8x16ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast256MaskedInt8x16(v *Value) bool {
	// match: (Broadcast256MaskedInt8x16 x mask)
	// result: (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTBMasked256)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast256MaskedUint16x8 rewrites v in place from
// Broadcast256MaskedUint16x8 to VPBROADCASTWMasked256. The second argument is
// wrapped in a new VPMOVVec16x8ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast256MaskedUint16x8(v *Value) bool {
	// match: (Broadcast256MaskedUint16x8 x mask)
	// result: (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTWMasked256)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast256MaskedUint32x4 rewrites v in place from
// Broadcast256MaskedUint32x4 to VPBROADCASTDMasked256. The second argument is
// wrapped in a new VPMOVVec32x4ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast256MaskedUint32x4(v *Value) bool {
	// match: (Broadcast256MaskedUint32x4 x mask)
	// result: (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTDMasked256)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast256MaskedUint64x2 rewrites v in place from
// Broadcast256MaskedUint64x2 to VPBROADCASTQMasked256. The second argument is
// wrapped in a new VPMOVVec64x2ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast256MaskedUint64x2(v *Value) bool {
	// match: (Broadcast256MaskedUint64x2 x mask)
	// result: (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTQMasked256)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast256MaskedUint8x16 rewrites v in place from
// Broadcast256MaskedUint8x16 to VPBROADCASTBMasked256. The second argument is
// wrapped in a new VPMOVVec8x16ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast256MaskedUint8x16(v *Value) bool {
	// match: (Broadcast256MaskedUint8x16 x mask)
	// result: (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTBMasked256)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast512MaskedFloat32x4 rewrites v in place from
// Broadcast512MaskedFloat32x4 to VBROADCASTSSMasked512. The second argument is
// wrapped in a new VPMOVVec32x4ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast512MaskedFloat32x4(v *Value) bool {
	// match: (Broadcast512MaskedFloat32x4 x mask)
	// result: (VBROADCASTSSMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VBROADCASTSSMasked512)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast512MaskedFloat64x2 rewrites v in place from
// Broadcast512MaskedFloat64x2 to VBROADCASTSDMasked512. The second argument is
// wrapped in a new VPMOVVec64x2ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast512MaskedFloat64x2(v *Value) bool {
	// match: (Broadcast512MaskedFloat64x2 x mask)
	// result: (VBROADCASTSDMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VBROADCASTSDMasked512)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast512MaskedInt16x8 rewrites v in place from
// Broadcast512MaskedInt16x8 to VPBROADCASTWMasked512. The second argument is
// wrapped in a new VPMOVVec16x8ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast512MaskedInt16x8(v *Value) bool {
	// match: (Broadcast512MaskedInt16x8 x mask)
	// result: (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTWMasked512)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast512MaskedInt32x4 rewrites v in place from
// Broadcast512MaskedInt32x4 to VPBROADCASTDMasked512. The second argument is
// wrapped in a new VPMOVVec32x4ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast512MaskedInt32x4(v *Value) bool {
	// match: (Broadcast512MaskedInt32x4 x mask)
	// result: (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTDMasked512)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast512MaskedInt64x2 rewrites v in place from
// Broadcast512MaskedInt64x2 to VPBROADCASTQMasked512. The second argument is
// wrapped in a new VPMOVVec64x2ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast512MaskedInt64x2(v *Value) bool {
	// match: (Broadcast512MaskedInt64x2 x mask)
	// result: (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTQMasked512)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast512MaskedInt8x16 rewrites v in place from
// Broadcast512MaskedInt8x16 to VPBROADCASTBMasked512. The second argument is
// wrapped in a new VPMOVVec8x16ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast512MaskedInt8x16(v *Value) bool {
	// match: (Broadcast512MaskedInt8x16 x mask)
	// result: (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTBMasked512)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast512MaskedUint16x8 rewrites v in place from
// Broadcast512MaskedUint16x8 to VPBROADCASTWMasked512. The second argument is
// wrapped in a new VPMOVVec16x8ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast512MaskedUint16x8(v *Value) bool {
	// match: (Broadcast512MaskedUint16x8 x mask)
	// result: (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTWMasked512)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast512MaskedUint32x4 rewrites v in place from
// Broadcast512MaskedUint32x4 to VPBROADCASTDMasked512. The second argument is
// wrapped in a new VPMOVVec32x4ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast512MaskedUint32x4(v *Value) bool {
	// match: (Broadcast512MaskedUint32x4 x mask)
	// result: (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTDMasked512)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast512MaskedUint64x2 rewrites v in place from
// Broadcast512MaskedUint64x2 to VPBROADCASTQMasked512. The second argument is
// wrapped in a new VPMOVVec64x2ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast512MaskedUint64x2(v *Value) bool {
	// match: (Broadcast512MaskedUint64x2 x mask)
	// result: (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTQMasked512)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
// rewriteValueAMD64_OpBroadcast512MaskedUint8x16 rewrites v in place from
// Broadcast512MaskedUint8x16 to VPBROADCASTBMasked512. The second argument is
// wrapped in a new VPMOVVec8x16ToM value of type types.TypeMask. The rewrite
// is unconditional, so the function always returns true.
func rewriteValueAMD64_OpBroadcast512MaskedUint8x16(v *Value) bool {
	// match: (Broadcast512MaskedUint8x16 x mask)
	// result: (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM <types.TypeMask> mask))
	vecMask := v.Args[1]
	src := v.Args[0]
	blk := v.Block
	v.reset(OpAMD64VPBROADCASTBMasked512)
	m := blk.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
	m.AddArg(vecMask)
	v.AddArg2(src, m)
	return true
}
func rewriteValueAMD64_OpBswap16(v *Value) bool {
v_0 := v.Args[0]
// match: (Bswap16 x)

View File

@@ -240,6 +240,66 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint16x8.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.Broadcast128", opLen1(ssa.OpBroadcast128Float32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x2.Broadcast128", opLen1(ssa.OpBroadcast128Float64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x16.Broadcast128", opLen1(ssa.OpBroadcast128Int8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.Broadcast128", opLen1(ssa.OpBroadcast128Int16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x4.Broadcast128", opLen1(ssa.OpBroadcast128Int32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x2.Broadcast128", opLen1(ssa.OpBroadcast128Int64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x16.Broadcast128", opLen1(ssa.OpBroadcast128Uint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.Broadcast128", opLen1(ssa.OpBroadcast128Uint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x4.Broadcast128", opLen1(ssa.OpBroadcast128Uint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x2.Broadcast128", opLen1(ssa.OpBroadcast128Uint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x16.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x16.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x4.Broadcast256", opLen1(ssa.OpBroadcast256Float32x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x2.Broadcast256", opLen1(ssa.OpBroadcast256Float64x2, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x16.Broadcast256", opLen1(ssa.OpBroadcast256Int8x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x8.Broadcast256", opLen1(ssa.OpBroadcast256Int16x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x4.Broadcast256", opLen1(ssa.OpBroadcast256Int32x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x2.Broadcast256", opLen1(ssa.OpBroadcast256Int64x2, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x16.Broadcast256", opLen1(ssa.OpBroadcast256Uint8x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x8.Broadcast256", opLen1(ssa.OpBroadcast256Uint16x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x4.Broadcast256", opLen1(ssa.OpBroadcast256Uint32x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x2.Broadcast256", opLen1(ssa.OpBroadcast256Uint64x2, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedFloat32x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedFloat64x2, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x16.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt8x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x8.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt16x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt32x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt64x2, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x16.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint8x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x8.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint16x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint32x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint64x2, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x4.Broadcast512", opLen1(ssa.OpBroadcast512Float32x4, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.Broadcast512", opLen1(ssa.OpBroadcast512Float64x2, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Broadcast512", opLen1(ssa.OpBroadcast512Int8x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.Broadcast512", opLen1(ssa.OpBroadcast512Int16x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.Broadcast512", opLen1(ssa.OpBroadcast512Int32x4, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.Broadcast512", opLen1(ssa.OpBroadcast512Int64x2, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.Broadcast512", opLen1(ssa.OpBroadcast512Uint8x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.Broadcast512", opLen1(ssa.OpBroadcast512Uint16x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.Broadcast512", opLen1(ssa.OpBroadcast512Uint32x4, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.Broadcast512", opLen1(ssa.OpBroadcast512Uint64x2, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedFloat32x4, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedFloat64x2, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt8x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt16x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt32x4, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt64x2, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint8x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint16x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint32x4, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint64x2, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.Ceil", opLen1(ssa.OpCeilFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Ceil", opLen1(ssa.OpCeilFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x2.Ceil", opLen1(ssa.OpCeilFloat64x2, types.TypeVec128), sys.AMD64)
@@ -1408,6 +1468,8 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Float64x2.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.SetElem", opLen2Imm8(ssa.OpSetElemFloat32x4, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Float64x2.SetElem", opLen2Imm8(ssa.OpSetElemFloat64x2, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int8x16.SetElem", opLen2Imm8(ssa.OpSetElemInt8x16, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int16x8.SetElem", opLen2Imm8(ssa.OpSetElemInt16x8, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int32x4.SetElem", opLen2Imm8(ssa.OpSetElemInt32x4, types.TypeVec128, 0), sys.AMD64)

View File

@@ -87,6 +87,23 @@ var ternaryFlaky = &shapes{ // for tests that support flaky equality
floats: []int{32},
}
// templateData holds the per-vector-type values that the code templates
// in this file substitute into their output.
type templateData struct {
	// Vec is the name of the vector type, e.g. Float32x4.
	Vec string
	// AOrAn is the article, "a" or "an", used in generated documentation.
	AOrAn string
	// Width is the bit width of the element type, e.g. 32.
	Width int
	// Vwidth is the bit width of the whole vector type, e.g. 128.
	Vwidth int
	// Count is the number of elements, e.g. 4.
	Count int
	// WxC is the element width and the count joined by "x", e.g. "32x4".
	WxC string
	// BxC is the shape as if the elements were bytes, in the proper
	// count, e.g. "8x16" (W==8).
	BxC string
	// Base is the capitalized base type of the vector, e.g. "Float".
	Base string
	// Type is the element type, e.g. "float32".
	Type string
	// OxFF is a mask for the lowest Count bits.
	OxFF string
}
// As128BitVec returns the name of the vector type that has t's base type
// and element width and as many elements as fit in 128 bits, e.g.
// "Float32x4" for Base "Float" and Width 32.
func (t templateData) As128BitVec() string {
	lanes := 128 / t.Width
	return fmt.Sprintf("%s%dx%d", t.Base, t.Width, lanes)
}
func oneTemplate(t *template.Template, baseType string, width, count int, out io.Writer) {
b := width * count
if b < 128 || b > 512 {
@@ -102,26 +119,17 @@ func oneTemplate(t *template.Template, baseType string, width, count int, out io
aOrAn = "an"
}
oxFF := fmt.Sprintf("0x%x", uint64((1<<count)-1))
t.Execute(out, struct {
Vec string // the type of the vector, e.g. Float32x4
AOrAn string // for documentation, the article "a" or "an"
Width int // the bit width of the element type, e.g. 32
Count int // the number of elements, e.g. 4
WxC string // the width-by-type string, e.g., "32x4"
BxC string // as if bytes, in the proper count, e.g., "8x16" (W==8)
Base string // the capitalized Base Type of the vector, e.g., "Float"
Type string // the element type, e.g. "float32"
OxFF string // a mask for the lowest 'count' bits
}{
Vec: vType,
AOrAn: aOrAn,
Width: width,
Count: count,
WxC: wxc,
BxC: bxc,
Base: BaseType,
Type: eType,
OxFF: oxFF,
t.Execute(out, templateData{
Vec: vType,
AOrAn: aOrAn,
Width: width,
Vwidth: b,
Count: count,
WxC: wxc,
BxC: bxc,
Base: BaseType,
Type: eType,
OxFF: oxFF,
})
}
@@ -480,7 +488,7 @@ func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) {
var unsafePATemplate = templateOf("unsafe PA helper", `
// pa{{.Vec}} returns a type-unsafe pointer to array that can
// only be used with partial load/store operations that only
// only be used with partial load/store operations that only
// access the known-safe portions of the array.
func pa{{.Vec}}(s []{{.Type}}) *[{{.Count}}]{{.Type}} {
return (*[{{.Count}}]{{.Type}})(unsafe.Pointer(&s[0]))
@@ -500,7 +508,7 @@ func (x {{.Vec}}) Masked(mask Mask{{.WxC}}) {{.Vec}} {
// Merge returns x but with elements set to y where mask is false.
func (x {{.Vec}}) Merge(y {{.Vec}}, mask Mask{{.WxC}}) {{.Vec}} {
{{- if eq .BxC .WxC }}
{{- if eq .BxC .WxC -}}
im := mask.AsInt{{.BxC}}()
{{- else}}
im := mask.AsInt{{.WxC}}().AsInt{{.BxC}}()
@@ -539,6 +547,32 @@ func (x {{.Vec}}) Merge(y {{.Vec}}, mask Mask{{.WxC}}) {{.Vec}} {
}
`)
// CPUfeatureBC returns the CPU feature name that broadcastTemplate writes
// into the doc comment of the generated Broadcast function for t.
//
// Vectors of 128 and 256 bits report "AVX2". Vectors of 512 bits report
// "AVX512BW" when the element width is at most 16 bits and "AVX512F"
// otherwise. Any other vector width panics.
func (t templateData) CPUfeatureBC() string {
	switch {
	case t.Vwidth == 128 || t.Vwidth == 256:
		return "AVX2"
	case t.Vwidth == 512 && t.Width <= 16:
		return "AVX512BW"
	case t.Vwidth == 512:
		return "AVX512F"
	}
	panic(fmt.Errorf("unexpected vector width %d", t.Vwidth))
}
// broadcastTemplate is the template for the package-level Broadcast<Vec>
// functions. The emitted function takes a scalar x of the element type,
// declares a zero value of the 128-bit vector type named by As128BitVec,
// stores x into element 0 with SetElem, and returns the result of calling
// Broadcast<Vwidth> on that vector. The CPU feature named in the emitted
// doc comment comes from CPUfeatureBC.
var broadcastTemplate = templateOf("Broadcast functions", `
// Broadcast{{.Vec}} returns a vector with the input
// x assigned to all elements of the output.
//
// Emulated, CPU Feature {{.CPUfeatureBC}}
func Broadcast{{.Vec}}(x {{.Type}}) {{.Vec}} {
var z {{.As128BitVec }}
return z.SetElem(0, x).Broadcast{{.Vwidth}}()
}
`)
func main() {
sl := flag.String("sl", "slice_amd64.go", "file name for slice operations")
ush := flag.String("ush", "unsafe_helpers.go", "file name for unsafe helpers")
@@ -557,6 +591,7 @@ func main() {
avx2SmallLoadSlicePartTemplate,
avx2MaskedTemplate,
avx512MaskedTemplate,
broadcastTemplate,
)
}
if *ush != "" {

View File

@@ -1386,6 +1386,438 @@ func (x Uint16x16) AverageMasked(y Uint16x16, mask Mask16x16) Uint16x16
// Asm: VPAVGW, CPU Feature: AVX512
func (x Uint16x32) AverageMasked(y Uint16x32, mask Mask16x32) Uint16x32
/* Broadcast128 */
// Broadcast128 returns a 128-bit Float32x4 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VBROADCASTSS, CPU Feature: AVX2
func (x Float32x4) Broadcast128() Float32x4
// Broadcast128 returns a 128-bit Float64x2 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func (x Float64x2) Broadcast128() Float64x2
// Broadcast128 returns a 128-bit Int8x16 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTB, CPU Feature: AVX2
func (x Int8x16) Broadcast128() Int8x16
// Broadcast128 returns a 128-bit Int16x8 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
func (x Int16x8) Broadcast128() Int16x8
// Broadcast128 returns a 128-bit Int32x4 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
func (x Int32x4) Broadcast128() Int32x4
// Broadcast128 returns a 128-bit Int64x2 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func (x Int64x2) Broadcast128() Int64x2
// Broadcast128 returns a 128-bit Uint8x16 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTB, CPU Feature: AVX2
func (x Uint8x16) Broadcast128() Uint8x16
// Broadcast128 returns a 128-bit Uint16x8 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
func (x Uint16x8) Broadcast128() Uint16x8
// Broadcast128 returns a 128-bit Uint32x4 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
func (x Uint32x4) Broadcast128() Uint32x4
// Broadcast128 returns a 128-bit Uint64x2 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func (x Uint64x2) Broadcast128() Uint64x2
/* Broadcast128Masked */
// Broadcast128Masked copies element zero of x, its 128-bit input, to the
// elements of the 128-bit Float32x4 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): the value of result elements not selected by mask is not
// stated here; confirm and document it.
//
// Asm: VBROADCASTSS, CPU Feature: AVX512
func (x Float32x4) Broadcast128Masked(mask Mask32x4) Float32x4
// Broadcast128Masked copies element zero of x, its 128-bit input, to the
// elements of the 128-bit Float64x2 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): the value of result elements not selected by mask is not
// stated here; confirm and document it.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX512
func (x Float64x2) Broadcast128Masked(mask Mask64x2) Float64x2
// Broadcast128Masked copies element zero of x, its 128-bit input, to the
// elements of the 128-bit Int8x16 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): the value of result elements not selected by mask is not
// stated here; confirm and document it.
//
// Asm: VPBROADCASTB, CPU Feature: AVX512
func (x Int8x16) Broadcast128Masked(mask Mask8x16) Int8x16
// Broadcast128Masked copies element zero of x, its 128-bit input, to the
// elements of the 128-bit Int16x8 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): the value of result elements not selected by mask is not
// stated here; confirm and document it.
//
// Asm: VPBROADCASTW, CPU Feature: AVX512
func (x Int16x8) Broadcast128Masked(mask Mask16x8) Int16x8
// Broadcast128Masked copies element zero of x, its 128-bit input, to the
// elements of the 128-bit Int32x4 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): the value of result elements not selected by mask is not
// stated here; confirm and document it.
//
// Asm: VPBROADCASTD, CPU Feature: AVX512
func (x Int32x4) Broadcast128Masked(mask Mask32x4) Int32x4
// Broadcast128Masked copies element zero of x, its 128-bit input, to the
// elements of the 128-bit Int64x2 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): the value of result elements not selected by mask is not
// stated here; confirm and document it.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX512
func (x Int64x2) Broadcast128Masked(mask Mask64x2) Int64x2
// Broadcast128Masked copies element zero of x, its 128-bit input, to the
// elements of the 128-bit Uint8x16 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): the value of result elements not selected by mask is not
// stated here; confirm and document it.
//
// Asm: VPBROADCASTB, CPU Feature: AVX512
func (x Uint8x16) Broadcast128Masked(mask Mask8x16) Uint8x16
// Broadcast128Masked copies element zero of x, its 128-bit input, to the
// elements of the 128-bit Uint16x8 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): the value of result elements not selected by mask is not
// stated here; confirm and document it.
//
// Asm: VPBROADCASTW, CPU Feature: AVX512
func (x Uint16x8) Broadcast128Masked(mask Mask16x8) Uint16x8
// Broadcast128Masked copies element zero of x, its 128-bit input, to the
// elements of the 128-bit Uint32x4 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): the value of result elements not selected by mask is not
// stated here; confirm and document it.
//
// Asm: VPBROADCASTD, CPU Feature: AVX512
func (x Uint32x4) Broadcast128Masked(mask Mask32x4) Uint32x4
// Broadcast128Masked copies element zero of x, its 128-bit input, to the
// elements of the 128-bit Uint64x2 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): the value of result elements not selected by mask is not
// stated here; confirm and document it.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX512
func (x Uint64x2) Broadcast128Masked(mask Mask64x2) Uint64x2
/* Broadcast256 */
// Broadcast256 returns a 256-bit Float32x8 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VBROADCASTSS, CPU Feature: AVX2
func (x Float32x4) Broadcast256() Float32x8
// Broadcast256 returns a 256-bit Float64x4 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VBROADCASTSD, CPU Feature: AVX2
func (x Float64x2) Broadcast256() Float64x4
// Broadcast256 returns a 256-bit Int8x32 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTB, CPU Feature: AVX2
func (x Int8x16) Broadcast256() Int8x32
// Broadcast256 returns a 256-bit Int16x16 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
func (x Int16x8) Broadcast256() Int16x16
// Broadcast256 returns a 256-bit Int32x8 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
func (x Int32x4) Broadcast256() Int32x8
// Broadcast256 returns a 256-bit Int64x4 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func (x Int64x2) Broadcast256() Int64x4
// Broadcast256 returns a 256-bit Uint8x32 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTB, CPU Feature: AVX2
func (x Uint8x16) Broadcast256() Uint8x32
// Broadcast256 returns a 256-bit Uint16x16 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
func (x Uint16x8) Broadcast256() Uint16x16
// Broadcast256 returns a 256-bit Uint32x8 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
func (x Uint32x4) Broadcast256() Uint32x8
// Broadcast256 returns a 256-bit Uint64x4 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func (x Uint64x2) Broadcast256() Uint64x4
/* Broadcast256Masked */
// Broadcast256Masked copies element zero of x, its 128-bit input, to the
// elements of the 256-bit Float32x8 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): mask is a Mask32x4 but the result is a Float32x8; confirm
// how mask selects among the result's elements.
//
// Asm: VBROADCASTSS, CPU Feature: AVX512
func (x Float32x4) Broadcast256Masked(mask Mask32x4) Float32x8
// Broadcast256Masked copies element zero of x, its 128-bit input, to the
// elements of the 256-bit Float64x4 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): mask is a Mask64x2 but the result is a Float64x4; confirm
// how mask selects among the result's elements.
//
// Asm: VBROADCASTSD, CPU Feature: AVX512
func (x Float64x2) Broadcast256Masked(mask Mask64x2) Float64x4
// Broadcast256Masked copies element zero of x, its 128-bit input, to the
// elements of the 256-bit Int8x32 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): mask is a Mask8x16 but the result is an Int8x32; confirm
// how mask selects among the result's elements.
//
// Asm: VPBROADCASTB, CPU Feature: AVX512
func (x Int8x16) Broadcast256Masked(mask Mask8x16) Int8x32
// Broadcast256Masked copies element zero of x, its 128-bit input, to the
// elements of the 256-bit Int16x16 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): mask is a Mask16x8 but the result is an Int16x16; confirm
// how mask selects among the result's elements.
//
// Asm: VPBROADCASTW, CPU Feature: AVX512
func (x Int16x8) Broadcast256Masked(mask Mask16x8) Int16x16
// Broadcast256Masked copies element zero of x, its 128-bit input, to the
// elements of the 256-bit Int32x8 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): mask is a Mask32x4 but the result is an Int32x8; confirm
// how mask selects among the result's elements.
//
// Asm: VPBROADCASTD, CPU Feature: AVX512
func (x Int32x4) Broadcast256Masked(mask Mask32x4) Int32x8
// Broadcast256Masked copies element zero of x, its 128-bit input, to the
// elements of the 256-bit Int64x4 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): mask is a Mask64x2 but the result is an Int64x4; confirm
// how mask selects among the result's elements.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX512
func (x Int64x2) Broadcast256Masked(mask Mask64x2) Int64x4
// Broadcast256Masked copies element zero of x, its 128-bit input, to the
// elements of the 256-bit Uint8x32 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): mask is a Mask8x16 but the result is a Uint8x32; confirm
// how mask selects among the result's elements.
//
// Asm: VPBROADCASTB, CPU Feature: AVX512
func (x Uint8x16) Broadcast256Masked(mask Mask8x16) Uint8x32
// Broadcast256Masked copies element zero of x, its 128-bit input, to the
// elements of the 256-bit Uint16x16 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): mask is a Mask16x8 but the result is a Uint16x16; confirm
// how mask selects among the result's elements.
//
// Asm: VPBROADCASTW, CPU Feature: AVX512
func (x Uint16x8) Broadcast256Masked(mask Mask16x8) Uint16x16
// Broadcast256Masked copies element zero of x, its 128-bit input, to the
// elements of the 256-bit Uint32x8 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): mask is a Mask32x4 but the result is a Uint32x8; confirm
// how mask selects among the result's elements.
//
// Asm: VPBROADCASTD, CPU Feature: AVX512
func (x Uint32x4) Broadcast256Masked(mask Mask32x4) Uint32x8
// Broadcast256Masked copies element zero of x, its 128-bit input, to the
// elements of the 256-bit Uint64x4 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): mask is a Mask64x2 but the result is a Uint64x4; confirm
// how mask selects among the result's elements.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX512
func (x Uint64x2) Broadcast256Masked(mask Mask64x2) Uint64x4
/* Broadcast512 */
// Broadcast512 returns a 512-bit Float32x16 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VBROADCASTSS, CPU Feature: AVX512
func (x Float32x4) Broadcast512() Float32x16
// Broadcast512 returns a 512-bit Float64x8 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VBROADCASTSD, CPU Feature: AVX512
func (x Float64x2) Broadcast512() Float64x8
// Broadcast512 returns a 512-bit Int8x64 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTB, CPU Feature: AVX512
func (x Int8x16) Broadcast512() Int8x64
// Broadcast512 returns a 512-bit Int16x32 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTW, CPU Feature: AVX512
func (x Int16x8) Broadcast512() Int16x32
// Broadcast512 returns a 512-bit Int32x16 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTD, CPU Feature: AVX512
func (x Int32x4) Broadcast512() Int32x16
// Broadcast512 returns a 512-bit Int64x8 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX512
func (x Int64x2) Broadcast512() Int64x8
// Broadcast512 returns a 512-bit Uint8x64 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTB, CPU Feature: AVX512
func (x Uint8x16) Broadcast512() Uint8x64
// Broadcast512 returns a 512-bit Uint16x32 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTW, CPU Feature: AVX512
func (x Uint16x8) Broadcast512() Uint16x32
// Broadcast512 returns a 512-bit Uint32x16 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTD, CPU Feature: AVX512
func (x Uint32x4) Broadcast512() Uint32x16
// Broadcast512 returns a 512-bit Uint64x8 with every element set to
// element zero of x, its 128-bit input.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX512
func (x Uint64x2) Broadcast512() Uint64x8
/* Broadcast512Masked */
// Broadcast512Masked copies element zero of x, its 128-bit input, to the
// elements of the 512-bit Float32x16 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): mask is a Mask32x4 but the result is a Float32x16; confirm
// how mask selects among the result's elements.
//
// Asm: VBROADCASTSS, CPU Feature: AVX512
func (x Float32x4) Broadcast512Masked(mask Mask32x4) Float32x16
// Broadcast512Masked copies element zero of x, its 128-bit input, to the
// elements of the 512-bit Float64x8 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): mask is a Mask64x2 but the result is a Float64x8; confirm
// how mask selects among the result's elements.
//
// Asm: VBROADCASTSD, CPU Feature: AVX512
func (x Float64x2) Broadcast512Masked(mask Mask64x2) Float64x8
// Broadcast512Masked copies element zero of x, its 128-bit input, to the
// elements of the 512-bit Int8x64 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): mask is a Mask8x16 but the result is an Int8x64; confirm
// how mask selects among the result's elements.
//
// Asm: VPBROADCASTB, CPU Feature: AVX512
func (x Int8x16) Broadcast512Masked(mask Mask8x16) Int8x64
// Broadcast512Masked copies element zero of x, its 128-bit input, to the
// elements of the 512-bit Int16x32 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): mask is a Mask16x8 but the result is an Int16x32; confirm
// how mask selects among the result's elements.
//
// Asm: VPBROADCASTW, CPU Feature: AVX512
func (x Int16x8) Broadcast512Masked(mask Mask16x8) Int16x32
// Broadcast512Masked copies element zero of x, its 128-bit input, to the
// elements of the 512-bit Int32x16 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): mask is a Mask32x4 but the result is an Int32x16; confirm
// how mask selects among the result's elements.
//
// Asm: VPBROADCASTD, CPU Feature: AVX512
func (x Int32x4) Broadcast512Masked(mask Mask32x4) Int32x16
// Broadcast512Masked copies element zero of x, its 128-bit input, to the
// elements of the 512-bit Int64x8 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): mask is a Mask64x2 but the result is an Int64x8; confirm
// how mask selects among the result's elements.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX512
func (x Int64x2) Broadcast512Masked(mask Mask64x2) Int64x8
// Broadcast512Masked copies element zero of x, its 128-bit input, to the
// elements of the 512-bit Uint8x64 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): mask is a Mask8x16 but the result is a Uint8x64; confirm
// how mask selects among the result's elements.
//
// Asm: VPBROADCASTB, CPU Feature: AVX512
func (x Uint8x16) Broadcast512Masked(mask Mask8x16) Uint8x64
// Broadcast512Masked copies element zero of x, its 128-bit input, to the
// elements of the 512-bit Uint16x32 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): mask is a Mask16x8 but the result is a Uint16x32; confirm
// how mask selects among the result's elements.
//
// Asm: VPBROADCASTW, CPU Feature: AVX512
func (x Uint16x8) Broadcast512Masked(mask Mask16x8) Uint16x32
// Broadcast512Masked copies element zero of x, its 128-bit input, to the
// elements of the 512-bit Uint32x16 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): mask is a Mask32x4 but the result is a Uint32x16; confirm
// how mask selects among the result's elements.
//
// Asm: VPBROADCASTD, CPU Feature: AVX512
func (x Uint32x4) Broadcast512Masked(mask Mask32x4) Uint32x16
// Broadcast512Masked copies element zero of x, its 128-bit input, to the
// elements of the 512-bit Uint64x8 result, subject to mask.
//
// This operation is applied selectively under a write mask.
// NOTE(review): mask is a Mask64x2 but the result is a Uint64x8; confirm
// how mask selects among the result's elements.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX512
func (x Uint64x2) Broadcast512Masked(mask Mask64x2) Uint64x8
/* Ceil */
// Ceil rounds elements up to the nearest integer.
@@ -9116,6 +9548,20 @@ func (x Float64x8) ScaleMasked(y Float64x8, mask Mask64x8) Float64x8
/* SetElem */
// SetElem returns a Float32x4 equal to x except that the element at index
// is set to y.
//
// index should be a constant for best performance; a non-constant index
// is translated into a jump table.
//
// Asm: VPINSRD, CPU Feature: AVX
func (x Float32x4) SetElem(index uint8, y float32) Float32x4
// SetElem returns a Float64x2 equal to x except that the element at index
// is set to y.
//
// index should be a constant for best performance; a non-constant index
// is translated into a jump table.
//
// Asm: VPINSRQ, CPU Feature: AVX
func (x Float64x2) SetElem(index uint8, y float64) Float64x2
// SetElem sets a single constant-indexed element's value.
//
// index results in better performance when it's a constant, a non-constant value will be translated into a jump table.

View File

@@ -412,3 +412,15 @@ func TestRotateAllVariable(t *testing.T) {
}
}
}
// TestBroadcastUint32x4 checks that BroadcastUint32x4 stores its argument
// into all four elements of the result.
func TestBroadcastUint32x4(t *testing.T) {
	const v = 123456789
	got := make([]uint32, 4)
	simd.BroadcastUint32x4(v).StoreSlice(got)
	want := []uint32{v, v, v, v}
	checkSlices(t, got, want)
}
// TestBroadcastFloat32x8 checks that BroadcastFloat32x8 stores its
// argument into all eight elements of the result.
func TestBroadcastFloat32x8(t *testing.T) {
	const v = 123456789
	got := make([]float32, 8)
	simd.BroadcastFloat32x8(v).StoreSlice(got)
	want := []float32{v, v, v, v, v, v, v, v}
	checkSlices(t, got, want)
}

View File

@@ -1499,3 +1499,273 @@ func (x Float64x8) Merge(y Float64x8, mask Mask64x8) Float64x8 {
iy := y.AsInt64x8()
return iy.blendMasked(ix, mask).AsFloat64x8()
}
// BroadcastInt8x16 returns an Int8x16 in which every element equals x.
//
// It stores x into element 0 of a zero Int8x16 with SetElem and then
// replicates that element with Broadcast128.
//
// Emulated, CPU Feature AVX2
func BroadcastInt8x16(x int8) Int8x16 {
	var zero Int8x16
	lane0 := zero.SetElem(0, x)
	return lane0.Broadcast128()
}
// BroadcastInt16x8 returns an Int16x8 in which every element equals x.
//
// It stores x into element 0 of a zero Int16x8 with SetElem and then
// replicates that element with Broadcast128.
//
// Emulated, CPU Feature AVX2
func BroadcastInt16x8(x int16) Int16x8 {
	var zero Int16x8
	lane0 := zero.SetElem(0, x)
	return lane0.Broadcast128()
}
// BroadcastInt32x4 returns an Int32x4 in which every element equals x.
//
// It stores x into element 0 of a zero Int32x4 with SetElem and then
// replicates that element with Broadcast128.
//
// Emulated, CPU Feature AVX2
func BroadcastInt32x4(x int32) Int32x4 {
	var zero Int32x4
	lane0 := zero.SetElem(0, x)
	return lane0.Broadcast128()
}
// BroadcastInt64x2 returns an Int64x2 in which every element equals x.
//
// It stores x into element 0 of a zero Int64x2 with SetElem and then
// replicates that element with Broadcast128.
//
// Emulated, CPU Feature AVX2
func BroadcastInt64x2(x int64) Int64x2 {
	var zero Int64x2
	lane0 := zero.SetElem(0, x)
	return lane0.Broadcast128()
}
// BroadcastUint8x16 returns a Uint8x16 in which every element equals x.
//
// It stores x into element 0 of a zero Uint8x16 with SetElem and then
// replicates that element with Broadcast128.
//
// Emulated, CPU Feature AVX2
func BroadcastUint8x16(x uint8) Uint8x16 {
	var zero Uint8x16
	lane0 := zero.SetElem(0, x)
	return lane0.Broadcast128()
}
// BroadcastUint16x8 returns a Uint16x8 in which every element equals x.
//
// It stores x into element 0 of a zero Uint16x8 with SetElem and then
// replicates that element with Broadcast128.
//
// Emulated, CPU Feature AVX2
func BroadcastUint16x8(x uint16) Uint16x8 {
	var zero Uint16x8
	lane0 := zero.SetElem(0, x)
	return lane0.Broadcast128()
}
// BroadcastUint32x4 returns a Uint32x4 in which every element equals x.
//
// It stores x into element 0 of a zero Uint32x4 with SetElem and then
// replicates that element with Broadcast128.
//
// Emulated, CPU Feature AVX2
func BroadcastUint32x4(x uint32) Uint32x4 {
	var zero Uint32x4
	lane0 := zero.SetElem(0, x)
	return lane0.Broadcast128()
}
// BroadcastUint64x2 returns a Uint64x2 in which every element equals x.
//
// It stores x into element 0 of a zero Uint64x2 with SetElem and then
// replicates that element with Broadcast128.
//
// Emulated, CPU Feature AVX2
func BroadcastUint64x2(x uint64) Uint64x2 {
	var zero Uint64x2
	lane0 := zero.SetElem(0, x)
	return lane0.Broadcast128()
}
// BroadcastFloat32x4 returns a Float32x4 in which every element equals x.
//
// It stores x into element 0 of a zero Float32x4 with SetElem and then
// replicates that element with Broadcast128.
//
// Emulated, CPU Feature AVX2
func BroadcastFloat32x4(x float32) Float32x4 {
	var zero Float32x4
	lane0 := zero.SetElem(0, x)
	return lane0.Broadcast128()
}
// BroadcastFloat64x2 returns a Float64x2 in which every element equals x.
//
// It stores x into element 0 of a zero Float64x2 with SetElem and then
// replicates that element with Broadcast128.
//
// Emulated, CPU Feature AVX2
func BroadcastFloat64x2(x float64) Float64x2 {
	var zero Float64x2
	lane0 := zero.SetElem(0, x)
	return lane0.Broadcast128()
}
// BroadcastInt8x32 returns an Int8x32 in which every element equals x.
//
// It stores x into element 0 of a zero Int8x16 with SetElem and then
// replicates that element across 256 bits with Broadcast256.
//
// Emulated, CPU Feature AVX2
func BroadcastInt8x32(x int8) Int8x32 {
	var zero Int8x16
	lane0 := zero.SetElem(0, x)
	return lane0.Broadcast256()
}
// BroadcastInt16x16 returns an Int16x16 in which every element equals x.
//
// It stores x into element 0 of a zero Int16x8 with SetElem and then
// replicates that element across 256 bits with Broadcast256.
//
// Emulated, CPU Feature AVX2
func BroadcastInt16x16(x int16) Int16x16 {
	var zero Int16x8
	lane0 := zero.SetElem(0, x)
	return lane0.Broadcast256()
}
// BroadcastInt32x8 returns an Int32x8 in which every element equals x.
//
// It stores x into element 0 of a zero Int32x4 with SetElem and then
// replicates that element across 256 bits with Broadcast256.
//
// Emulated, CPU Feature AVX2
func BroadcastInt32x8(x int32) Int32x8 {
	var zero Int32x4
	lane0 := zero.SetElem(0, x)
	return lane0.Broadcast256()
}
// BroadcastInt64x4 returns an Int64x4 in which every element equals x.
//
// It stores x into element 0 of a zero Int64x2 with SetElem and then
// replicates that element across 256 bits with Broadcast256.
//
// Emulated, CPU Feature AVX2
func BroadcastInt64x4(x int64) Int64x4 {
	var zero Int64x2
	lane0 := zero.SetElem(0, x)
	return lane0.Broadcast256()
}
// BroadcastUint8x32 returns a Uint8x32 in which every element equals x.
//
// It stores x into element 0 of a zero Uint8x16 with SetElem and then
// replicates that element across 256 bits with Broadcast256.
//
// Emulated, CPU Feature AVX2
func BroadcastUint8x32(x uint8) Uint8x32 {
	var zero Uint8x16
	lane0 := zero.SetElem(0, x)
	return lane0.Broadcast256()
}
// BroadcastUint16x16 returns a Uint16x16 in which every element equals x.
//
// It stores x into element 0 of a zero Uint16x8 with SetElem and then
// replicates that element across 256 bits with Broadcast256.
//
// Emulated, CPU Feature AVX2
func BroadcastUint16x16(x uint16) Uint16x16 {
	var zero Uint16x8
	lane0 := zero.SetElem(0, x)
	return lane0.Broadcast256()
}
// BroadcastUint32x8 builds a Uint32x8 in which every element holds x.
//
// It writes x into element 0 of a zero-valued Uint32x4 with SetElem and
// then calls Broadcast256 on that vector to produce the Uint32x8 result.
//
// Emulated, CPU Feature AVX2
func BroadcastUint32x8(x uint32) Uint32x8 {
	var zero Uint32x4
	seeded := zero.SetElem(0, x)
	return seeded.Broadcast256()
}
// BroadcastUint64x4 builds a Uint64x4 in which every element holds x.
//
// It writes x into element 0 of a zero-valued Uint64x2 with SetElem and
// then calls Broadcast256 on that vector to produce the Uint64x4 result.
//
// Emulated, CPU Feature AVX2
func BroadcastUint64x4(x uint64) Uint64x4 {
	var zero Uint64x2
	seeded := zero.SetElem(0, x)
	return seeded.Broadcast256()
}
// BroadcastFloat32x8 builds a Float32x8 in which every element holds x.
//
// It writes x into element 0 of a zero-valued Float32x4 with SetElem and
// then calls Broadcast256 on that vector to produce the Float32x8 result.
//
// Emulated, CPU Feature AVX2
func BroadcastFloat32x8(x float32) Float32x8 {
	var zero Float32x4
	seeded := zero.SetElem(0, x)
	return seeded.Broadcast256()
}
// BroadcastFloat64x4 builds a Float64x4 in which every element holds x.
//
// It writes x into element 0 of a zero-valued Float64x2 with SetElem and
// then calls Broadcast256 on that vector to produce the Float64x4 result.
//
// Emulated, CPU Feature AVX2
func BroadcastFloat64x4(x float64) Float64x4 {
	var zero Float64x2
	seeded := zero.SetElem(0, x)
	return seeded.Broadcast256()
}
// BroadcastInt8x64 builds an Int8x64 in which every element holds x.
//
// It writes x into element 0 of a zero-valued Int8x16 with SetElem and
// then calls Broadcast512 on that vector to produce the Int8x64 result.
//
// Emulated, CPU Feature AVX512BW
func BroadcastInt8x64(x int8) Int8x64 {
	var zero Int8x16
	seeded := zero.SetElem(0, x)
	return seeded.Broadcast512()
}
// BroadcastInt16x32 builds an Int16x32 in which every element holds x.
//
// It writes x into element 0 of a zero-valued Int16x8 with SetElem and
// then calls Broadcast512 on that vector to produce the Int16x32 result.
//
// Emulated, CPU Feature AVX512BW
func BroadcastInt16x32(x int16) Int16x32 {
	var zero Int16x8
	seeded := zero.SetElem(0, x)
	return seeded.Broadcast512()
}
// BroadcastInt32x16 builds an Int32x16 in which every element holds x.
//
// It writes x into element 0 of a zero-valued Int32x4 with SetElem and
// then calls Broadcast512 on that vector to produce the Int32x16 result.
//
// Emulated, CPU Feature AVX512F
func BroadcastInt32x16(x int32) Int32x16 {
	var zero Int32x4
	seeded := zero.SetElem(0, x)
	return seeded.Broadcast512()
}
// BroadcastInt64x8 builds an Int64x8 in which every element holds x.
//
// It writes x into element 0 of a zero-valued Int64x2 with SetElem and
// then calls Broadcast512 on that vector to produce the Int64x8 result.
//
// Emulated, CPU Feature AVX512F
func BroadcastInt64x8(x int64) Int64x8 {
	var zero Int64x2
	seeded := zero.SetElem(0, x)
	return seeded.Broadcast512()
}
// BroadcastUint8x64 builds a Uint8x64 in which every element holds x.
//
// It writes x into element 0 of a zero-valued Uint8x16 with SetElem and
// then calls Broadcast512 on that vector to produce the Uint8x64 result.
//
// Emulated, CPU Feature AVX512BW
func BroadcastUint8x64(x uint8) Uint8x64 {
	var zero Uint8x16
	seeded := zero.SetElem(0, x)
	return seeded.Broadcast512()
}
// BroadcastUint16x32 builds a Uint16x32 in which every element holds x.
//
// It writes x into element 0 of a zero-valued Uint16x8 with SetElem and
// then calls Broadcast512 on that vector to produce the Uint16x32 result.
//
// Emulated, CPU Feature AVX512BW
func BroadcastUint16x32(x uint16) Uint16x32 {
	var zero Uint16x8
	seeded := zero.SetElem(0, x)
	return seeded.Broadcast512()
}
// BroadcastUint32x16 builds a Uint32x16 in which every element holds x.
//
// It writes x into element 0 of a zero-valued Uint32x4 with SetElem and
// then calls Broadcast512 on that vector to produce the Uint32x16 result.
//
// Emulated, CPU Feature AVX512F
func BroadcastUint32x16(x uint32) Uint32x16 {
	var zero Uint32x4
	seeded := zero.SetElem(0, x)
	return seeded.Broadcast512()
}
// BroadcastUint64x8 builds a Uint64x8 in which every element holds x.
//
// It writes x into element 0 of a zero-valued Uint64x2 with SetElem and
// then calls Broadcast512 on that vector to produce the Uint64x8 result.
//
// Emulated, CPU Feature AVX512F
func BroadcastUint64x8(x uint64) Uint64x8 {
	var zero Uint64x2
	seeded := zero.SetElem(0, x)
	return seeded.Broadcast512()
}
// BroadcastFloat32x16 builds a Float32x16 in which every element holds x.
//
// It writes x into element 0 of a zero-valued Float32x4 with SetElem and
// then calls Broadcast512 on that vector to produce the Float32x16 result.
//
// Emulated, CPU Feature AVX512F
func BroadcastFloat32x16(x float32) Float32x16 {
	var zero Float32x4
	seeded := zero.SetElem(0, x)
	return seeded.Broadcast512()
}
// BroadcastFloat64x8 builds a Float64x8 in which every element holds x.
//
// It writes x into element 0 of a zero-valued Float64x2 with SetElem and
// then calls Broadcast512 on that vector to produce the Float64x8 result.
//
// Emulated, CPU Feature AVX512F
func BroadcastFloat64x8(x float64) Float64x8 {
	var zero Float64x2
	seeded := zero.SetElem(0, x)
	return seeded.Broadcast512()
}