mirror of
https://github.com/golang/go.git
synced 2026-01-29 07:02:05 +03:00
simd/archsimd: rename Broadcast methods
Currently the Broadcast128/256/512 methods broadcast the lowest element of the input vector to a vector of the corresponding width. There are also variations of broadcast operations that broadcast the whole (128- or 256-bit) vector to a larger vector, which we don't yet support. Our current naming does not make clear which version is meant, though. Rename the current ones to Broadcast1ToN, to be clear that they broadcast one element. The vector version will probably be named BroadcastAllToN (not included in this CL). Change-Id: I47a21e367f948ec0b578d63706a40d20f5a9f46d Reviewed-on: https://go-review.googlesource.com/c/go/+/734840 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
@@ -25,23 +25,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||
ssa.OpAMD64VPABSQ128,
|
||||
ssa.OpAMD64VPABSQ256,
|
||||
ssa.OpAMD64VPABSQ512,
|
||||
ssa.OpAMD64VBROADCASTSS128,
|
||||
ssa.OpAMD64VPBROADCASTQ128,
|
||||
ssa.OpAMD64VPBROADCASTB128,
|
||||
ssa.OpAMD64VPBROADCASTW128,
|
||||
ssa.OpAMD64VPBROADCASTD128,
|
||||
ssa.OpAMD64VBROADCASTSS256,
|
||||
ssa.OpAMD64VBROADCASTSS128,
|
||||
ssa.OpAMD64VBROADCASTSD256,
|
||||
ssa.OpAMD64VPBROADCASTB256,
|
||||
ssa.OpAMD64VPBROADCASTW256,
|
||||
ssa.OpAMD64VPBROADCASTD256,
|
||||
ssa.OpAMD64VPBROADCASTD128,
|
||||
ssa.OpAMD64VPBROADCASTQ256,
|
||||
ssa.OpAMD64VBROADCASTSS512,
|
||||
ssa.OpAMD64VBROADCASTSS256,
|
||||
ssa.OpAMD64VBROADCASTSD512,
|
||||
ssa.OpAMD64VPBROADCASTB512,
|
||||
ssa.OpAMD64VPBROADCASTW512,
|
||||
ssa.OpAMD64VPBROADCASTD512,
|
||||
ssa.OpAMD64VPBROADCASTW128,
|
||||
ssa.OpAMD64VPBROADCASTD256,
|
||||
ssa.OpAMD64VPBROADCASTQ512,
|
||||
ssa.OpAMD64VBROADCASTSS512,
|
||||
ssa.OpAMD64VPBROADCASTB128,
|
||||
ssa.OpAMD64VPBROADCASTW256,
|
||||
ssa.OpAMD64VPBROADCASTD512,
|
||||
ssa.OpAMD64VPBROADCASTB256,
|
||||
ssa.OpAMD64VPBROADCASTW512,
|
||||
ssa.OpAMD64VPBROADCASTB512,
|
||||
ssa.OpAMD64VCVTPD2PSX128,
|
||||
ssa.OpAMD64VCVTPD2PSY128,
|
||||
ssa.OpAMD64VCVTPD2PS256,
|
||||
@@ -832,23 +832,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||
ssa.OpAMD64VPABSQMasked128,
|
||||
ssa.OpAMD64VPABSQMasked256,
|
||||
ssa.OpAMD64VPABSQMasked512,
|
||||
ssa.OpAMD64VBROADCASTSSMasked128,
|
||||
ssa.OpAMD64VPBROADCASTQMasked128,
|
||||
ssa.OpAMD64VPBROADCASTBMasked128,
|
||||
ssa.OpAMD64VPBROADCASTWMasked128,
|
||||
ssa.OpAMD64VPBROADCASTDMasked128,
|
||||
ssa.OpAMD64VBROADCASTSSMasked256,
|
||||
ssa.OpAMD64VBROADCASTSSMasked128,
|
||||
ssa.OpAMD64VBROADCASTSDMasked256,
|
||||
ssa.OpAMD64VPBROADCASTBMasked256,
|
||||
ssa.OpAMD64VPBROADCASTWMasked256,
|
||||
ssa.OpAMD64VPBROADCASTDMasked256,
|
||||
ssa.OpAMD64VPBROADCASTDMasked128,
|
||||
ssa.OpAMD64VPBROADCASTQMasked256,
|
||||
ssa.OpAMD64VBROADCASTSSMasked512,
|
||||
ssa.OpAMD64VBROADCASTSSMasked256,
|
||||
ssa.OpAMD64VBROADCASTSDMasked512,
|
||||
ssa.OpAMD64VPBROADCASTBMasked512,
|
||||
ssa.OpAMD64VPBROADCASTWMasked512,
|
||||
ssa.OpAMD64VPBROADCASTDMasked512,
|
||||
ssa.OpAMD64VPBROADCASTWMasked128,
|
||||
ssa.OpAMD64VPBROADCASTDMasked256,
|
||||
ssa.OpAMD64VPBROADCASTQMasked512,
|
||||
ssa.OpAMD64VBROADCASTSSMasked512,
|
||||
ssa.OpAMD64VPBROADCASTBMasked128,
|
||||
ssa.OpAMD64VPBROADCASTWMasked256,
|
||||
ssa.OpAMD64VPBROADCASTDMasked512,
|
||||
ssa.OpAMD64VPBROADCASTBMasked256,
|
||||
ssa.OpAMD64VPBROADCASTWMasked512,
|
||||
ssa.OpAMD64VPBROADCASTBMasked512,
|
||||
ssa.OpAMD64VCOMPRESSPSMasked128,
|
||||
ssa.OpAMD64VCOMPRESSPSMasked256,
|
||||
ssa.OpAMD64VCOMPRESSPSMasked512,
|
||||
@@ -2460,23 +2460,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||
ssa.OpAMD64VPABSQMasked128Merging,
|
||||
ssa.OpAMD64VPABSQMasked256Merging,
|
||||
ssa.OpAMD64VPABSQMasked512Merging,
|
||||
ssa.OpAMD64VBROADCASTSSMasked128Merging,
|
||||
ssa.OpAMD64VPBROADCASTQMasked128Merging,
|
||||
ssa.OpAMD64VPBROADCASTBMasked128Merging,
|
||||
ssa.OpAMD64VPBROADCASTWMasked128Merging,
|
||||
ssa.OpAMD64VPBROADCASTDMasked128Merging,
|
||||
ssa.OpAMD64VBROADCASTSSMasked256Merging,
|
||||
ssa.OpAMD64VBROADCASTSSMasked128Merging,
|
||||
ssa.OpAMD64VBROADCASTSDMasked256Merging,
|
||||
ssa.OpAMD64VPBROADCASTBMasked256Merging,
|
||||
ssa.OpAMD64VPBROADCASTWMasked256Merging,
|
||||
ssa.OpAMD64VPBROADCASTDMasked256Merging,
|
||||
ssa.OpAMD64VPBROADCASTDMasked128Merging,
|
||||
ssa.OpAMD64VPBROADCASTQMasked256Merging,
|
||||
ssa.OpAMD64VBROADCASTSSMasked512Merging,
|
||||
ssa.OpAMD64VBROADCASTSSMasked256Merging,
|
||||
ssa.OpAMD64VBROADCASTSDMasked512Merging,
|
||||
ssa.OpAMD64VPBROADCASTBMasked512Merging,
|
||||
ssa.OpAMD64VPBROADCASTWMasked512Merging,
|
||||
ssa.OpAMD64VPBROADCASTDMasked512Merging,
|
||||
ssa.OpAMD64VPBROADCASTWMasked128Merging,
|
||||
ssa.OpAMD64VPBROADCASTDMasked256Merging,
|
||||
ssa.OpAMD64VPBROADCASTQMasked512Merging,
|
||||
ssa.OpAMD64VBROADCASTSSMasked512Merging,
|
||||
ssa.OpAMD64VPBROADCASTBMasked128Merging,
|
||||
ssa.OpAMD64VPBROADCASTWMasked256Merging,
|
||||
ssa.OpAMD64VPBROADCASTDMasked512Merging,
|
||||
ssa.OpAMD64VPBROADCASTBMasked256Merging,
|
||||
ssa.OpAMD64VPBROADCASTWMasked512Merging,
|
||||
ssa.OpAMD64VPBROADCASTBMasked512Merging,
|
||||
ssa.OpAMD64VRNDSCALEPSMasked128Merging,
|
||||
ssa.OpAMD64VRNDSCALEPSMasked256Merging,
|
||||
ssa.OpAMD64VRNDSCALEPSMasked512Merging,
|
||||
@@ -2817,23 +2817,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||
ssa.OpAMD64VPAVGWMasked128,
|
||||
ssa.OpAMD64VPAVGWMasked256,
|
||||
ssa.OpAMD64VPAVGWMasked512,
|
||||
ssa.OpAMD64VBROADCASTSSMasked128,
|
||||
ssa.OpAMD64VPBROADCASTQMasked128,
|
||||
ssa.OpAMD64VPBROADCASTBMasked128,
|
||||
ssa.OpAMD64VPBROADCASTWMasked128,
|
||||
ssa.OpAMD64VPBROADCASTDMasked128,
|
||||
ssa.OpAMD64VBROADCASTSSMasked256,
|
||||
ssa.OpAMD64VBROADCASTSSMasked128,
|
||||
ssa.OpAMD64VBROADCASTSDMasked256,
|
||||
ssa.OpAMD64VPBROADCASTBMasked256,
|
||||
ssa.OpAMD64VPBROADCASTWMasked256,
|
||||
ssa.OpAMD64VPBROADCASTDMasked256,
|
||||
ssa.OpAMD64VPBROADCASTDMasked128,
|
||||
ssa.OpAMD64VPBROADCASTQMasked256,
|
||||
ssa.OpAMD64VBROADCASTSSMasked512,
|
||||
ssa.OpAMD64VBROADCASTSSMasked256,
|
||||
ssa.OpAMD64VBROADCASTSDMasked512,
|
||||
ssa.OpAMD64VPBROADCASTBMasked512,
|
||||
ssa.OpAMD64VPBROADCASTWMasked512,
|
||||
ssa.OpAMD64VPBROADCASTDMasked512,
|
||||
ssa.OpAMD64VPBROADCASTWMasked128,
|
||||
ssa.OpAMD64VPBROADCASTDMasked256,
|
||||
ssa.OpAMD64VPBROADCASTQMasked512,
|
||||
ssa.OpAMD64VBROADCASTSSMasked512,
|
||||
ssa.OpAMD64VPBROADCASTBMasked128,
|
||||
ssa.OpAMD64VPBROADCASTWMasked256,
|
||||
ssa.OpAMD64VPBROADCASTDMasked512,
|
||||
ssa.OpAMD64VPBROADCASTBMasked256,
|
||||
ssa.OpAMD64VPBROADCASTWMasked512,
|
||||
ssa.OpAMD64VPBROADCASTBMasked512,
|
||||
ssa.OpAMD64VRNDSCALEPSMasked128,
|
||||
ssa.OpAMD64VRNDSCALEPSMasked128load,
|
||||
ssa.OpAMD64VRNDSCALEPSMasked256,
|
||||
|
||||
@@ -140,36 +140,36 @@
|
||||
(AverageUint16x8 ...) => (VPAVGW128 ...)
|
||||
(AverageUint16x16 ...) => (VPAVGW256 ...)
|
||||
(AverageUint16x32 ...) => (VPAVGW512 ...)
|
||||
(Broadcast128Float32x4 ...) => (VBROADCASTSS128 ...)
|
||||
(Broadcast128Float64x2 ...) => (VPBROADCASTQ128 ...)
|
||||
(Broadcast128Int8x16 ...) => (VPBROADCASTB128 ...)
|
||||
(Broadcast128Int16x8 ...) => (VPBROADCASTW128 ...)
|
||||
(Broadcast128Int32x4 ...) => (VPBROADCASTD128 ...)
|
||||
(Broadcast128Int64x2 ...) => (VPBROADCASTQ128 ...)
|
||||
(Broadcast128Uint8x16 ...) => (VPBROADCASTB128 ...)
|
||||
(Broadcast128Uint16x8 ...) => (VPBROADCASTW128 ...)
|
||||
(Broadcast128Uint32x4 ...) => (VPBROADCASTD128 ...)
|
||||
(Broadcast128Uint64x2 ...) => (VPBROADCASTQ128 ...)
|
||||
(Broadcast256Float32x4 ...) => (VBROADCASTSS256 ...)
|
||||
(Broadcast256Float64x2 ...) => (VBROADCASTSD256 ...)
|
||||
(Broadcast256Int8x16 ...) => (VPBROADCASTB256 ...)
|
||||
(Broadcast256Int16x8 ...) => (VPBROADCASTW256 ...)
|
||||
(Broadcast256Int32x4 ...) => (VPBROADCASTD256 ...)
|
||||
(Broadcast256Int64x2 ...) => (VPBROADCASTQ256 ...)
|
||||
(Broadcast256Uint8x16 ...) => (VPBROADCASTB256 ...)
|
||||
(Broadcast256Uint16x8 ...) => (VPBROADCASTW256 ...)
|
||||
(Broadcast256Uint32x4 ...) => (VPBROADCASTD256 ...)
|
||||
(Broadcast256Uint64x2 ...) => (VPBROADCASTQ256 ...)
|
||||
(Broadcast512Float32x4 ...) => (VBROADCASTSS512 ...)
|
||||
(Broadcast512Float64x2 ...) => (VBROADCASTSD512 ...)
|
||||
(Broadcast512Int8x16 ...) => (VPBROADCASTB512 ...)
|
||||
(Broadcast512Int16x8 ...) => (VPBROADCASTW512 ...)
|
||||
(Broadcast512Int32x4 ...) => (VPBROADCASTD512 ...)
|
||||
(Broadcast512Int64x2 ...) => (VPBROADCASTQ512 ...)
|
||||
(Broadcast512Uint8x16 ...) => (VPBROADCASTB512 ...)
|
||||
(Broadcast512Uint16x8 ...) => (VPBROADCASTW512 ...)
|
||||
(Broadcast512Uint32x4 ...) => (VPBROADCASTD512 ...)
|
||||
(Broadcast512Uint64x2 ...) => (VPBROADCASTQ512 ...)
|
||||
(Broadcast1To2Float64x2 ...) => (VPBROADCASTQ128 ...)
|
||||
(Broadcast1To2Int64x2 ...) => (VPBROADCASTQ128 ...)
|
||||
(Broadcast1To2Uint64x2 ...) => (VPBROADCASTQ128 ...)
|
||||
(Broadcast1To4Float32x4 ...) => (VBROADCASTSS128 ...)
|
||||
(Broadcast1To4Float64x2 ...) => (VBROADCASTSD256 ...)
|
||||
(Broadcast1To4Int32x4 ...) => (VPBROADCASTD128 ...)
|
||||
(Broadcast1To4Int64x2 ...) => (VPBROADCASTQ256 ...)
|
||||
(Broadcast1To4Uint32x4 ...) => (VPBROADCASTD128 ...)
|
||||
(Broadcast1To4Uint64x2 ...) => (VPBROADCASTQ256 ...)
|
||||
(Broadcast1To8Float32x4 ...) => (VBROADCASTSS256 ...)
|
||||
(Broadcast1To8Float64x2 ...) => (VBROADCASTSD512 ...)
|
||||
(Broadcast1To8Int16x8 ...) => (VPBROADCASTW128 ...)
|
||||
(Broadcast1To8Int32x4 ...) => (VPBROADCASTD256 ...)
|
||||
(Broadcast1To8Int64x2 ...) => (VPBROADCASTQ512 ...)
|
||||
(Broadcast1To8Uint16x8 ...) => (VPBROADCASTW128 ...)
|
||||
(Broadcast1To8Uint32x4 ...) => (VPBROADCASTD256 ...)
|
||||
(Broadcast1To8Uint64x2 ...) => (VPBROADCASTQ512 ...)
|
||||
(Broadcast1To16Float32x4 ...) => (VBROADCASTSS512 ...)
|
||||
(Broadcast1To16Int8x16 ...) => (VPBROADCASTB128 ...)
|
||||
(Broadcast1To16Int16x8 ...) => (VPBROADCASTW256 ...)
|
||||
(Broadcast1To16Int32x4 ...) => (VPBROADCASTD512 ...)
|
||||
(Broadcast1To16Uint8x16 ...) => (VPBROADCASTB128 ...)
|
||||
(Broadcast1To16Uint16x8 ...) => (VPBROADCASTW256 ...)
|
||||
(Broadcast1To16Uint32x4 ...) => (VPBROADCASTD512 ...)
|
||||
(Broadcast1To32Int8x16 ...) => (VPBROADCASTB256 ...)
|
||||
(Broadcast1To32Int16x8 ...) => (VPBROADCASTW512 ...)
|
||||
(Broadcast1To32Uint8x16 ...) => (VPBROADCASTB256 ...)
|
||||
(Broadcast1To32Uint16x8 ...) => (VPBROADCASTW512 ...)
|
||||
(Broadcast1To64Int8x16 ...) => (VPBROADCASTB512 ...)
|
||||
(Broadcast1To64Uint8x16 ...) => (VPBROADCASTB512 ...)
|
||||
(CeilFloat32x4 x) => (VROUNDPS128 [2] x)
|
||||
(CeilFloat32x8 x) => (VROUNDPS256 [2] x)
|
||||
(CeilFloat64x2 x) => (VROUNDPD128 [2] x)
|
||||
@@ -1424,23 +1424,23 @@
|
||||
(VMOVDQU16Masked128 (VPAVGW128 x y) mask) => (VPAVGWMasked128 x y mask)
|
||||
(VMOVDQU16Masked256 (VPAVGW256 x y) mask) => (VPAVGWMasked256 x y mask)
|
||||
(VMOVDQU16Masked512 (VPAVGW512 x y) mask) => (VPAVGWMasked512 x y mask)
|
||||
(VMOVDQU32Masked128 (VBROADCASTSS128 x) mask) => (VBROADCASTSSMasked128 x mask)
|
||||
(VMOVDQU64Masked128 (VPBROADCASTQ128 x) mask) => (VPBROADCASTQMasked128 x mask)
|
||||
(VMOVDQU8Masked128 (VPBROADCASTB128 x) mask) => (VPBROADCASTBMasked128 x mask)
|
||||
(VMOVDQU16Masked128 (VPBROADCASTW128 x) mask) => (VPBROADCASTWMasked128 x mask)
|
||||
(VMOVDQU32Masked128 (VPBROADCASTD128 x) mask) => (VPBROADCASTDMasked128 x mask)
|
||||
(VMOVDQU32Masked256 (VBROADCASTSS256 x) mask) => (VBROADCASTSSMasked256 x mask)
|
||||
(VMOVDQU32Masked128 (VBROADCASTSS128 x) mask) => (VBROADCASTSSMasked128 x mask)
|
||||
(VMOVDQU64Masked256 (VBROADCASTSD256 x) mask) => (VBROADCASTSDMasked256 x mask)
|
||||
(VMOVDQU8Masked256 (VPBROADCASTB256 x) mask) => (VPBROADCASTBMasked256 x mask)
|
||||
(VMOVDQU16Masked256 (VPBROADCASTW256 x) mask) => (VPBROADCASTWMasked256 x mask)
|
||||
(VMOVDQU32Masked256 (VPBROADCASTD256 x) mask) => (VPBROADCASTDMasked256 x mask)
|
||||
(VMOVDQU32Masked128 (VPBROADCASTD128 x) mask) => (VPBROADCASTDMasked128 x mask)
|
||||
(VMOVDQU64Masked256 (VPBROADCASTQ256 x) mask) => (VPBROADCASTQMasked256 x mask)
|
||||
(VMOVDQU32Masked512 (VBROADCASTSS512 x) mask) => (VBROADCASTSSMasked512 x mask)
|
||||
(VMOVDQU32Masked256 (VBROADCASTSS256 x) mask) => (VBROADCASTSSMasked256 x mask)
|
||||
(VMOVDQU64Masked512 (VBROADCASTSD512 x) mask) => (VBROADCASTSDMasked512 x mask)
|
||||
(VMOVDQU8Masked512 (VPBROADCASTB512 x) mask) => (VPBROADCASTBMasked512 x mask)
|
||||
(VMOVDQU16Masked512 (VPBROADCASTW512 x) mask) => (VPBROADCASTWMasked512 x mask)
|
||||
(VMOVDQU32Masked512 (VPBROADCASTD512 x) mask) => (VPBROADCASTDMasked512 x mask)
|
||||
(VMOVDQU16Masked128 (VPBROADCASTW128 x) mask) => (VPBROADCASTWMasked128 x mask)
|
||||
(VMOVDQU32Masked256 (VPBROADCASTD256 x) mask) => (VPBROADCASTDMasked256 x mask)
|
||||
(VMOVDQU64Masked512 (VPBROADCASTQ512 x) mask) => (VPBROADCASTQMasked512 x mask)
|
||||
(VMOVDQU32Masked512 (VBROADCASTSS512 x) mask) => (VBROADCASTSSMasked512 x mask)
|
||||
(VMOVDQU8Masked128 (VPBROADCASTB128 x) mask) => (VPBROADCASTBMasked128 x mask)
|
||||
(VMOVDQU16Masked256 (VPBROADCASTW256 x) mask) => (VPBROADCASTWMasked256 x mask)
|
||||
(VMOVDQU32Masked512 (VPBROADCASTD512 x) mask) => (VPBROADCASTDMasked512 x mask)
|
||||
(VMOVDQU8Masked256 (VPBROADCASTB256 x) mask) => (VPBROADCASTBMasked256 x mask)
|
||||
(VMOVDQU16Masked512 (VPBROADCASTW512 x) mask) => (VPBROADCASTWMasked512 x mask)
|
||||
(VMOVDQU8Masked512 (VPBROADCASTB512 x) mask) => (VPBROADCASTBMasked512 x mask)
|
||||
(VMOVDQU32Masked128 (VRNDSCALEPS128 [a] x) mask) => (VRNDSCALEPSMasked128 [a] x mask)
|
||||
(VMOVDQU32Masked256 (VRNDSCALEPS256 [a] x) mask) => (VRNDSCALEPSMasked256 [a] x mask)
|
||||
(VMOVDQU32Masked512 (VRNDSCALEPS512 [a] x) mask) => (VRNDSCALEPSMasked512 [a] x mask)
|
||||
|
||||
@@ -143,36 +143,36 @@ func simdGenericOps() []opData {
|
||||
{name: "AverageUint16x8", argLength: 2, commutative: true},
|
||||
{name: "AverageUint16x16", argLength: 2, commutative: true},
|
||||
{name: "AverageUint16x32", argLength: 2, commutative: true},
|
||||
{name: "Broadcast128Float32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast128Float64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast128Int8x16", argLength: 1, commutative: false},
|
||||
{name: "Broadcast128Int16x8", argLength: 1, commutative: false},
|
||||
{name: "Broadcast128Int32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast128Int64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast128Uint8x16", argLength: 1, commutative: false},
|
||||
{name: "Broadcast128Uint16x8", argLength: 1, commutative: false},
|
||||
{name: "Broadcast128Uint32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast128Uint64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast256Float32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast256Float64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast256Int8x16", argLength: 1, commutative: false},
|
||||
{name: "Broadcast256Int16x8", argLength: 1, commutative: false},
|
||||
{name: "Broadcast256Int32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast256Int64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast256Uint8x16", argLength: 1, commutative: false},
|
||||
{name: "Broadcast256Uint16x8", argLength: 1, commutative: false},
|
||||
{name: "Broadcast256Uint32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast256Uint64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast512Float32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast512Float64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast512Int8x16", argLength: 1, commutative: false},
|
||||
{name: "Broadcast512Int16x8", argLength: 1, commutative: false},
|
||||
{name: "Broadcast512Int32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast512Int64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast512Uint8x16", argLength: 1, commutative: false},
|
||||
{name: "Broadcast512Uint16x8", argLength: 1, commutative: false},
|
||||
{name: "Broadcast512Uint32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast512Uint64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To2Float64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To2Int64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To2Uint64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To4Float32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To4Float64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To4Int32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To4Int64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To4Uint32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To4Uint64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To8Float32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To8Float64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To8Int16x8", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To8Int32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To8Int64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To8Uint16x8", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To8Uint32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To8Uint64x2", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To16Float32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To16Int8x16", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To16Int16x8", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To16Int32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To16Uint8x16", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To16Uint16x8", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To16Uint32x4", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To32Int8x16", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To32Int16x8", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To32Uint8x16", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To32Uint16x8", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To64Int8x16", argLength: 1, commutative: false},
|
||||
{name: "Broadcast1To64Uint8x16", argLength: 1, commutative: false},
|
||||
{name: "CeilFloat32x4", argLength: 1, commutative: false},
|
||||
{name: "CeilFloat32x8", argLength: 1, commutative: false},
|
||||
{name: "CeilFloat64x2", argLength: 1, commutative: false},
|
||||
|
||||
@@ -6309,36 +6309,36 @@ const (
|
||||
OpAverageUint16x8
|
||||
OpAverageUint16x16
|
||||
OpAverageUint16x32
|
||||
OpBroadcast128Float32x4
|
||||
OpBroadcast128Float64x2
|
||||
OpBroadcast128Int8x16
|
||||
OpBroadcast128Int16x8
|
||||
OpBroadcast128Int32x4
|
||||
OpBroadcast128Int64x2
|
||||
OpBroadcast128Uint8x16
|
||||
OpBroadcast128Uint16x8
|
||||
OpBroadcast128Uint32x4
|
||||
OpBroadcast128Uint64x2
|
||||
OpBroadcast256Float32x4
|
||||
OpBroadcast256Float64x2
|
||||
OpBroadcast256Int8x16
|
||||
OpBroadcast256Int16x8
|
||||
OpBroadcast256Int32x4
|
||||
OpBroadcast256Int64x2
|
||||
OpBroadcast256Uint8x16
|
||||
OpBroadcast256Uint16x8
|
||||
OpBroadcast256Uint32x4
|
||||
OpBroadcast256Uint64x2
|
||||
OpBroadcast512Float32x4
|
||||
OpBroadcast512Float64x2
|
||||
OpBroadcast512Int8x16
|
||||
OpBroadcast512Int16x8
|
||||
OpBroadcast512Int32x4
|
||||
OpBroadcast512Int64x2
|
||||
OpBroadcast512Uint8x16
|
||||
OpBroadcast512Uint16x8
|
||||
OpBroadcast512Uint32x4
|
||||
OpBroadcast512Uint64x2
|
||||
OpBroadcast1To2Float64x2
|
||||
OpBroadcast1To2Int64x2
|
||||
OpBroadcast1To2Uint64x2
|
||||
OpBroadcast1To4Float32x4
|
||||
OpBroadcast1To4Float64x2
|
||||
OpBroadcast1To4Int32x4
|
||||
OpBroadcast1To4Int64x2
|
||||
OpBroadcast1To4Uint32x4
|
||||
OpBroadcast1To4Uint64x2
|
||||
OpBroadcast1To8Float32x4
|
||||
OpBroadcast1To8Float64x2
|
||||
OpBroadcast1To8Int16x8
|
||||
OpBroadcast1To8Int32x4
|
||||
OpBroadcast1To8Int64x2
|
||||
OpBroadcast1To8Uint16x8
|
||||
OpBroadcast1To8Uint32x4
|
||||
OpBroadcast1To8Uint64x2
|
||||
OpBroadcast1To16Float32x4
|
||||
OpBroadcast1To16Int8x16
|
||||
OpBroadcast1To16Int16x8
|
||||
OpBroadcast1To16Int32x4
|
||||
OpBroadcast1To16Uint8x16
|
||||
OpBroadcast1To16Uint16x8
|
||||
OpBroadcast1To16Uint32x4
|
||||
OpBroadcast1To32Int8x16
|
||||
OpBroadcast1To32Int16x8
|
||||
OpBroadcast1To32Uint8x16
|
||||
OpBroadcast1To32Uint16x8
|
||||
OpBroadcast1To64Int8x16
|
||||
OpBroadcast1To64Uint8x16
|
||||
OpCeilFloat32x4
|
||||
OpCeilFloat32x8
|
||||
OpCeilFloat64x2
|
||||
@@ -89875,152 +89875,152 @@ var opcodeTable = [...]opInfo{
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128Float32x4",
|
||||
name: "Broadcast1To2Float64x2",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128Float64x2",
|
||||
name: "Broadcast1To2Int64x2",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128Int8x16",
|
||||
name: "Broadcast1To2Uint64x2",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128Int16x8",
|
||||
name: "Broadcast1To4Float32x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128Int32x4",
|
||||
name: "Broadcast1To4Float64x2",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128Int64x2",
|
||||
name: "Broadcast1To4Int32x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128Uint8x16",
|
||||
name: "Broadcast1To4Int64x2",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128Uint16x8",
|
||||
name: "Broadcast1To4Uint32x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128Uint32x4",
|
||||
name: "Broadcast1To4Uint64x2",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast128Uint64x2",
|
||||
name: "Broadcast1To8Float32x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256Float32x4",
|
||||
name: "Broadcast1To8Float64x2",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256Float64x2",
|
||||
name: "Broadcast1To8Int16x8",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256Int8x16",
|
||||
name: "Broadcast1To8Int32x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256Int16x8",
|
||||
name: "Broadcast1To8Int64x2",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256Int32x4",
|
||||
name: "Broadcast1To8Uint16x8",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256Int64x2",
|
||||
name: "Broadcast1To8Uint32x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256Uint8x16",
|
||||
name: "Broadcast1To8Uint64x2",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256Uint16x8",
|
||||
name: "Broadcast1To16Float32x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256Uint32x4",
|
||||
name: "Broadcast1To16Int8x16",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast256Uint64x2",
|
||||
name: "Broadcast1To16Int16x8",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512Float32x4",
|
||||
name: "Broadcast1To16Int32x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512Float64x2",
|
||||
name: "Broadcast1To16Uint8x16",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512Int8x16",
|
||||
name: "Broadcast1To16Uint16x8",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512Int16x8",
|
||||
name: "Broadcast1To16Uint32x4",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512Int32x4",
|
||||
name: "Broadcast1To32Int8x16",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512Int64x2",
|
||||
name: "Broadcast1To32Int16x8",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512Uint8x16",
|
||||
name: "Broadcast1To32Uint8x16",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512Uint16x8",
|
||||
name: "Broadcast1To32Uint16x8",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512Uint32x4",
|
||||
name: "Broadcast1To64Int8x16",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Broadcast512Uint64x2",
|
||||
name: "Broadcast1To64Uint8x16",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
|
||||
@@ -2479,96 +2479,96 @@ func rewriteValueAMD64(v *Value) bool {
|
||||
return rewriteValueAMD64_OpBitLen64(v)
|
||||
case OpBitLen8:
|
||||
return rewriteValueAMD64_OpBitLen8(v)
|
||||
case OpBroadcast128Float32x4:
|
||||
v.Op = OpAMD64VBROADCASTSS128
|
||||
return true
|
||||
case OpBroadcast128Float64x2:
|
||||
v.Op = OpAMD64VPBROADCASTQ128
|
||||
return true
|
||||
case OpBroadcast128Int16x8:
|
||||
v.Op = OpAMD64VPBROADCASTW128
|
||||
return true
|
||||
case OpBroadcast128Int32x4:
|
||||
v.Op = OpAMD64VPBROADCASTD128
|
||||
return true
|
||||
case OpBroadcast128Int64x2:
|
||||
v.Op = OpAMD64VPBROADCASTQ128
|
||||
return true
|
||||
case OpBroadcast128Int8x16:
|
||||
v.Op = OpAMD64VPBROADCASTB128
|
||||
return true
|
||||
case OpBroadcast128Uint16x8:
|
||||
v.Op = OpAMD64VPBROADCASTW128
|
||||
return true
|
||||
case OpBroadcast128Uint32x4:
|
||||
v.Op = OpAMD64VPBROADCASTD128
|
||||
return true
|
||||
case OpBroadcast128Uint64x2:
|
||||
v.Op = OpAMD64VPBROADCASTQ128
|
||||
return true
|
||||
case OpBroadcast128Uint8x16:
|
||||
v.Op = OpAMD64VPBROADCASTB128
|
||||
return true
|
||||
case OpBroadcast256Float32x4:
|
||||
v.Op = OpAMD64VBROADCASTSS256
|
||||
return true
|
||||
case OpBroadcast256Float64x2:
|
||||
v.Op = OpAMD64VBROADCASTSD256
|
||||
return true
|
||||
case OpBroadcast256Int16x8:
|
||||
v.Op = OpAMD64VPBROADCASTW256
|
||||
return true
|
||||
case OpBroadcast256Int32x4:
|
||||
v.Op = OpAMD64VPBROADCASTD256
|
||||
return true
|
||||
case OpBroadcast256Int64x2:
|
||||
v.Op = OpAMD64VPBROADCASTQ256
|
||||
return true
|
||||
case OpBroadcast256Int8x16:
|
||||
v.Op = OpAMD64VPBROADCASTB256
|
||||
return true
|
||||
case OpBroadcast256Uint16x8:
|
||||
v.Op = OpAMD64VPBROADCASTW256
|
||||
return true
|
||||
case OpBroadcast256Uint32x4:
|
||||
v.Op = OpAMD64VPBROADCASTD256
|
||||
return true
|
||||
case OpBroadcast256Uint64x2:
|
||||
v.Op = OpAMD64VPBROADCASTQ256
|
||||
return true
|
||||
case OpBroadcast256Uint8x16:
|
||||
v.Op = OpAMD64VPBROADCASTB256
|
||||
return true
|
||||
case OpBroadcast512Float32x4:
|
||||
case OpBroadcast1To16Float32x4:
|
||||
v.Op = OpAMD64VBROADCASTSS512
|
||||
return true
|
||||
case OpBroadcast512Float64x2:
|
||||
case OpBroadcast1To16Int16x8:
|
||||
v.Op = OpAMD64VPBROADCASTW256
|
||||
return true
|
||||
case OpBroadcast1To16Int32x4:
|
||||
v.Op = OpAMD64VPBROADCASTD512
|
||||
return true
|
||||
case OpBroadcast1To16Int8x16:
|
||||
v.Op = OpAMD64VPBROADCASTB128
|
||||
return true
|
||||
case OpBroadcast1To16Uint16x8:
|
||||
v.Op = OpAMD64VPBROADCASTW256
|
||||
return true
|
||||
case OpBroadcast1To16Uint32x4:
|
||||
v.Op = OpAMD64VPBROADCASTD512
|
||||
return true
|
||||
case OpBroadcast1To16Uint8x16:
|
||||
v.Op = OpAMD64VPBROADCASTB128
|
||||
return true
|
||||
case OpBroadcast1To2Float64x2:
|
||||
v.Op = OpAMD64VPBROADCASTQ128
|
||||
return true
|
||||
case OpBroadcast1To2Int64x2:
|
||||
v.Op = OpAMD64VPBROADCASTQ128
|
||||
return true
|
||||
case OpBroadcast1To2Uint64x2:
|
||||
v.Op = OpAMD64VPBROADCASTQ128
|
||||
return true
|
||||
case OpBroadcast1To32Int16x8:
|
||||
v.Op = OpAMD64VPBROADCASTW512
|
||||
return true
|
||||
case OpBroadcast1To32Int8x16:
|
||||
v.Op = OpAMD64VPBROADCASTB256
|
||||
return true
|
||||
case OpBroadcast1To32Uint16x8:
|
||||
v.Op = OpAMD64VPBROADCASTW512
|
||||
return true
|
||||
case OpBroadcast1To32Uint8x16:
|
||||
v.Op = OpAMD64VPBROADCASTB256
|
||||
return true
|
||||
case OpBroadcast1To4Float32x4:
|
||||
v.Op = OpAMD64VBROADCASTSS128
|
||||
return true
|
||||
case OpBroadcast1To4Float64x2:
|
||||
v.Op = OpAMD64VBROADCASTSD256
|
||||
return true
|
||||
case OpBroadcast1To4Int32x4:
|
||||
v.Op = OpAMD64VPBROADCASTD128
|
||||
return true
|
||||
case OpBroadcast1To4Int64x2:
|
||||
v.Op = OpAMD64VPBROADCASTQ256
|
||||
return true
|
||||
case OpBroadcast1To4Uint32x4:
|
||||
v.Op = OpAMD64VPBROADCASTD128
|
||||
return true
|
||||
case OpBroadcast1To4Uint64x2:
|
||||
v.Op = OpAMD64VPBROADCASTQ256
|
||||
return true
|
||||
case OpBroadcast1To64Int8x16:
|
||||
v.Op = OpAMD64VPBROADCASTB512
|
||||
return true
|
||||
case OpBroadcast1To64Uint8x16:
|
||||
v.Op = OpAMD64VPBROADCASTB512
|
||||
return true
|
||||
case OpBroadcast1To8Float32x4:
|
||||
v.Op = OpAMD64VBROADCASTSS256
|
||||
return true
|
||||
case OpBroadcast1To8Float64x2:
|
||||
v.Op = OpAMD64VBROADCASTSD512
|
||||
return true
|
||||
case OpBroadcast512Int16x8:
|
||||
v.Op = OpAMD64VPBROADCASTW512
|
||||
case OpBroadcast1To8Int16x8:
|
||||
v.Op = OpAMD64VPBROADCASTW128
|
||||
return true
|
||||
case OpBroadcast512Int32x4:
|
||||
v.Op = OpAMD64VPBROADCASTD512
|
||||
case OpBroadcast1To8Int32x4:
|
||||
v.Op = OpAMD64VPBROADCASTD256
|
||||
return true
|
||||
case OpBroadcast512Int64x2:
|
||||
case OpBroadcast1To8Int64x2:
|
||||
v.Op = OpAMD64VPBROADCASTQ512
|
||||
return true
|
||||
case OpBroadcast512Int8x16:
|
||||
v.Op = OpAMD64VPBROADCASTB512
|
||||
case OpBroadcast1To8Uint16x8:
|
||||
v.Op = OpAMD64VPBROADCASTW128
|
||||
return true
|
||||
case OpBroadcast512Uint16x8:
|
||||
v.Op = OpAMD64VPBROADCASTW512
|
||||
case OpBroadcast1To8Uint32x4:
|
||||
v.Op = OpAMD64VPBROADCASTD256
|
||||
return true
|
||||
case OpBroadcast512Uint32x4:
|
||||
v.Op = OpAMD64VPBROADCASTD512
|
||||
return true
|
||||
case OpBroadcast512Uint64x2:
|
||||
case OpBroadcast1To8Uint64x2:
|
||||
v.Op = OpAMD64VPBROADCASTQ512
|
||||
return true
|
||||
case OpBroadcast512Uint8x16:
|
||||
v.Op = OpAMD64VPBROADCASTB512
|
||||
return true
|
||||
case OpBswap16:
|
||||
return rewriteValueAMD64_OpBswap16(v)
|
||||
case OpBswap32:
|
||||
|
||||
@@ -152,36 +152,36 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||
addF(simdPackage, "Uint16x8.Average", opLen2(ssa.OpAverageUint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x16.Average", opLen2(ssa.OpAverageUint16x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x32.Average", opLen2(ssa.OpAverageUint16x32, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Broadcast128", opLen1(ssa.OpBroadcast128Float32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.Broadcast128", opLen1(ssa.OpBroadcast128Float64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.Broadcast128", opLen1(ssa.OpBroadcast128Int8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.Broadcast128", opLen1(ssa.OpBroadcast128Int16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.Broadcast128", opLen1(ssa.OpBroadcast128Int32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.Broadcast128", opLen1(ssa.OpBroadcast128Int64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.Broadcast128", opLen1(ssa.OpBroadcast128Uint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.Broadcast128", opLen1(ssa.OpBroadcast128Uint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.Broadcast128", opLen1(ssa.OpBroadcast128Uint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.Broadcast128", opLen1(ssa.OpBroadcast128Uint64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Broadcast256", opLen1(ssa.OpBroadcast256Float32x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.Broadcast256", opLen1(ssa.OpBroadcast256Float64x2, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.Broadcast256", opLen1(ssa.OpBroadcast256Int8x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.Broadcast256", opLen1(ssa.OpBroadcast256Int16x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.Broadcast256", opLen1(ssa.OpBroadcast256Int32x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.Broadcast256", opLen1(ssa.OpBroadcast256Int64x2, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.Broadcast256", opLen1(ssa.OpBroadcast256Uint8x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.Broadcast256", opLen1(ssa.OpBroadcast256Uint16x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.Broadcast256", opLen1(ssa.OpBroadcast256Uint32x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.Broadcast256", opLen1(ssa.OpBroadcast256Uint64x2, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Broadcast512", opLen1(ssa.OpBroadcast512Float32x4, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.Broadcast512", opLen1(ssa.OpBroadcast512Float64x2, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.Broadcast512", opLen1(ssa.OpBroadcast512Int8x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.Broadcast512", opLen1(ssa.OpBroadcast512Int16x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.Broadcast512", opLen1(ssa.OpBroadcast512Int32x4, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.Broadcast512", opLen1(ssa.OpBroadcast512Int64x2, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.Broadcast512", opLen1(ssa.OpBroadcast512Uint8x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.Broadcast512", opLen1(ssa.OpBroadcast512Uint16x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.Broadcast512", opLen1(ssa.OpBroadcast512Uint32x4, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.Broadcast512", opLen1(ssa.OpBroadcast512Uint64x2, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.Broadcast1To2", opLen1(ssa.OpBroadcast1To2Float64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.Broadcast1To2", opLen1(ssa.OpBroadcast1To2Int64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.Broadcast1To2", opLen1(ssa.OpBroadcast1To2Uint64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Float32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Float64x2, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Int32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Int64x2, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Uint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Uint64x2, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Float32x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Float64x2, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Int16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Int32x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Int64x2, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Uint16x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Uint32x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Uint64x2, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Float32x4, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Int8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Int16x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Int32x4, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Uint8x16, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Uint16x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Uint32x4, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Int8x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int16x8.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Int16x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Uint8x16, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint16x8.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Uint16x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.Broadcast1To64", opLen1(ssa.OpBroadcast1To64Int8x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint8x16.Broadcast1To64", opLen1(ssa.OpBroadcast1To64Uint8x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.Ceil", opLen1(ssa.OpCeilFloat32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.Ceil", opLen1(ssa.OpCeilFloat32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.Ceil", opLen1(ssa.OpCeilFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
|
||||
@@ -69,21 +69,36 @@
|
||||
documentation: !string |-
|
||||
// NAME performs an expansion on a vector x whose elements are packed to lower parts.
|
||||
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
|
||||
- go: Broadcast128
|
||||
- go: Broadcast1To2
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
- go: Broadcast256
|
||||
// NAME copies the lowest element of its input to all 2 elements of
|
||||
// the output vector.
|
||||
- go: Broadcast1To4
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
- go: Broadcast512
|
||||
// NAME copies the lowest element of its input to all 4 elements of
|
||||
// the output vector.
|
||||
- go: Broadcast1To8
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME copies element zero of its (128-bit) input to all elements of
|
||||
// the 512-bit output vector.
|
||||
// NAME copies the lowest element of its input to all 8 elements of
|
||||
// the output vector.
|
||||
- go: Broadcast1To16
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME copies the lowest element of its input to all 16 elements of
|
||||
// the output vector.
|
||||
- go: Broadcast1To32
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME copies the lowest element of its input to all 32 elements of
|
||||
// the output vector.
|
||||
- go: Broadcast1To64
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME copies the lowest element of its input to all 64 elements of
|
||||
// the output vector.
|
||||
- go: PermuteOrZeroGrouped
|
||||
commutative: false
|
||||
documentation: !string |- # Detailed documentation will rely on the specific ops.
|
||||
|
||||
@@ -376,21 +376,21 @@
|
||||
out:
|
||||
- *any
|
||||
|
||||
- go: Broadcast128
|
||||
asm: VPBROADCAST[BWDQ]
|
||||
- go: Broadcast1To2
|
||||
asm: VPBROADCASTQ
|
||||
in:
|
||||
- class: vreg
|
||||
bits: 128
|
||||
elemBits: $e
|
||||
elemBits: 64
|
||||
base: $b
|
||||
out:
|
||||
- class: vreg
|
||||
bits: 128
|
||||
elemBits: $e
|
||||
elemBits: 64
|
||||
base: $b
|
||||
|
||||
# weirdly, this one case on AVX2 is memory-operand-only
|
||||
- go: Broadcast128
|
||||
- go: Broadcast1To2
|
||||
asm: VPBROADCASTQ
|
||||
in:
|
||||
- class: vreg
|
||||
@@ -405,70 +405,93 @@
|
||||
base: int
|
||||
OverwriteBase: float
|
||||
|
||||
- go: Broadcast256
|
||||
- go: Broadcast1To4
|
||||
asm: VPBROADCAST[BWDQ]
|
||||
in:
|
||||
- class: vreg
|
||||
bits: 128
|
||||
elemBits: $e
|
||||
base: $b
|
||||
out:
|
||||
- class: vreg
|
||||
bits: 256
|
||||
elemBits: $e
|
||||
lanes: 4
|
||||
base: $b
|
||||
|
||||
- go: Broadcast512
|
||||
- go: Broadcast1To8
|
||||
asm: VPBROADCAST[BWDQ]
|
||||
in:
|
||||
- class: vreg
|
||||
bits: 128
|
||||
elemBits: $e
|
||||
base: $b
|
||||
out:
|
||||
- class: vreg
|
||||
bits: 512
|
||||
elemBits: $e
|
||||
lanes: 8
|
||||
base: $b
|
||||
|
||||
- go: Broadcast128
|
||||
- go: Broadcast1To16
|
||||
asm: VPBROADCAST[BWDQ]
|
||||
in:
|
||||
- class: vreg
|
||||
bits: 128
|
||||
base: $b
|
||||
out:
|
||||
- class: vreg
|
||||
lanes: 16
|
||||
base: $b
|
||||
|
||||
- go: Broadcast1To32
|
||||
asm: VPBROADCAST[BWDQ]
|
||||
in:
|
||||
- class: vreg
|
||||
bits: 128
|
||||
base: $b
|
||||
out:
|
||||
- class: vreg
|
||||
lanes: 32
|
||||
base: $b
|
||||
|
||||
- go: Broadcast1To64
|
||||
asm: VPBROADCASTB
|
||||
in:
|
||||
- class: vreg
|
||||
bits: 128
|
||||
base: $b
|
||||
out:
|
||||
- class: vreg
|
||||
lanes: 64
|
||||
base: $b
|
||||
|
||||
- go: Broadcast1To4
|
||||
asm: VBROADCASTS[SD]
|
||||
in:
|
||||
- class: vreg
|
||||
bits: 128
|
||||
elemBits: $e
|
||||
base: $b
|
||||
base: float
|
||||
out:
|
||||
- class: vreg
|
||||
bits: 128
|
||||
elemBits: $e
|
||||
base: $b
|
||||
lanes: 4
|
||||
base: float
|
||||
|
||||
- go: Broadcast256
|
||||
- go: Broadcast1To8
|
||||
asm: VBROADCASTS[SD]
|
||||
in:
|
||||
- class: vreg
|
||||
bits: 128
|
||||
elemBits: $e
|
||||
base: $b
|
||||
base: float
|
||||
out:
|
||||
- class: vreg
|
||||
bits: 256
|
||||
elemBits: $e
|
||||
base: $b
|
||||
lanes: 8
|
||||
base: float
|
||||
|
||||
- go: Broadcast512
|
||||
- go: Broadcast1To16
|
||||
asm: VBROADCASTS[SD]
|
||||
in:
|
||||
- class: vreg
|
||||
bits: 128
|
||||
elemBits: $e
|
||||
base: $b
|
||||
base: float
|
||||
out:
|
||||
- class: vreg
|
||||
bits: 512
|
||||
elemBits: $e
|
||||
base: $b
|
||||
lanes: 16
|
||||
base: float
|
||||
|
||||
# VPSHUFB for 128-bit byte shuffles will be picked with higher priority than VPERMB, given its lower CPU feature requirement. (It's AVX)
|
||||
- go: PermuteOrZero
|
||||
|
||||
@@ -873,7 +873,7 @@ var broadcastTemplate = templateOf("Broadcast functions", `
|
||||
// Emulated, CPU Feature: {{.CPUfeatureBC}}
|
||||
func Broadcast{{.VType}}(x {{.Etype}}) {{.VType}} {
|
||||
var z {{.As128BitVec }}
|
||||
return z.SetElem(0, x).Broadcast{{.Vwidth}}()
|
||||
return z.SetElem(0, x).Broadcast1To{{.Count}}()
|
||||
}
|
||||
`)
|
||||
|
||||
|
||||
@@ -805,191 +805,197 @@ func (x Uint16x16) Average(y Uint16x16) Uint16x16
|
||||
// Asm: VPAVGW, CPU Feature: AVX512
|
||||
func (x Uint16x32) Average(y Uint16x32) Uint16x32
|
||||
|
||||
/* Broadcast128 */
|
||||
/* Broadcast1To2 */
|
||||
|
||||
// Broadcast128 copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
// Broadcast1To2 copies the lowest element of its input to all 2 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX2
|
||||
func (x Float64x2) Broadcast1To2() Float64x2
|
||||
|
||||
// Broadcast1To2 copies the lowest element of its input to all 2 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX2
|
||||
func (x Int64x2) Broadcast1To2() Int64x2
|
||||
|
||||
// Broadcast1To2 copies the lowest element of its input to all 2 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX2
|
||||
func (x Uint64x2) Broadcast1To2() Uint64x2
|
||||
|
||||
/* Broadcast1To4 */
|
||||
|
||||
// Broadcast1To4 copies the lowest element of its input to all 4 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VBROADCASTSS, CPU Feature: AVX2
|
||||
func (x Float32x4) Broadcast128() Float32x4
|
||||
func (x Float32x4) Broadcast1To4() Float32x4
|
||||
|
||||
// Broadcast128 copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX2
|
||||
func (x Float64x2) Broadcast128() Float64x2
|
||||
|
||||
// Broadcast128 copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX2
|
||||
func (x Int8x16) Broadcast128() Int8x16
|
||||
|
||||
// Broadcast128 copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX2
|
||||
func (x Int16x8) Broadcast128() Int16x8
|
||||
|
||||
// Broadcast128 copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX2
|
||||
func (x Int32x4) Broadcast128() Int32x4
|
||||
|
||||
// Broadcast128 copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX2
|
||||
func (x Int64x2) Broadcast128() Int64x2
|
||||
|
||||
// Broadcast128 copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX2
|
||||
func (x Uint8x16) Broadcast128() Uint8x16
|
||||
|
||||
// Broadcast128 copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX2
|
||||
func (x Uint16x8) Broadcast128() Uint16x8
|
||||
|
||||
// Broadcast128 copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX2
|
||||
func (x Uint32x4) Broadcast128() Uint32x4
|
||||
|
||||
// Broadcast128 copies element zero of its (128-bit) input to all elements of
|
||||
// the 128-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX2
|
||||
func (x Uint64x2) Broadcast128() Uint64x2
|
||||
|
||||
/* Broadcast256 */
|
||||
|
||||
// Broadcast256 copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
//
|
||||
// Asm: VBROADCASTSS, CPU Feature: AVX2
|
||||
func (x Float32x4) Broadcast256() Float32x8
|
||||
|
||||
// Broadcast256 copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
// Broadcast1To4 copies the lowest element of its input to all 4 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VBROADCASTSD, CPU Feature: AVX2
|
||||
func (x Float64x2) Broadcast256() Float64x4
|
||||
func (x Float64x2) Broadcast1To4() Float64x4
|
||||
|
||||
// Broadcast256 copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX2
|
||||
func (x Int8x16) Broadcast256() Int8x32
|
||||
|
||||
// Broadcast256 copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX2
|
||||
func (x Int16x8) Broadcast256() Int16x16
|
||||
|
||||
// Broadcast256 copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
// Broadcast1To4 copies the lowest element of its input to all 4 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX2
|
||||
func (x Int32x4) Broadcast256() Int32x8
|
||||
func (x Int32x4) Broadcast1To4() Int32x4
|
||||
|
||||
// Broadcast256 copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
// Broadcast1To4 copies the lowest element of its input to all 4 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX2
|
||||
func (x Int64x2) Broadcast256() Int64x4
|
||||
func (x Int64x2) Broadcast1To4() Int64x4
|
||||
|
||||
// Broadcast256 copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX2
|
||||
func (x Uint8x16) Broadcast256() Uint8x32
|
||||
|
||||
// Broadcast256 copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX2
|
||||
func (x Uint16x8) Broadcast256() Uint16x16
|
||||
|
||||
// Broadcast256 copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
// Broadcast1To4 copies the lowest element of its input to all 4 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX2
|
||||
func (x Uint32x4) Broadcast256() Uint32x8
|
||||
func (x Uint32x4) Broadcast1To4() Uint32x4
|
||||
|
||||
// Broadcast256 copies element zero of its (128-bit) input to all elements of
|
||||
// the 256-bit output vector.
|
||||
// Broadcast1To4 copies the lowest element of its input to all 4 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX2
|
||||
func (x Uint64x2) Broadcast256() Uint64x4
|
||||
func (x Uint64x2) Broadcast1To4() Uint64x4
|
||||
|
||||
/* Broadcast512 */
|
||||
/* Broadcast1To8 */
|
||||
|
||||
// Broadcast512 copies element zero of its (128-bit) input to all elements of
|
||||
// the 512-bit output vector.
|
||||
// Broadcast1To8 copies the lowest element of its input to all 8 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VBROADCASTSS, CPU Feature: AVX512
|
||||
func (x Float32x4) Broadcast512() Float32x16
|
||||
// Asm: VBROADCASTSS, CPU Feature: AVX2
|
||||
func (x Float32x4) Broadcast1To8() Float32x8
|
||||
|
||||
// Broadcast512 copies element zero of its (128-bit) input to all elements of
|
||||
// the 512-bit output vector.
|
||||
// Broadcast1To8 copies the lowest element of its input to all 8 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VBROADCASTSD, CPU Feature: AVX512
|
||||
func (x Float64x2) Broadcast512() Float64x8
|
||||
func (x Float64x2) Broadcast1To8() Float64x8
|
||||
|
||||
// Broadcast512 copies element zero of its (128-bit) input to all elements of
|
||||
// the 512-bit output vector.
|
||||
// Broadcast1To8 copies the lowest element of its input to all 8 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX512
|
||||
func (x Int8x16) Broadcast512() Int8x64
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX2
|
||||
func (x Int16x8) Broadcast1To8() Int16x8
|
||||
|
||||
// Broadcast512 copies element zero of its (128-bit) input to all elements of
|
||||
// the 512-bit output vector.
|
||||
// Broadcast1To8 copies the lowest element of its input to all 8 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX512
|
||||
func (x Int16x8) Broadcast512() Int16x32
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX2
|
||||
func (x Int32x4) Broadcast1To8() Int32x8
|
||||
|
||||
// Broadcast512 copies element zero of its (128-bit) input to all elements of
|
||||
// the 512-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX512
|
||||
func (x Int32x4) Broadcast512() Int32x16
|
||||
|
||||
// Broadcast512 copies element zero of its (128-bit) input to all elements of
|
||||
// the 512-bit output vector.
|
||||
// Broadcast1To8 copies the lowest element of its input to all 8 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX512
|
||||
func (x Int64x2) Broadcast512() Int64x8
|
||||
func (x Int64x2) Broadcast1To8() Int64x8
|
||||
|
||||
// Broadcast512 copies element zero of its (128-bit) input to all elements of
|
||||
// the 512-bit output vector.
|
||||
// Broadcast1To8 copies the lowest element of its input to all 8 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX512
|
||||
func (x Uint8x16) Broadcast512() Uint8x64
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX2
|
||||
func (x Uint16x8) Broadcast1To8() Uint16x8
|
||||
|
||||
// Broadcast512 copies element zero of its (128-bit) input to all elements of
|
||||
// the 512-bit output vector.
|
||||
// Broadcast1To8 copies the lowest element of its input to all 8 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX512
|
||||
func (x Uint16x8) Broadcast512() Uint16x32
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX2
|
||||
func (x Uint32x4) Broadcast1To8() Uint32x8
|
||||
|
||||
// Broadcast512 copies element zero of its (128-bit) input to all elements of
|
||||
// the 512-bit output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX512
|
||||
func (x Uint32x4) Broadcast512() Uint32x16
|
||||
|
||||
// Broadcast512 copies element zero of its (128-bit) input to all elements of
|
||||
// the 512-bit output vector.
|
||||
// Broadcast1To8 copies the lowest element of its input to all 8 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTQ, CPU Feature: AVX512
|
||||
func (x Uint64x2) Broadcast512() Uint64x8
|
||||
func (x Uint64x2) Broadcast1To8() Uint64x8
|
||||
|
||||
/* Broadcast1To16 */
|
||||
|
||||
// Broadcast1To16 copies the lowest element of its input to all 16 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VBROADCASTSS, CPU Feature: AVX512
|
||||
func (x Float32x4) Broadcast1To16() Float32x16
|
||||
|
||||
// Broadcast1To16 copies the lowest element of its input to all 16 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX2
|
||||
func (x Int8x16) Broadcast1To16() Int8x16
|
||||
|
||||
// Broadcast1To16 copies the lowest element of its input to all 16 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX2
|
||||
func (x Int16x8) Broadcast1To16() Int16x16
|
||||
|
||||
// Broadcast1To16 copies the lowest element of its input to all 16 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX512
|
||||
func (x Int32x4) Broadcast1To16() Int32x16
|
||||
|
||||
// Broadcast1To16 copies the lowest element of its input to all 16 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX2
|
||||
func (x Uint8x16) Broadcast1To16() Uint8x16
|
||||
|
||||
// Broadcast1To16 copies the lowest element of its input to all 16 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX2
|
||||
func (x Uint16x8) Broadcast1To16() Uint16x16
|
||||
|
||||
// Broadcast1To16 copies the lowest element of its input to all 16 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTD, CPU Feature: AVX512
|
||||
func (x Uint32x4) Broadcast1To16() Uint32x16
|
||||
|
||||
/* Broadcast1To32 */
|
||||
|
||||
// Broadcast1To32 copies the lowest element of its input to all 32 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX2
|
||||
func (x Int8x16) Broadcast1To32() Int8x32
|
||||
|
||||
// Broadcast1To32 copies the lowest element of its input to all 32 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX512
|
||||
func (x Int16x8) Broadcast1To32() Int16x32
|
||||
|
||||
// Broadcast1To32 copies the lowest element of its input to all 32 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX2
|
||||
func (x Uint8x16) Broadcast1To32() Uint8x32
|
||||
|
||||
// Broadcast1To32 copies the lowest element of its input to all 32 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTW, CPU Feature: AVX512
|
||||
func (x Uint16x8) Broadcast1To32() Uint16x32
|
||||
|
||||
/* Broadcast1To64 */
|
||||
|
||||
// Broadcast1To64 copies the lowest element of its input to all 64 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX512
|
||||
func (x Int8x16) Broadcast1To64() Int8x64
|
||||
|
||||
// Broadcast1To64 copies the lowest element of its input to all 64 elements of
|
||||
// the output vector.
|
||||
//
|
||||
// Asm: VPBROADCASTB, CPU Feature: AVX512
|
||||
func (x Uint8x16) Broadcast1To64() Uint8x64
|
||||
|
||||
/* Ceil */
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ package archsimd
|
||||
// Emulated, CPU Feature: AVX2
|
||||
func BroadcastInt8x16(x int8) Int8x16 {
|
||||
var z Int8x16
|
||||
return z.SetElem(0, x).Broadcast128()
|
||||
return z.SetElem(0, x).Broadcast1To16()
|
||||
}
|
||||
|
||||
// BroadcastInt16x8 returns a vector with the input
|
||||
@@ -19,7 +19,7 @@ func BroadcastInt8x16(x int8) Int8x16 {
|
||||
// Emulated, CPU Feature: AVX2
|
||||
func BroadcastInt16x8(x int16) Int16x8 {
|
||||
var z Int16x8
|
||||
return z.SetElem(0, x).Broadcast128()
|
||||
return z.SetElem(0, x).Broadcast1To8()
|
||||
}
|
||||
|
||||
// BroadcastInt32x4 returns a vector with the input
|
||||
@@ -28,7 +28,7 @@ func BroadcastInt16x8(x int16) Int16x8 {
|
||||
// Emulated, CPU Feature: AVX2
|
||||
func BroadcastInt32x4(x int32) Int32x4 {
|
||||
var z Int32x4
|
||||
return z.SetElem(0, x).Broadcast128()
|
||||
return z.SetElem(0, x).Broadcast1To4()
|
||||
}
|
||||
|
||||
// BroadcastInt64x2 returns a vector with the input
|
||||
@@ -37,7 +37,7 @@ func BroadcastInt32x4(x int32) Int32x4 {
|
||||
// Emulated, CPU Feature: AVX2
|
||||
func BroadcastInt64x2(x int64) Int64x2 {
|
||||
var z Int64x2
|
||||
return z.SetElem(0, x).Broadcast128()
|
||||
return z.SetElem(0, x).Broadcast1To2()
|
||||
}
|
||||
|
||||
// BroadcastUint8x16 returns a vector with the input
|
||||
@@ -46,7 +46,7 @@ func BroadcastInt64x2(x int64) Int64x2 {
|
||||
// Emulated, CPU Feature: AVX2
|
||||
func BroadcastUint8x16(x uint8) Uint8x16 {
|
||||
var z Uint8x16
|
||||
return z.SetElem(0, x).Broadcast128()
|
||||
return z.SetElem(0, x).Broadcast1To16()
|
||||
}
|
||||
|
||||
// BroadcastUint16x8 returns a vector with the input
|
||||
@@ -55,7 +55,7 @@ func BroadcastUint8x16(x uint8) Uint8x16 {
|
||||
// Emulated, CPU Feature: AVX2
|
||||
func BroadcastUint16x8(x uint16) Uint16x8 {
|
||||
var z Uint16x8
|
||||
return z.SetElem(0, x).Broadcast128()
|
||||
return z.SetElem(0, x).Broadcast1To8()
|
||||
}
|
||||
|
||||
// BroadcastUint32x4 returns a vector with the input
|
||||
@@ -64,7 +64,7 @@ func BroadcastUint16x8(x uint16) Uint16x8 {
|
||||
// Emulated, CPU Feature: AVX2
|
||||
func BroadcastUint32x4(x uint32) Uint32x4 {
|
||||
var z Uint32x4
|
||||
return z.SetElem(0, x).Broadcast128()
|
||||
return z.SetElem(0, x).Broadcast1To4()
|
||||
}
|
||||
|
||||
// BroadcastUint64x2 returns a vector with the input
|
||||
@@ -73,7 +73,7 @@ func BroadcastUint32x4(x uint32) Uint32x4 {
|
||||
// Emulated, CPU Feature: AVX2
|
||||
func BroadcastUint64x2(x uint64) Uint64x2 {
|
||||
var z Uint64x2
|
||||
return z.SetElem(0, x).Broadcast128()
|
||||
return z.SetElem(0, x).Broadcast1To2()
|
||||
}
|
||||
|
||||
// BroadcastFloat32x4 returns a vector with the input
|
||||
@@ -82,7 +82,7 @@ func BroadcastUint64x2(x uint64) Uint64x2 {
|
||||
// Emulated, CPU Feature: AVX2
|
||||
func BroadcastFloat32x4(x float32) Float32x4 {
|
||||
var z Float32x4
|
||||
return z.SetElem(0, x).Broadcast128()
|
||||
return z.SetElem(0, x).Broadcast1To4()
|
||||
}
|
||||
|
||||
// BroadcastFloat64x2 returns a vector with the input
|
||||
@@ -91,7 +91,7 @@ func BroadcastFloat32x4(x float32) Float32x4 {
|
||||
// Emulated, CPU Feature: AVX2
|
||||
func BroadcastFloat64x2(x float64) Float64x2 {
|
||||
var z Float64x2
|
||||
return z.SetElem(0, x).Broadcast128()
|
||||
return z.SetElem(0, x).Broadcast1To2()
|
||||
}
|
||||
|
||||
// BroadcastInt8x32 returns a vector with the input
|
||||
@@ -100,7 +100,7 @@ func BroadcastFloat64x2(x float64) Float64x2 {
|
||||
// Emulated, CPU Feature: AVX2
|
||||
func BroadcastInt8x32(x int8) Int8x32 {
|
||||
var z Int8x16
|
||||
return z.SetElem(0, x).Broadcast256()
|
||||
return z.SetElem(0, x).Broadcast1To32()
|
||||
}
|
||||
|
||||
// BroadcastInt16x16 returns a vector with the input
|
||||
@@ -109,7 +109,7 @@ func BroadcastInt8x32(x int8) Int8x32 {
|
||||
// Emulated, CPU Feature: AVX2
|
||||
func BroadcastInt16x16(x int16) Int16x16 {
|
||||
var z Int16x8
|
||||
return z.SetElem(0, x).Broadcast256()
|
||||
return z.SetElem(0, x).Broadcast1To16()
|
||||
}
|
||||
|
||||
// BroadcastInt32x8 returns a vector with the input
|
||||
@@ -118,7 +118,7 @@ func BroadcastInt16x16(x int16) Int16x16 {
|
||||
// Emulated, CPU Feature: AVX2
|
||||
func BroadcastInt32x8(x int32) Int32x8 {
|
||||
var z Int32x4
|
||||
return z.SetElem(0, x).Broadcast256()
|
||||
return z.SetElem(0, x).Broadcast1To8()
|
||||
}
|
||||
|
||||
// BroadcastInt64x4 returns a vector with the input
|
||||
@@ -127,7 +127,7 @@ func BroadcastInt32x8(x int32) Int32x8 {
|
||||
// Emulated, CPU Feature: AVX2
|
||||
func BroadcastInt64x4(x int64) Int64x4 {
|
||||
var z Int64x2
|
||||
return z.SetElem(0, x).Broadcast256()
|
||||
return z.SetElem(0, x).Broadcast1To4()
|
||||
}
|
||||
|
||||
// BroadcastUint8x32 returns a vector with the input
|
||||
@@ -136,7 +136,7 @@ func BroadcastInt64x4(x int64) Int64x4 {
|
||||
// Emulated, CPU Feature: AVX2
|
||||
func BroadcastUint8x32(x uint8) Uint8x32 {
|
||||
var z Uint8x16
|
||||
return z.SetElem(0, x).Broadcast256()
|
||||
return z.SetElem(0, x).Broadcast1To32()
|
||||
}
|
||||
|
||||
// BroadcastUint16x16 returns a vector with the input
|
||||
@@ -145,7 +145,7 @@ func BroadcastUint8x32(x uint8) Uint8x32 {
|
||||
// Emulated, CPU Feature: AVX2
|
||||
func BroadcastUint16x16(x uint16) Uint16x16 {
|
||||
var z Uint16x8
|
||||
return z.SetElem(0, x).Broadcast256()
|
||||
return z.SetElem(0, x).Broadcast1To16()
|
||||
}
|
||||
|
||||
// BroadcastUint32x8 returns a vector with the input
|
||||
@@ -154,7 +154,7 @@ func BroadcastUint16x16(x uint16) Uint16x16 {
|
||||
// Emulated, CPU Feature: AVX2
|
||||
func BroadcastUint32x8(x uint32) Uint32x8 {
|
||||
var z Uint32x4
|
||||
return z.SetElem(0, x).Broadcast256()
|
||||
return z.SetElem(0, x).Broadcast1To8()
|
||||
}
|
||||
|
||||
// BroadcastUint64x4 returns a vector with the input
|
||||
@@ -163,7 +163,7 @@ func BroadcastUint32x8(x uint32) Uint32x8 {
|
||||
// Emulated, CPU Feature: AVX2
|
||||
func BroadcastUint64x4(x uint64) Uint64x4 {
|
||||
var z Uint64x2
|
||||
return z.SetElem(0, x).Broadcast256()
|
||||
return z.SetElem(0, x).Broadcast1To4()
|
||||
}
|
||||
|
||||
// BroadcastFloat32x8 returns a vector with the input
|
||||
@@ -172,7 +172,7 @@ func BroadcastUint64x4(x uint64) Uint64x4 {
|
||||
// Emulated, CPU Feature: AVX2
|
||||
func BroadcastFloat32x8(x float32) Float32x8 {
|
||||
var z Float32x4
|
||||
return z.SetElem(0, x).Broadcast256()
|
||||
return z.SetElem(0, x).Broadcast1To8()
|
||||
}
|
||||
|
||||
// BroadcastFloat64x4 returns a vector with the input
|
||||
@@ -181,7 +181,7 @@ func BroadcastFloat32x8(x float32) Float32x8 {
|
||||
// Emulated, CPU Feature: AVX2
|
||||
func BroadcastFloat64x4(x float64) Float64x4 {
|
||||
var z Float64x2
|
||||
return z.SetElem(0, x).Broadcast256()
|
||||
return z.SetElem(0, x).Broadcast1To4()
|
||||
}
|
||||
|
||||
// BroadcastInt8x64 returns a vector with the input
|
||||
@@ -190,7 +190,7 @@ func BroadcastFloat64x4(x float64) Float64x4 {
|
||||
// Emulated, CPU Feature: AVX512BW
|
||||
func BroadcastInt8x64(x int8) Int8x64 {
|
||||
var z Int8x16
|
||||
return z.SetElem(0, x).Broadcast512()
|
||||
return z.SetElem(0, x).Broadcast1To64()
|
||||
}
|
||||
|
||||
// BroadcastInt16x32 returns a vector with the input
|
||||
@@ -199,7 +199,7 @@ func BroadcastInt8x64(x int8) Int8x64 {
|
||||
// Emulated, CPU Feature: AVX512BW
|
||||
func BroadcastInt16x32(x int16) Int16x32 {
|
||||
var z Int16x8
|
||||
return z.SetElem(0, x).Broadcast512()
|
||||
return z.SetElem(0, x).Broadcast1To32()
|
||||
}
|
||||
|
||||
// BroadcastInt32x16 returns a vector with the input
|
||||
@@ -208,7 +208,7 @@ func BroadcastInt16x32(x int16) Int16x32 {
|
||||
// Emulated, CPU Feature: AVX512F
|
||||
func BroadcastInt32x16(x int32) Int32x16 {
|
||||
var z Int32x4
|
||||
return z.SetElem(0, x).Broadcast512()
|
||||
return z.SetElem(0, x).Broadcast1To16()
|
||||
}
|
||||
|
||||
// BroadcastInt64x8 returns a vector with the input
|
||||
@@ -217,7 +217,7 @@ func BroadcastInt32x16(x int32) Int32x16 {
|
||||
// Emulated, CPU Feature: AVX512F
|
||||
func BroadcastInt64x8(x int64) Int64x8 {
|
||||
var z Int64x2
|
||||
return z.SetElem(0, x).Broadcast512()
|
||||
return z.SetElem(0, x).Broadcast1To8()
|
||||
}
|
||||
|
||||
// BroadcastUint8x64 returns a vector with the input
|
||||
@@ -226,7 +226,7 @@ func BroadcastInt64x8(x int64) Int64x8 {
|
||||
// Emulated, CPU Feature: AVX512BW
|
||||
func BroadcastUint8x64(x uint8) Uint8x64 {
|
||||
var z Uint8x16
|
||||
return z.SetElem(0, x).Broadcast512()
|
||||
return z.SetElem(0, x).Broadcast1To64()
|
||||
}
|
||||
|
||||
// BroadcastUint16x32 returns a vector with the input
|
||||
@@ -235,7 +235,7 @@ func BroadcastUint8x64(x uint8) Uint8x64 {
|
||||
// Emulated, CPU Feature: AVX512BW
|
||||
func BroadcastUint16x32(x uint16) Uint16x32 {
|
||||
var z Uint16x8
|
||||
return z.SetElem(0, x).Broadcast512()
|
||||
return z.SetElem(0, x).Broadcast1To32()
|
||||
}
|
||||
|
||||
// BroadcastUint32x16 returns a vector with the input
|
||||
@@ -244,7 +244,7 @@ func BroadcastUint16x32(x uint16) Uint16x32 {
|
||||
// Emulated, CPU Feature: AVX512F
|
||||
func BroadcastUint32x16(x uint32) Uint32x16 {
|
||||
var z Uint32x4
|
||||
return z.SetElem(0, x).Broadcast512()
|
||||
return z.SetElem(0, x).Broadcast1To16()
|
||||
}
|
||||
|
||||
// BroadcastUint64x8 returns a vector with the input
|
||||
@@ -253,7 +253,7 @@ func BroadcastUint32x16(x uint32) Uint32x16 {
|
||||
// Emulated, CPU Feature: AVX512F
|
||||
func BroadcastUint64x8(x uint64) Uint64x8 {
|
||||
var z Uint64x2
|
||||
return z.SetElem(0, x).Broadcast512()
|
||||
return z.SetElem(0, x).Broadcast1To8()
|
||||
}
|
||||
|
||||
// BroadcastFloat32x16 returns a vector with the input
|
||||
@@ -262,7 +262,7 @@ func BroadcastUint64x8(x uint64) Uint64x8 {
|
||||
// Emulated, CPU Feature: AVX512F
|
||||
func BroadcastFloat32x16(x float32) Float32x16 {
|
||||
var z Float32x4
|
||||
return z.SetElem(0, x).Broadcast512()
|
||||
return z.SetElem(0, x).Broadcast1To16()
|
||||
}
|
||||
|
||||
// BroadcastFloat64x8 returns a vector with the input
|
||||
@@ -271,7 +271,7 @@ func BroadcastFloat32x16(x float32) Float32x16 {
|
||||
// Emulated, CPU Feature: AVX512F
|
||||
func BroadcastFloat64x8(x float64) Float64x8 {
|
||||
var z Float64x2
|
||||
return z.SetElem(0, x).Broadcast512()
|
||||
return z.SetElem(0, x).Broadcast1To8()
|
||||
}
|
||||
|
||||
// ToMask converts from Int8x16 to Mask8x16, mask element is set to true when the corresponding vector element is non-zero.
|
||||
|
||||
Reference in New Issue
Block a user