simd/archsimd: rename Broadcast methods

Currently the Broadcast128/256/512 methods broadcast the lowest
element of the input vector to a vector of the corresponding width.
There are also variations of broadcast operations that broadcast
the whole (128- or 256-bit) vector to a larger vector, which we
don't yet support. Our current naming doesn't make clear which
version it is, though. Rename the current ones to Broadcast1ToN, to be clear
that they broadcast one element. The vector version probably will
be named BroadcastAllToN (not included in this CL).

Change-Id: I47a21e367f948ec0b578d63706a40d20f5a9f46d
Reviewed-on: https://go-review.googlesource.com/c/go/+/734840
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
Cherry Mui
2026-01-08 11:57:28 -05:00
committed by Junyang Shao
parent 5facb3b24b
commit 8ac4477d83
11 changed files with 550 additions and 506 deletions

View File

@@ -25,23 +25,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPABSQ128,
ssa.OpAMD64VPABSQ256,
ssa.OpAMD64VPABSQ512,
ssa.OpAMD64VBROADCASTSS128,
ssa.OpAMD64VPBROADCASTQ128,
ssa.OpAMD64VPBROADCASTB128,
ssa.OpAMD64VPBROADCASTW128,
ssa.OpAMD64VPBROADCASTD128,
ssa.OpAMD64VBROADCASTSS256,
ssa.OpAMD64VBROADCASTSS128,
ssa.OpAMD64VBROADCASTSD256,
ssa.OpAMD64VPBROADCASTB256,
ssa.OpAMD64VPBROADCASTW256,
ssa.OpAMD64VPBROADCASTD256,
ssa.OpAMD64VPBROADCASTD128,
ssa.OpAMD64VPBROADCASTQ256,
ssa.OpAMD64VBROADCASTSS512,
ssa.OpAMD64VBROADCASTSS256,
ssa.OpAMD64VBROADCASTSD512,
ssa.OpAMD64VPBROADCASTB512,
ssa.OpAMD64VPBROADCASTW512,
ssa.OpAMD64VPBROADCASTD512,
ssa.OpAMD64VPBROADCASTW128,
ssa.OpAMD64VPBROADCASTD256,
ssa.OpAMD64VPBROADCASTQ512,
ssa.OpAMD64VBROADCASTSS512,
ssa.OpAMD64VPBROADCASTB128,
ssa.OpAMD64VPBROADCASTW256,
ssa.OpAMD64VPBROADCASTD512,
ssa.OpAMD64VPBROADCASTB256,
ssa.OpAMD64VPBROADCASTW512,
ssa.OpAMD64VPBROADCASTB512,
ssa.OpAMD64VCVTPD2PSX128,
ssa.OpAMD64VCVTPD2PSY128,
ssa.OpAMD64VCVTPD2PS256,
@@ -832,23 +832,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPABSQMasked128,
ssa.OpAMD64VPABSQMasked256,
ssa.OpAMD64VPABSQMasked512,
ssa.OpAMD64VBROADCASTSSMasked128,
ssa.OpAMD64VPBROADCASTQMasked128,
ssa.OpAMD64VPBROADCASTBMasked128,
ssa.OpAMD64VPBROADCASTWMasked128,
ssa.OpAMD64VPBROADCASTDMasked128,
ssa.OpAMD64VBROADCASTSSMasked256,
ssa.OpAMD64VBROADCASTSSMasked128,
ssa.OpAMD64VBROADCASTSDMasked256,
ssa.OpAMD64VPBROADCASTBMasked256,
ssa.OpAMD64VPBROADCASTWMasked256,
ssa.OpAMD64VPBROADCASTDMasked256,
ssa.OpAMD64VPBROADCASTDMasked128,
ssa.OpAMD64VPBROADCASTQMasked256,
ssa.OpAMD64VBROADCASTSSMasked512,
ssa.OpAMD64VBROADCASTSSMasked256,
ssa.OpAMD64VBROADCASTSDMasked512,
ssa.OpAMD64VPBROADCASTBMasked512,
ssa.OpAMD64VPBROADCASTWMasked512,
ssa.OpAMD64VPBROADCASTDMasked512,
ssa.OpAMD64VPBROADCASTWMasked128,
ssa.OpAMD64VPBROADCASTDMasked256,
ssa.OpAMD64VPBROADCASTQMasked512,
ssa.OpAMD64VBROADCASTSSMasked512,
ssa.OpAMD64VPBROADCASTBMasked128,
ssa.OpAMD64VPBROADCASTWMasked256,
ssa.OpAMD64VPBROADCASTDMasked512,
ssa.OpAMD64VPBROADCASTBMasked256,
ssa.OpAMD64VPBROADCASTWMasked512,
ssa.OpAMD64VPBROADCASTBMasked512,
ssa.OpAMD64VCOMPRESSPSMasked128,
ssa.OpAMD64VCOMPRESSPSMasked256,
ssa.OpAMD64VCOMPRESSPSMasked512,
@@ -2460,23 +2460,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPABSQMasked128Merging,
ssa.OpAMD64VPABSQMasked256Merging,
ssa.OpAMD64VPABSQMasked512Merging,
ssa.OpAMD64VBROADCASTSSMasked128Merging,
ssa.OpAMD64VPBROADCASTQMasked128Merging,
ssa.OpAMD64VPBROADCASTBMasked128Merging,
ssa.OpAMD64VPBROADCASTWMasked128Merging,
ssa.OpAMD64VPBROADCASTDMasked128Merging,
ssa.OpAMD64VBROADCASTSSMasked256Merging,
ssa.OpAMD64VBROADCASTSSMasked128Merging,
ssa.OpAMD64VBROADCASTSDMasked256Merging,
ssa.OpAMD64VPBROADCASTBMasked256Merging,
ssa.OpAMD64VPBROADCASTWMasked256Merging,
ssa.OpAMD64VPBROADCASTDMasked256Merging,
ssa.OpAMD64VPBROADCASTDMasked128Merging,
ssa.OpAMD64VPBROADCASTQMasked256Merging,
ssa.OpAMD64VBROADCASTSSMasked512Merging,
ssa.OpAMD64VBROADCASTSSMasked256Merging,
ssa.OpAMD64VBROADCASTSDMasked512Merging,
ssa.OpAMD64VPBROADCASTBMasked512Merging,
ssa.OpAMD64VPBROADCASTWMasked512Merging,
ssa.OpAMD64VPBROADCASTDMasked512Merging,
ssa.OpAMD64VPBROADCASTWMasked128Merging,
ssa.OpAMD64VPBROADCASTDMasked256Merging,
ssa.OpAMD64VPBROADCASTQMasked512Merging,
ssa.OpAMD64VBROADCASTSSMasked512Merging,
ssa.OpAMD64VPBROADCASTBMasked128Merging,
ssa.OpAMD64VPBROADCASTWMasked256Merging,
ssa.OpAMD64VPBROADCASTDMasked512Merging,
ssa.OpAMD64VPBROADCASTBMasked256Merging,
ssa.OpAMD64VPBROADCASTWMasked512Merging,
ssa.OpAMD64VPBROADCASTBMasked512Merging,
ssa.OpAMD64VRNDSCALEPSMasked128Merging,
ssa.OpAMD64VRNDSCALEPSMasked256Merging,
ssa.OpAMD64VRNDSCALEPSMasked512Merging,
@@ -2817,23 +2817,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPAVGWMasked128,
ssa.OpAMD64VPAVGWMasked256,
ssa.OpAMD64VPAVGWMasked512,
ssa.OpAMD64VBROADCASTSSMasked128,
ssa.OpAMD64VPBROADCASTQMasked128,
ssa.OpAMD64VPBROADCASTBMasked128,
ssa.OpAMD64VPBROADCASTWMasked128,
ssa.OpAMD64VPBROADCASTDMasked128,
ssa.OpAMD64VBROADCASTSSMasked256,
ssa.OpAMD64VBROADCASTSSMasked128,
ssa.OpAMD64VBROADCASTSDMasked256,
ssa.OpAMD64VPBROADCASTBMasked256,
ssa.OpAMD64VPBROADCASTWMasked256,
ssa.OpAMD64VPBROADCASTDMasked256,
ssa.OpAMD64VPBROADCASTDMasked128,
ssa.OpAMD64VPBROADCASTQMasked256,
ssa.OpAMD64VBROADCASTSSMasked512,
ssa.OpAMD64VBROADCASTSSMasked256,
ssa.OpAMD64VBROADCASTSDMasked512,
ssa.OpAMD64VPBROADCASTBMasked512,
ssa.OpAMD64VPBROADCASTWMasked512,
ssa.OpAMD64VPBROADCASTDMasked512,
ssa.OpAMD64VPBROADCASTWMasked128,
ssa.OpAMD64VPBROADCASTDMasked256,
ssa.OpAMD64VPBROADCASTQMasked512,
ssa.OpAMD64VBROADCASTSSMasked512,
ssa.OpAMD64VPBROADCASTBMasked128,
ssa.OpAMD64VPBROADCASTWMasked256,
ssa.OpAMD64VPBROADCASTDMasked512,
ssa.OpAMD64VPBROADCASTBMasked256,
ssa.OpAMD64VPBROADCASTWMasked512,
ssa.OpAMD64VPBROADCASTBMasked512,
ssa.OpAMD64VRNDSCALEPSMasked128,
ssa.OpAMD64VRNDSCALEPSMasked128load,
ssa.OpAMD64VRNDSCALEPSMasked256,

View File

@@ -140,36 +140,36 @@
(AverageUint16x8 ...) => (VPAVGW128 ...)
(AverageUint16x16 ...) => (VPAVGW256 ...)
(AverageUint16x32 ...) => (VPAVGW512 ...)
(Broadcast128Float32x4 ...) => (VBROADCASTSS128 ...)
(Broadcast128Float64x2 ...) => (VPBROADCASTQ128 ...)
(Broadcast128Int8x16 ...) => (VPBROADCASTB128 ...)
(Broadcast128Int16x8 ...) => (VPBROADCASTW128 ...)
(Broadcast128Int32x4 ...) => (VPBROADCASTD128 ...)
(Broadcast128Int64x2 ...) => (VPBROADCASTQ128 ...)
(Broadcast128Uint8x16 ...) => (VPBROADCASTB128 ...)
(Broadcast128Uint16x8 ...) => (VPBROADCASTW128 ...)
(Broadcast128Uint32x4 ...) => (VPBROADCASTD128 ...)
(Broadcast128Uint64x2 ...) => (VPBROADCASTQ128 ...)
(Broadcast256Float32x4 ...) => (VBROADCASTSS256 ...)
(Broadcast256Float64x2 ...) => (VBROADCASTSD256 ...)
(Broadcast256Int8x16 ...) => (VPBROADCASTB256 ...)
(Broadcast256Int16x8 ...) => (VPBROADCASTW256 ...)
(Broadcast256Int32x4 ...) => (VPBROADCASTD256 ...)
(Broadcast256Int64x2 ...) => (VPBROADCASTQ256 ...)
(Broadcast256Uint8x16 ...) => (VPBROADCASTB256 ...)
(Broadcast256Uint16x8 ...) => (VPBROADCASTW256 ...)
(Broadcast256Uint32x4 ...) => (VPBROADCASTD256 ...)
(Broadcast256Uint64x2 ...) => (VPBROADCASTQ256 ...)
(Broadcast512Float32x4 ...) => (VBROADCASTSS512 ...)
(Broadcast512Float64x2 ...) => (VBROADCASTSD512 ...)
(Broadcast512Int8x16 ...) => (VPBROADCASTB512 ...)
(Broadcast512Int16x8 ...) => (VPBROADCASTW512 ...)
(Broadcast512Int32x4 ...) => (VPBROADCASTD512 ...)
(Broadcast512Int64x2 ...) => (VPBROADCASTQ512 ...)
(Broadcast512Uint8x16 ...) => (VPBROADCASTB512 ...)
(Broadcast512Uint16x8 ...) => (VPBROADCASTW512 ...)
(Broadcast512Uint32x4 ...) => (VPBROADCASTD512 ...)
(Broadcast512Uint64x2 ...) => (VPBROADCASTQ512 ...)
(Broadcast1To2Float64x2 ...) => (VPBROADCASTQ128 ...)
(Broadcast1To2Int64x2 ...) => (VPBROADCASTQ128 ...)
(Broadcast1To2Uint64x2 ...) => (VPBROADCASTQ128 ...)
(Broadcast1To4Float32x4 ...) => (VBROADCASTSS128 ...)
(Broadcast1To4Float64x2 ...) => (VBROADCASTSD256 ...)
(Broadcast1To4Int32x4 ...) => (VPBROADCASTD128 ...)
(Broadcast1To4Int64x2 ...) => (VPBROADCASTQ256 ...)
(Broadcast1To4Uint32x4 ...) => (VPBROADCASTD128 ...)
(Broadcast1To4Uint64x2 ...) => (VPBROADCASTQ256 ...)
(Broadcast1To8Float32x4 ...) => (VBROADCASTSS256 ...)
(Broadcast1To8Float64x2 ...) => (VBROADCASTSD512 ...)
(Broadcast1To8Int16x8 ...) => (VPBROADCASTW128 ...)
(Broadcast1To8Int32x4 ...) => (VPBROADCASTD256 ...)
(Broadcast1To8Int64x2 ...) => (VPBROADCASTQ512 ...)
(Broadcast1To8Uint16x8 ...) => (VPBROADCASTW128 ...)
(Broadcast1To8Uint32x4 ...) => (VPBROADCASTD256 ...)
(Broadcast1To8Uint64x2 ...) => (VPBROADCASTQ512 ...)
(Broadcast1To16Float32x4 ...) => (VBROADCASTSS512 ...)
(Broadcast1To16Int8x16 ...) => (VPBROADCASTB128 ...)
(Broadcast1To16Int16x8 ...) => (VPBROADCASTW256 ...)
(Broadcast1To16Int32x4 ...) => (VPBROADCASTD512 ...)
(Broadcast1To16Uint8x16 ...) => (VPBROADCASTB128 ...)
(Broadcast1To16Uint16x8 ...) => (VPBROADCASTW256 ...)
(Broadcast1To16Uint32x4 ...) => (VPBROADCASTD512 ...)
(Broadcast1To32Int8x16 ...) => (VPBROADCASTB256 ...)
(Broadcast1To32Int16x8 ...) => (VPBROADCASTW512 ...)
(Broadcast1To32Uint8x16 ...) => (VPBROADCASTB256 ...)
(Broadcast1To32Uint16x8 ...) => (VPBROADCASTW512 ...)
(Broadcast1To64Int8x16 ...) => (VPBROADCASTB512 ...)
(Broadcast1To64Uint8x16 ...) => (VPBROADCASTB512 ...)
(CeilFloat32x4 x) => (VROUNDPS128 [2] x)
(CeilFloat32x8 x) => (VROUNDPS256 [2] x)
(CeilFloat64x2 x) => (VROUNDPD128 [2] x)
@@ -1424,23 +1424,23 @@
(VMOVDQU16Masked128 (VPAVGW128 x y) mask) => (VPAVGWMasked128 x y mask)
(VMOVDQU16Masked256 (VPAVGW256 x y) mask) => (VPAVGWMasked256 x y mask)
(VMOVDQU16Masked512 (VPAVGW512 x y) mask) => (VPAVGWMasked512 x y mask)
(VMOVDQU32Masked128 (VBROADCASTSS128 x) mask) => (VBROADCASTSSMasked128 x mask)
(VMOVDQU64Masked128 (VPBROADCASTQ128 x) mask) => (VPBROADCASTQMasked128 x mask)
(VMOVDQU8Masked128 (VPBROADCASTB128 x) mask) => (VPBROADCASTBMasked128 x mask)
(VMOVDQU16Masked128 (VPBROADCASTW128 x) mask) => (VPBROADCASTWMasked128 x mask)
(VMOVDQU32Masked128 (VPBROADCASTD128 x) mask) => (VPBROADCASTDMasked128 x mask)
(VMOVDQU32Masked256 (VBROADCASTSS256 x) mask) => (VBROADCASTSSMasked256 x mask)
(VMOVDQU32Masked128 (VBROADCASTSS128 x) mask) => (VBROADCASTSSMasked128 x mask)
(VMOVDQU64Masked256 (VBROADCASTSD256 x) mask) => (VBROADCASTSDMasked256 x mask)
(VMOVDQU8Masked256 (VPBROADCASTB256 x) mask) => (VPBROADCASTBMasked256 x mask)
(VMOVDQU16Masked256 (VPBROADCASTW256 x) mask) => (VPBROADCASTWMasked256 x mask)
(VMOVDQU32Masked256 (VPBROADCASTD256 x) mask) => (VPBROADCASTDMasked256 x mask)
(VMOVDQU32Masked128 (VPBROADCASTD128 x) mask) => (VPBROADCASTDMasked128 x mask)
(VMOVDQU64Masked256 (VPBROADCASTQ256 x) mask) => (VPBROADCASTQMasked256 x mask)
(VMOVDQU32Masked512 (VBROADCASTSS512 x) mask) => (VBROADCASTSSMasked512 x mask)
(VMOVDQU32Masked256 (VBROADCASTSS256 x) mask) => (VBROADCASTSSMasked256 x mask)
(VMOVDQU64Masked512 (VBROADCASTSD512 x) mask) => (VBROADCASTSDMasked512 x mask)
(VMOVDQU8Masked512 (VPBROADCASTB512 x) mask) => (VPBROADCASTBMasked512 x mask)
(VMOVDQU16Masked512 (VPBROADCASTW512 x) mask) => (VPBROADCASTWMasked512 x mask)
(VMOVDQU32Masked512 (VPBROADCASTD512 x) mask) => (VPBROADCASTDMasked512 x mask)
(VMOVDQU16Masked128 (VPBROADCASTW128 x) mask) => (VPBROADCASTWMasked128 x mask)
(VMOVDQU32Masked256 (VPBROADCASTD256 x) mask) => (VPBROADCASTDMasked256 x mask)
(VMOVDQU64Masked512 (VPBROADCASTQ512 x) mask) => (VPBROADCASTQMasked512 x mask)
(VMOVDQU32Masked512 (VBROADCASTSS512 x) mask) => (VBROADCASTSSMasked512 x mask)
(VMOVDQU8Masked128 (VPBROADCASTB128 x) mask) => (VPBROADCASTBMasked128 x mask)
(VMOVDQU16Masked256 (VPBROADCASTW256 x) mask) => (VPBROADCASTWMasked256 x mask)
(VMOVDQU32Masked512 (VPBROADCASTD512 x) mask) => (VPBROADCASTDMasked512 x mask)
(VMOVDQU8Masked256 (VPBROADCASTB256 x) mask) => (VPBROADCASTBMasked256 x mask)
(VMOVDQU16Masked512 (VPBROADCASTW512 x) mask) => (VPBROADCASTWMasked512 x mask)
(VMOVDQU8Masked512 (VPBROADCASTB512 x) mask) => (VPBROADCASTBMasked512 x mask)
(VMOVDQU32Masked128 (VRNDSCALEPS128 [a] x) mask) => (VRNDSCALEPSMasked128 [a] x mask)
(VMOVDQU32Masked256 (VRNDSCALEPS256 [a] x) mask) => (VRNDSCALEPSMasked256 [a] x mask)
(VMOVDQU32Masked512 (VRNDSCALEPS512 [a] x) mask) => (VRNDSCALEPSMasked512 [a] x mask)

View File

@@ -143,36 +143,36 @@ func simdGenericOps() []opData {
{name: "AverageUint16x8", argLength: 2, commutative: true},
{name: "AverageUint16x16", argLength: 2, commutative: true},
{name: "AverageUint16x32", argLength: 2, commutative: true},
{name: "Broadcast128Float32x4", argLength: 1, commutative: false},
{name: "Broadcast128Float64x2", argLength: 1, commutative: false},
{name: "Broadcast128Int8x16", argLength: 1, commutative: false},
{name: "Broadcast128Int16x8", argLength: 1, commutative: false},
{name: "Broadcast128Int32x4", argLength: 1, commutative: false},
{name: "Broadcast128Int64x2", argLength: 1, commutative: false},
{name: "Broadcast128Uint8x16", argLength: 1, commutative: false},
{name: "Broadcast128Uint16x8", argLength: 1, commutative: false},
{name: "Broadcast128Uint32x4", argLength: 1, commutative: false},
{name: "Broadcast128Uint64x2", argLength: 1, commutative: false},
{name: "Broadcast256Float32x4", argLength: 1, commutative: false},
{name: "Broadcast256Float64x2", argLength: 1, commutative: false},
{name: "Broadcast256Int8x16", argLength: 1, commutative: false},
{name: "Broadcast256Int16x8", argLength: 1, commutative: false},
{name: "Broadcast256Int32x4", argLength: 1, commutative: false},
{name: "Broadcast256Int64x2", argLength: 1, commutative: false},
{name: "Broadcast256Uint8x16", argLength: 1, commutative: false},
{name: "Broadcast256Uint16x8", argLength: 1, commutative: false},
{name: "Broadcast256Uint32x4", argLength: 1, commutative: false},
{name: "Broadcast256Uint64x2", argLength: 1, commutative: false},
{name: "Broadcast512Float32x4", argLength: 1, commutative: false},
{name: "Broadcast512Float64x2", argLength: 1, commutative: false},
{name: "Broadcast512Int8x16", argLength: 1, commutative: false},
{name: "Broadcast512Int16x8", argLength: 1, commutative: false},
{name: "Broadcast512Int32x4", argLength: 1, commutative: false},
{name: "Broadcast512Int64x2", argLength: 1, commutative: false},
{name: "Broadcast512Uint8x16", argLength: 1, commutative: false},
{name: "Broadcast512Uint16x8", argLength: 1, commutative: false},
{name: "Broadcast512Uint32x4", argLength: 1, commutative: false},
{name: "Broadcast512Uint64x2", argLength: 1, commutative: false},
{name: "Broadcast1To2Float64x2", argLength: 1, commutative: false},
{name: "Broadcast1To2Int64x2", argLength: 1, commutative: false},
{name: "Broadcast1To2Uint64x2", argLength: 1, commutative: false},
{name: "Broadcast1To4Float32x4", argLength: 1, commutative: false},
{name: "Broadcast1To4Float64x2", argLength: 1, commutative: false},
{name: "Broadcast1To4Int32x4", argLength: 1, commutative: false},
{name: "Broadcast1To4Int64x2", argLength: 1, commutative: false},
{name: "Broadcast1To4Uint32x4", argLength: 1, commutative: false},
{name: "Broadcast1To4Uint64x2", argLength: 1, commutative: false},
{name: "Broadcast1To8Float32x4", argLength: 1, commutative: false},
{name: "Broadcast1To8Float64x2", argLength: 1, commutative: false},
{name: "Broadcast1To8Int16x8", argLength: 1, commutative: false},
{name: "Broadcast1To8Int32x4", argLength: 1, commutative: false},
{name: "Broadcast1To8Int64x2", argLength: 1, commutative: false},
{name: "Broadcast1To8Uint16x8", argLength: 1, commutative: false},
{name: "Broadcast1To8Uint32x4", argLength: 1, commutative: false},
{name: "Broadcast1To8Uint64x2", argLength: 1, commutative: false},
{name: "Broadcast1To16Float32x4", argLength: 1, commutative: false},
{name: "Broadcast1To16Int8x16", argLength: 1, commutative: false},
{name: "Broadcast1To16Int16x8", argLength: 1, commutative: false},
{name: "Broadcast1To16Int32x4", argLength: 1, commutative: false},
{name: "Broadcast1To16Uint8x16", argLength: 1, commutative: false},
{name: "Broadcast1To16Uint16x8", argLength: 1, commutative: false},
{name: "Broadcast1To16Uint32x4", argLength: 1, commutative: false},
{name: "Broadcast1To32Int8x16", argLength: 1, commutative: false},
{name: "Broadcast1To32Int16x8", argLength: 1, commutative: false},
{name: "Broadcast1To32Uint8x16", argLength: 1, commutative: false},
{name: "Broadcast1To32Uint16x8", argLength: 1, commutative: false},
{name: "Broadcast1To64Int8x16", argLength: 1, commutative: false},
{name: "Broadcast1To64Uint8x16", argLength: 1, commutative: false},
{name: "CeilFloat32x4", argLength: 1, commutative: false},
{name: "CeilFloat32x8", argLength: 1, commutative: false},
{name: "CeilFloat64x2", argLength: 1, commutative: false},

View File

@@ -6309,36 +6309,36 @@ const (
OpAverageUint16x8
OpAverageUint16x16
OpAverageUint16x32
OpBroadcast128Float32x4
OpBroadcast128Float64x2
OpBroadcast128Int8x16
OpBroadcast128Int16x8
OpBroadcast128Int32x4
OpBroadcast128Int64x2
OpBroadcast128Uint8x16
OpBroadcast128Uint16x8
OpBroadcast128Uint32x4
OpBroadcast128Uint64x2
OpBroadcast256Float32x4
OpBroadcast256Float64x2
OpBroadcast256Int8x16
OpBroadcast256Int16x8
OpBroadcast256Int32x4
OpBroadcast256Int64x2
OpBroadcast256Uint8x16
OpBroadcast256Uint16x8
OpBroadcast256Uint32x4
OpBroadcast256Uint64x2
OpBroadcast512Float32x4
OpBroadcast512Float64x2
OpBroadcast512Int8x16
OpBroadcast512Int16x8
OpBroadcast512Int32x4
OpBroadcast512Int64x2
OpBroadcast512Uint8x16
OpBroadcast512Uint16x8
OpBroadcast512Uint32x4
OpBroadcast512Uint64x2
OpBroadcast1To2Float64x2
OpBroadcast1To2Int64x2
OpBroadcast1To2Uint64x2
OpBroadcast1To4Float32x4
OpBroadcast1To4Float64x2
OpBroadcast1To4Int32x4
OpBroadcast1To4Int64x2
OpBroadcast1To4Uint32x4
OpBroadcast1To4Uint64x2
OpBroadcast1To8Float32x4
OpBroadcast1To8Float64x2
OpBroadcast1To8Int16x8
OpBroadcast1To8Int32x4
OpBroadcast1To8Int64x2
OpBroadcast1To8Uint16x8
OpBroadcast1To8Uint32x4
OpBroadcast1To8Uint64x2
OpBroadcast1To16Float32x4
OpBroadcast1To16Int8x16
OpBroadcast1To16Int16x8
OpBroadcast1To16Int32x4
OpBroadcast1To16Uint8x16
OpBroadcast1To16Uint16x8
OpBroadcast1To16Uint32x4
OpBroadcast1To32Int8x16
OpBroadcast1To32Int16x8
OpBroadcast1To32Uint8x16
OpBroadcast1To32Uint16x8
OpBroadcast1To64Int8x16
OpBroadcast1To64Uint8x16
OpCeilFloat32x4
OpCeilFloat32x8
OpCeilFloat64x2
@@ -89875,152 +89875,152 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
name: "Broadcast128Float32x4",
name: "Broadcast1To2Float64x2",
argLen: 1,
generic: true,
},
{
name: "Broadcast128Float64x2",
name: "Broadcast1To2Int64x2",
argLen: 1,
generic: true,
},
{
name: "Broadcast128Int8x16",
name: "Broadcast1To2Uint64x2",
argLen: 1,
generic: true,
},
{
name: "Broadcast128Int16x8",
name: "Broadcast1To4Float32x4",
argLen: 1,
generic: true,
},
{
name: "Broadcast128Int32x4",
name: "Broadcast1To4Float64x2",
argLen: 1,
generic: true,
},
{
name: "Broadcast128Int64x2",
name: "Broadcast1To4Int32x4",
argLen: 1,
generic: true,
},
{
name: "Broadcast128Uint8x16",
name: "Broadcast1To4Int64x2",
argLen: 1,
generic: true,
},
{
name: "Broadcast128Uint16x8",
name: "Broadcast1To4Uint32x4",
argLen: 1,
generic: true,
},
{
name: "Broadcast128Uint32x4",
name: "Broadcast1To4Uint64x2",
argLen: 1,
generic: true,
},
{
name: "Broadcast128Uint64x2",
name: "Broadcast1To8Float32x4",
argLen: 1,
generic: true,
},
{
name: "Broadcast256Float32x4",
name: "Broadcast1To8Float64x2",
argLen: 1,
generic: true,
},
{
name: "Broadcast256Float64x2",
name: "Broadcast1To8Int16x8",
argLen: 1,
generic: true,
},
{
name: "Broadcast256Int8x16",
name: "Broadcast1To8Int32x4",
argLen: 1,
generic: true,
},
{
name: "Broadcast256Int16x8",
name: "Broadcast1To8Int64x2",
argLen: 1,
generic: true,
},
{
name: "Broadcast256Int32x4",
name: "Broadcast1To8Uint16x8",
argLen: 1,
generic: true,
},
{
name: "Broadcast256Int64x2",
name: "Broadcast1To8Uint32x4",
argLen: 1,
generic: true,
},
{
name: "Broadcast256Uint8x16",
name: "Broadcast1To8Uint64x2",
argLen: 1,
generic: true,
},
{
name: "Broadcast256Uint16x8",
name: "Broadcast1To16Float32x4",
argLen: 1,
generic: true,
},
{
name: "Broadcast256Uint32x4",
name: "Broadcast1To16Int8x16",
argLen: 1,
generic: true,
},
{
name: "Broadcast256Uint64x2",
name: "Broadcast1To16Int16x8",
argLen: 1,
generic: true,
},
{
name: "Broadcast512Float32x4",
name: "Broadcast1To16Int32x4",
argLen: 1,
generic: true,
},
{
name: "Broadcast512Float64x2",
name: "Broadcast1To16Uint8x16",
argLen: 1,
generic: true,
},
{
name: "Broadcast512Int8x16",
name: "Broadcast1To16Uint16x8",
argLen: 1,
generic: true,
},
{
name: "Broadcast512Int16x8",
name: "Broadcast1To16Uint32x4",
argLen: 1,
generic: true,
},
{
name: "Broadcast512Int32x4",
name: "Broadcast1To32Int8x16",
argLen: 1,
generic: true,
},
{
name: "Broadcast512Int64x2",
name: "Broadcast1To32Int16x8",
argLen: 1,
generic: true,
},
{
name: "Broadcast512Uint8x16",
name: "Broadcast1To32Uint8x16",
argLen: 1,
generic: true,
},
{
name: "Broadcast512Uint16x8",
name: "Broadcast1To32Uint16x8",
argLen: 1,
generic: true,
},
{
name: "Broadcast512Uint32x4",
name: "Broadcast1To64Int8x16",
argLen: 1,
generic: true,
},
{
name: "Broadcast512Uint64x2",
name: "Broadcast1To64Uint8x16",
argLen: 1,
generic: true,
},

View File

@@ -2479,96 +2479,96 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpBitLen64(v)
case OpBitLen8:
return rewriteValueAMD64_OpBitLen8(v)
case OpBroadcast128Float32x4:
v.Op = OpAMD64VBROADCASTSS128
return true
case OpBroadcast128Float64x2:
v.Op = OpAMD64VPBROADCASTQ128
return true
case OpBroadcast128Int16x8:
v.Op = OpAMD64VPBROADCASTW128
return true
case OpBroadcast128Int32x4:
v.Op = OpAMD64VPBROADCASTD128
return true
case OpBroadcast128Int64x2:
v.Op = OpAMD64VPBROADCASTQ128
return true
case OpBroadcast128Int8x16:
v.Op = OpAMD64VPBROADCASTB128
return true
case OpBroadcast128Uint16x8:
v.Op = OpAMD64VPBROADCASTW128
return true
case OpBroadcast128Uint32x4:
v.Op = OpAMD64VPBROADCASTD128
return true
case OpBroadcast128Uint64x2:
v.Op = OpAMD64VPBROADCASTQ128
return true
case OpBroadcast128Uint8x16:
v.Op = OpAMD64VPBROADCASTB128
return true
case OpBroadcast256Float32x4:
v.Op = OpAMD64VBROADCASTSS256
return true
case OpBroadcast256Float64x2:
v.Op = OpAMD64VBROADCASTSD256
return true
case OpBroadcast256Int16x8:
v.Op = OpAMD64VPBROADCASTW256
return true
case OpBroadcast256Int32x4:
v.Op = OpAMD64VPBROADCASTD256
return true
case OpBroadcast256Int64x2:
v.Op = OpAMD64VPBROADCASTQ256
return true
case OpBroadcast256Int8x16:
v.Op = OpAMD64VPBROADCASTB256
return true
case OpBroadcast256Uint16x8:
v.Op = OpAMD64VPBROADCASTW256
return true
case OpBroadcast256Uint32x4:
v.Op = OpAMD64VPBROADCASTD256
return true
case OpBroadcast256Uint64x2:
v.Op = OpAMD64VPBROADCASTQ256
return true
case OpBroadcast256Uint8x16:
v.Op = OpAMD64VPBROADCASTB256
return true
case OpBroadcast512Float32x4:
case OpBroadcast1To16Float32x4:
v.Op = OpAMD64VBROADCASTSS512
return true
case OpBroadcast512Float64x2:
case OpBroadcast1To16Int16x8:
v.Op = OpAMD64VPBROADCASTW256
return true
case OpBroadcast1To16Int32x4:
v.Op = OpAMD64VPBROADCASTD512
return true
case OpBroadcast1To16Int8x16:
v.Op = OpAMD64VPBROADCASTB128
return true
case OpBroadcast1To16Uint16x8:
v.Op = OpAMD64VPBROADCASTW256
return true
case OpBroadcast1To16Uint32x4:
v.Op = OpAMD64VPBROADCASTD512
return true
case OpBroadcast1To16Uint8x16:
v.Op = OpAMD64VPBROADCASTB128
return true
case OpBroadcast1To2Float64x2:
v.Op = OpAMD64VPBROADCASTQ128
return true
case OpBroadcast1To2Int64x2:
v.Op = OpAMD64VPBROADCASTQ128
return true
case OpBroadcast1To2Uint64x2:
v.Op = OpAMD64VPBROADCASTQ128
return true
case OpBroadcast1To32Int16x8:
v.Op = OpAMD64VPBROADCASTW512
return true
case OpBroadcast1To32Int8x16:
v.Op = OpAMD64VPBROADCASTB256
return true
case OpBroadcast1To32Uint16x8:
v.Op = OpAMD64VPBROADCASTW512
return true
case OpBroadcast1To32Uint8x16:
v.Op = OpAMD64VPBROADCASTB256
return true
case OpBroadcast1To4Float32x4:
v.Op = OpAMD64VBROADCASTSS128
return true
case OpBroadcast1To4Float64x2:
v.Op = OpAMD64VBROADCASTSD256
return true
case OpBroadcast1To4Int32x4:
v.Op = OpAMD64VPBROADCASTD128
return true
case OpBroadcast1To4Int64x2:
v.Op = OpAMD64VPBROADCASTQ256
return true
case OpBroadcast1To4Uint32x4:
v.Op = OpAMD64VPBROADCASTD128
return true
case OpBroadcast1To4Uint64x2:
v.Op = OpAMD64VPBROADCASTQ256
return true
case OpBroadcast1To64Int8x16:
v.Op = OpAMD64VPBROADCASTB512
return true
case OpBroadcast1To64Uint8x16:
v.Op = OpAMD64VPBROADCASTB512
return true
case OpBroadcast1To8Float32x4:
v.Op = OpAMD64VBROADCASTSS256
return true
case OpBroadcast1To8Float64x2:
v.Op = OpAMD64VBROADCASTSD512
return true
case OpBroadcast512Int16x8:
v.Op = OpAMD64VPBROADCASTW512
case OpBroadcast1To8Int16x8:
v.Op = OpAMD64VPBROADCASTW128
return true
case OpBroadcast512Int32x4:
v.Op = OpAMD64VPBROADCASTD512
case OpBroadcast1To8Int32x4:
v.Op = OpAMD64VPBROADCASTD256
return true
case OpBroadcast512Int64x2:
case OpBroadcast1To8Int64x2:
v.Op = OpAMD64VPBROADCASTQ512
return true
case OpBroadcast512Int8x16:
v.Op = OpAMD64VPBROADCASTB512
case OpBroadcast1To8Uint16x8:
v.Op = OpAMD64VPBROADCASTW128
return true
case OpBroadcast512Uint16x8:
v.Op = OpAMD64VPBROADCASTW512
case OpBroadcast1To8Uint32x4:
v.Op = OpAMD64VPBROADCASTD256
return true
case OpBroadcast512Uint32x4:
v.Op = OpAMD64VPBROADCASTD512
return true
case OpBroadcast512Uint64x2:
case OpBroadcast1To8Uint64x2:
v.Op = OpAMD64VPBROADCASTQ512
return true
case OpBroadcast512Uint8x16:
v.Op = OpAMD64VPBROADCASTB512
return true
case OpBswap16:
return rewriteValueAMD64_OpBswap16(v)
case OpBswap32:

View File

@@ -152,36 +152,36 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint16x8.Average", opLen2(ssa.OpAverageUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.Average", opLen2(ssa.OpAverageUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.Average", opLen2(ssa.OpAverageUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.Broadcast128", opLen1(ssa.OpBroadcast128Float32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x2.Broadcast128", opLen1(ssa.OpBroadcast128Float64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x16.Broadcast128", opLen1(ssa.OpBroadcast128Int8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.Broadcast128", opLen1(ssa.OpBroadcast128Int16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x4.Broadcast128", opLen1(ssa.OpBroadcast128Int32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x2.Broadcast128", opLen1(ssa.OpBroadcast128Int64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x16.Broadcast128", opLen1(ssa.OpBroadcast128Uint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.Broadcast128", opLen1(ssa.OpBroadcast128Uint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x4.Broadcast128", opLen1(ssa.OpBroadcast128Uint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x2.Broadcast128", opLen1(ssa.OpBroadcast128Uint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x4.Broadcast256", opLen1(ssa.OpBroadcast256Float32x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x2.Broadcast256", opLen1(ssa.OpBroadcast256Float64x2, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x16.Broadcast256", opLen1(ssa.OpBroadcast256Int8x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x8.Broadcast256", opLen1(ssa.OpBroadcast256Int16x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x4.Broadcast256", opLen1(ssa.OpBroadcast256Int32x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x2.Broadcast256", opLen1(ssa.OpBroadcast256Int64x2, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x16.Broadcast256", opLen1(ssa.OpBroadcast256Uint8x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x8.Broadcast256", opLen1(ssa.OpBroadcast256Uint16x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x4.Broadcast256", opLen1(ssa.OpBroadcast256Uint32x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x2.Broadcast256", opLen1(ssa.OpBroadcast256Uint64x2, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x4.Broadcast512", opLen1(ssa.OpBroadcast512Float32x4, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.Broadcast512", opLen1(ssa.OpBroadcast512Float64x2, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Broadcast512", opLen1(ssa.OpBroadcast512Int8x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.Broadcast512", opLen1(ssa.OpBroadcast512Int16x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.Broadcast512", opLen1(ssa.OpBroadcast512Int32x4, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.Broadcast512", opLen1(ssa.OpBroadcast512Int64x2, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.Broadcast512", opLen1(ssa.OpBroadcast512Uint8x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.Broadcast512", opLen1(ssa.OpBroadcast512Uint16x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.Broadcast512", opLen1(ssa.OpBroadcast512Uint32x4, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.Broadcast512", opLen1(ssa.OpBroadcast512Uint64x2, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.Broadcast1To2", opLen1(ssa.OpBroadcast1To2Float64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x2.Broadcast1To2", opLen1(ssa.OpBroadcast1To2Int64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x2.Broadcast1To2", opLen1(ssa.OpBroadcast1To2Uint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x4.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Float32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x2.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Float64x2, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x4.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Int32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x2.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Int64x2, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x4.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Uint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x2.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Uint64x2, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x4.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Float32x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x2.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Float64x2, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Int16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x4.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Int32x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x2.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Int64x2, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Uint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x4.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Uint32x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x2.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Uint64x2, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Float32x4, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Int8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x8.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Int16x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x4.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Int32x4, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Uint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x8.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Uint16x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x4.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Uint32x4, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Int8x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x8.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Int16x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Uint8x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x8.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Uint16x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Broadcast1To64", opLen1(ssa.OpBroadcast1To64Int8x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.Broadcast1To64", opLen1(ssa.OpBroadcast1To64Uint8x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.Ceil", opLen1(ssa.OpCeilFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Ceil", opLen1(ssa.OpCeilFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x2.Ceil", opLen1(ssa.OpCeilFloat64x2, types.TypeVec128), sys.AMD64)

View File

@@ -69,21 +69,36 @@
documentation: !string |-
// NAME performs an expansion on a vector x whose elements are packed to lower parts.
// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
- go: Broadcast128
- go: Broadcast1To2
commutative: false
documentation: !string |-
// NAME copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
- go: Broadcast256
// NAME copies the lowest element of its input to all 2 elements of
// the output vector.
- go: Broadcast1To4
commutative: false
documentation: !string |-
// NAME copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
- go: Broadcast512
// NAME copies the lowest element of its input to all 4 elements of
// the output vector.
- go: Broadcast1To8
commutative: false
documentation: !string |-
// NAME copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
// NAME copies the lowest element of its input to all 8 elements of
// the output vector.
- go: Broadcast1To16
commutative: false
documentation: !string |-
// NAME copies the lowest element of its input to all 16 elements of
// the output vector.
- go: Broadcast1To32
commutative: false
documentation: !string |-
// NAME copies the lowest element of its input to all 32 elements of
// the output vector.
- go: Broadcast1To64
commutative: false
documentation: !string |-
// NAME copies the lowest element of its input to all 64 elements of
// the output vector.
- go: PermuteOrZeroGrouped
commutative: false
documentation: !string |- # Detailed documentation will rely on the specific ops.

View File

@@ -376,21 +376,21 @@
out:
- *any
- go: Broadcast128
asm: VPBROADCAST[BWDQ]
- go: Broadcast1To2
asm: VPBROADCASTQ
in:
- class: vreg
bits: 128
elemBits: $e
elemBits: 64
base: $b
out:
- class: vreg
bits: 128
elemBits: $e
elemBits: 64
base: $b
# weirdly, this one case on AVX2 is memory-operand-only
- go: Broadcast128
- go: Broadcast1To2
asm: VPBROADCASTQ
in:
- class: vreg
@@ -405,70 +405,93 @@
base: int
OverwriteBase: float
- go: Broadcast256
- go: Broadcast1To4
asm: VPBROADCAST[BWDQ]
in:
- class: vreg
bits: 128
elemBits: $e
base: $b
out:
- class: vreg
bits: 256
elemBits: $e
lanes: 4
base: $b
- go: Broadcast512
- go: Broadcast1To8
asm: VPBROADCAST[BWDQ]
in:
- class: vreg
bits: 128
elemBits: $e
base: $b
out:
- class: vreg
bits: 512
elemBits: $e
lanes: 8
base: $b
- go: Broadcast128
- go: Broadcast1To16
asm: VPBROADCAST[BWDQ]
in:
- class: vreg
bits: 128
base: $b
out:
- class: vreg
lanes: 16
base: $b
- go: Broadcast1To32
asm: VPBROADCAST[BWDQ]
in:
- class: vreg
bits: 128
base: $b
out:
- class: vreg
lanes: 32
base: $b
- go: Broadcast1To64
asm: VPBROADCASTB
in:
- class: vreg
bits: 128
base: $b
out:
- class: vreg
lanes: 64
base: $b
- go: Broadcast1To4
asm: VBROADCASTS[SD]
in:
- class: vreg
bits: 128
elemBits: $e
base: $b
base: float
out:
- class: vreg
bits: 128
elemBits: $e
base: $b
lanes: 4
base: float
- go: Broadcast256
- go: Broadcast1To8
asm: VBROADCASTS[SD]
in:
- class: vreg
bits: 128
elemBits: $e
base: $b
base: float
out:
- class: vreg
bits: 256
elemBits: $e
base: $b
lanes: 8
base: float
- go: Broadcast512
- go: Broadcast1To16
asm: VBROADCASTS[SD]
in:
- class: vreg
bits: 128
elemBits: $e
base: $b
base: float
out:
- class: vreg
bits: 512
elemBits: $e
base: $b
lanes: 16
base: float
# VPSHUFB for 128-bit byte shuffles will be picked with higher priority than VPERMB, given its lower CPU feature requirement. (It's AVX)
- go: PermuteOrZero

View File

@@ -873,7 +873,7 @@ var broadcastTemplate = templateOf("Broadcast functions", `
// Emulated, CPU Feature: {{.CPUfeatureBC}}
func Broadcast{{.VType}}(x {{.Etype}}) {{.VType}} {
var z {{.As128BitVec }}
return z.SetElem(0, x).Broadcast{{.Vwidth}}()
return z.SetElem(0, x).Broadcast1To{{.Count}}()
}
`)

View File

@@ -805,191 +805,197 @@ func (x Uint16x16) Average(y Uint16x16) Uint16x16
// Asm: VPAVGW, CPU Feature: AVX512
func (x Uint16x32) Average(y Uint16x32) Uint16x32
/* Broadcast128 */
/* Broadcast1To2 */
// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
// Broadcast1To2 copies the lowest element of its input to all 2 elements of
// the output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func (x Float64x2) Broadcast1To2() Float64x2
// Broadcast1To2 copies the lowest element of its input to all 2 elements of
// the output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func (x Int64x2) Broadcast1To2() Int64x2
// Broadcast1To2 copies the lowest element of its input to all 2 elements of
// the output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func (x Uint64x2) Broadcast1To2() Uint64x2
/* Broadcast1To4 */
// Broadcast1To4 copies the lowest element of its input to all 4 elements of
// the output vector.
//
// Asm: VBROADCASTSS, CPU Feature: AVX2
func (x Float32x4) Broadcast128() Float32x4
func (x Float32x4) Broadcast1To4() Float32x4
// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func (x Float64x2) Broadcast128() Float64x2
// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX2
func (x Int8x16) Broadcast128() Int8x16
// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
func (x Int16x8) Broadcast128() Int16x8
// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
func (x Int32x4) Broadcast128() Int32x4
// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func (x Int64x2) Broadcast128() Int64x2
// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX2
func (x Uint8x16) Broadcast128() Uint8x16
// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
func (x Uint16x8) Broadcast128() Uint16x8
// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
func (x Uint32x4) Broadcast128() Uint32x4
// Broadcast128 copies element zero of its (128-bit) input to all elements of
// the 128-bit output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func (x Uint64x2) Broadcast128() Uint64x2
/* Broadcast256 */
// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VBROADCASTSS, CPU Feature: AVX2
func (x Float32x4) Broadcast256() Float32x8
// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
// Broadcast1To4 copies the lowest element of its input to all 4 elements of
// the output vector.
//
// Asm: VBROADCASTSD, CPU Feature: AVX2
func (x Float64x2) Broadcast256() Float64x4
func (x Float64x2) Broadcast1To4() Float64x4
// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX2
func (x Int8x16) Broadcast256() Int8x32
// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
func (x Int16x8) Broadcast256() Int16x16
// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
// Broadcast1To4 copies the lowest element of its input to all 4 elements of
// the output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
func (x Int32x4) Broadcast256() Int32x8
func (x Int32x4) Broadcast1To4() Int32x4
// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
// Broadcast1To4 copies the lowest element of its input to all 4 elements of
// the output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func (x Int64x2) Broadcast256() Int64x4
func (x Int64x2) Broadcast1To4() Int64x4
// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX2
func (x Uint8x16) Broadcast256() Uint8x32
// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
func (x Uint16x8) Broadcast256() Uint16x16
// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
// Broadcast1To4 copies the lowest element of its input to all 4 elements of
// the output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX2
func (x Uint32x4) Broadcast256() Uint32x8
func (x Uint32x4) Broadcast1To4() Uint32x4
// Broadcast256 copies element zero of its (128-bit) input to all elements of
// the 256-bit output vector.
// Broadcast1To4 copies the lowest element of its input to all 4 elements of
// the output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX2
func (x Uint64x2) Broadcast256() Uint64x4
func (x Uint64x2) Broadcast1To4() Uint64x4
/* Broadcast512 */
/* Broadcast1To8 */
// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
// Broadcast1To8 copies the lowest element of its input to all 8 elements of
// the output vector.
//
// Asm: VBROADCASTSS, CPU Feature: AVX512
func (x Float32x4) Broadcast512() Float32x16
// Asm: VBROADCASTSS, CPU Feature: AVX2
func (x Float32x4) Broadcast1To8() Float32x8
// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
// Broadcast1To8 copies the lowest element of its input to all 8 elements of
// the output vector.
//
// Asm: VBROADCASTSD, CPU Feature: AVX512
func (x Float64x2) Broadcast512() Float64x8
func (x Float64x2) Broadcast1To8() Float64x8
// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
// Broadcast1To8 copies the lowest element of its input to all 8 elements of
// the output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX512
func (x Int8x16) Broadcast512() Int8x64
// Asm: VPBROADCASTW, CPU Feature: AVX2
func (x Int16x8) Broadcast1To8() Int16x8
// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
// Broadcast1To8 copies the lowest element of its input to all 8 elements of
// the output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX512
func (x Int16x8) Broadcast512() Int16x32
// Asm: VPBROADCASTD, CPU Feature: AVX2
func (x Int32x4) Broadcast1To8() Int32x8
// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX512
func (x Int32x4) Broadcast512() Int32x16
// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
// Broadcast1To8 copies the lowest element of its input to all 8 elements of
// the output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX512
func (x Int64x2) Broadcast512() Int64x8
func (x Int64x2) Broadcast1To8() Int64x8
// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
// Broadcast1To8 copies the lowest element of its input to all 8 elements of
// the output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX512
func (x Uint8x16) Broadcast512() Uint8x64
// Asm: VPBROADCASTW, CPU Feature: AVX2
func (x Uint16x8) Broadcast1To8() Uint16x8
// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
// Broadcast1To8 copies the lowest element of its input to all 8 elements of
// the output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX512
func (x Uint16x8) Broadcast512() Uint16x32
// Asm: VPBROADCASTD, CPU Feature: AVX2
func (x Uint32x4) Broadcast1To8() Uint32x8
// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX512
func (x Uint32x4) Broadcast512() Uint32x16
// Broadcast512 copies element zero of its (128-bit) input to all elements of
// the 512-bit output vector.
// Broadcast1To8 copies the lowest element of its input to all 8 elements of
// the output vector.
//
// Asm: VPBROADCASTQ, CPU Feature: AVX512
func (x Uint64x2) Broadcast512() Uint64x8
func (x Uint64x2) Broadcast1To8() Uint64x8
/* Broadcast1To16 */
// Broadcast1To16 copies the lowest element of its input to all 16 elements of
// the output vector.
//
// Asm: VBROADCASTSS, CPU Feature: AVX512
func (x Float32x4) Broadcast1To16() Float32x16
// Broadcast1To16 copies the lowest element of its input to all 16 elements of
// the output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX2
func (x Int8x16) Broadcast1To16() Int8x16
// Broadcast1To16 copies the lowest element of its input to all 16 elements of
// the output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
func (x Int16x8) Broadcast1To16() Int16x16
// Broadcast1To16 copies the lowest element of its input to all 16 elements of
// the output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX512
func (x Int32x4) Broadcast1To16() Int32x16
// Broadcast1To16 copies the lowest element of its input to all 16 elements of
// the output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX2
func (x Uint8x16) Broadcast1To16() Uint8x16
// Broadcast1To16 copies the lowest element of its input to all 16 elements of
// the output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX2
func (x Uint16x8) Broadcast1To16() Uint16x16
// Broadcast1To16 copies the lowest element of its input to all 16 elements of
// the output vector.
//
// Asm: VPBROADCASTD, CPU Feature: AVX512
func (x Uint32x4) Broadcast1To16() Uint32x16
/* Broadcast1To32 */
// Broadcast1To32 copies the lowest element of its input to all 32 elements of
// the output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX2
func (x Int8x16) Broadcast1To32() Int8x32
// Broadcast1To32 copies the lowest element of its input to all 32 elements of
// the output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX512
func (x Int16x8) Broadcast1To32() Int16x32
// Broadcast1To32 copies the lowest element of its input to all 32 elements of
// the output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX2
func (x Uint8x16) Broadcast1To32() Uint8x32
// Broadcast1To32 copies the lowest element of its input to all 32 elements of
// the output vector.
//
// Asm: VPBROADCASTW, CPU Feature: AVX512
func (x Uint16x8) Broadcast1To32() Uint16x32
/* Broadcast1To64 */
// Broadcast1To64 copies the lowest element of its input to all 64 elements of
// the output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX512
func (x Int8x16) Broadcast1To64() Int8x64
// Broadcast1To64 copies the lowest element of its input to all 64 elements of
// the output vector.
//
// Asm: VPBROADCASTB, CPU Feature: AVX512
func (x Uint8x16) Broadcast1To64() Uint8x64
/* Ceil */

View File

@@ -10,7 +10,7 @@ package archsimd
// Emulated, CPU Feature: AVX2
func BroadcastInt8x16(x int8) Int8x16 {
var z Int8x16
return z.SetElem(0, x).Broadcast128()
return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastInt16x8 returns a vector with the input
@@ -19,7 +19,7 @@ func BroadcastInt8x16(x int8) Int8x16 {
// Emulated, CPU Feature: AVX2
func BroadcastInt16x8(x int16) Int16x8 {
var z Int16x8
return z.SetElem(0, x).Broadcast128()
return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastInt32x4 returns a vector with the input
@@ -28,7 +28,7 @@ func BroadcastInt16x8(x int16) Int16x8 {
// Emulated, CPU Feature: AVX2
func BroadcastInt32x4(x int32) Int32x4 {
var z Int32x4
return z.SetElem(0, x).Broadcast128()
return z.SetElem(0, x).Broadcast1To4()
}
// BroadcastInt64x2 returns a vector with the input
@@ -37,7 +37,7 @@ func BroadcastInt32x4(x int32) Int32x4 {
// Emulated, CPU Feature: AVX2
func BroadcastInt64x2(x int64) Int64x2 {
var z Int64x2
return z.SetElem(0, x).Broadcast128()
return z.SetElem(0, x).Broadcast1To2()
}
// BroadcastUint8x16 returns a vector with the input
@@ -46,7 +46,7 @@ func BroadcastInt64x2(x int64) Int64x2 {
// Emulated, CPU Feature: AVX2
func BroadcastUint8x16(x uint8) Uint8x16 {
var z Uint8x16
return z.SetElem(0, x).Broadcast128()
return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastUint16x8 returns a vector with the input
@@ -55,7 +55,7 @@ func BroadcastUint8x16(x uint8) Uint8x16 {
// Emulated, CPU Feature: AVX2
func BroadcastUint16x8(x uint16) Uint16x8 {
var z Uint16x8
return z.SetElem(0, x).Broadcast128()
return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastUint32x4 returns a vector with the input
@@ -64,7 +64,7 @@ func BroadcastUint16x8(x uint16) Uint16x8 {
// Emulated, CPU Feature: AVX2
func BroadcastUint32x4(x uint32) Uint32x4 {
var z Uint32x4
return z.SetElem(0, x).Broadcast128()
return z.SetElem(0, x).Broadcast1To4()
}
// BroadcastUint64x2 returns a vector with the input
@@ -73,7 +73,7 @@ func BroadcastUint32x4(x uint32) Uint32x4 {
// Emulated, CPU Feature: AVX2
func BroadcastUint64x2(x uint64) Uint64x2 {
var z Uint64x2
return z.SetElem(0, x).Broadcast128()
return z.SetElem(0, x).Broadcast1To2()
}
// BroadcastFloat32x4 returns a vector with the input
@@ -82,7 +82,7 @@ func BroadcastUint64x2(x uint64) Uint64x2 {
// Emulated, CPU Feature: AVX2
func BroadcastFloat32x4(x float32) Float32x4 {
var z Float32x4
return z.SetElem(0, x).Broadcast128()
return z.SetElem(0, x).Broadcast1To4()
}
// BroadcastFloat64x2 returns a vector with the input
@@ -91,7 +91,7 @@ func BroadcastFloat32x4(x float32) Float32x4 {
// Emulated, CPU Feature: AVX2
func BroadcastFloat64x2(x float64) Float64x2 {
var z Float64x2
return z.SetElem(0, x).Broadcast128()
return z.SetElem(0, x).Broadcast1To2()
}
// BroadcastInt8x32 returns a vector with the input
@@ -100,7 +100,7 @@ func BroadcastFloat64x2(x float64) Float64x2 {
// Emulated, CPU Feature: AVX2
func BroadcastInt8x32(x int8) Int8x32 {
var z Int8x16
return z.SetElem(0, x).Broadcast256()
return z.SetElem(0, x).Broadcast1To32()
}
// BroadcastInt16x16 returns a vector with the input
@@ -109,7 +109,7 @@ func BroadcastInt8x32(x int8) Int8x32 {
// Emulated, CPU Feature: AVX2
func BroadcastInt16x16(x int16) Int16x16 {
var z Int16x8
return z.SetElem(0, x).Broadcast256()
return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastInt32x8 returns a vector with the input
@@ -118,7 +118,7 @@ func BroadcastInt16x16(x int16) Int16x16 {
// Emulated, CPU Feature: AVX2
func BroadcastInt32x8(x int32) Int32x8 {
var z Int32x4
return z.SetElem(0, x).Broadcast256()
return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastInt64x4 returns a vector with the input
@@ -127,7 +127,7 @@ func BroadcastInt32x8(x int32) Int32x8 {
// Emulated, CPU Feature: AVX2
func BroadcastInt64x4(x int64) Int64x4 {
var z Int64x2
return z.SetElem(0, x).Broadcast256()
return z.SetElem(0, x).Broadcast1To4()
}
// BroadcastUint8x32 returns a vector with the input
@@ -136,7 +136,7 @@ func BroadcastInt64x4(x int64) Int64x4 {
// Emulated, CPU Feature: AVX2
func BroadcastUint8x32(x uint8) Uint8x32 {
var z Uint8x16
return z.SetElem(0, x).Broadcast256()
return z.SetElem(0, x).Broadcast1To32()
}
// BroadcastUint16x16 returns a vector with the input
@@ -145,7 +145,7 @@ func BroadcastUint8x32(x uint8) Uint8x32 {
// Emulated, CPU Feature: AVX2
func BroadcastUint16x16(x uint16) Uint16x16 {
var z Uint16x8
return z.SetElem(0, x).Broadcast256()
return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastUint32x8 returns a vector with the input
@@ -154,7 +154,7 @@ func BroadcastUint16x16(x uint16) Uint16x16 {
// Emulated, CPU Feature: AVX2
func BroadcastUint32x8(x uint32) Uint32x8 {
var z Uint32x4
return z.SetElem(0, x).Broadcast256()
return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastUint64x4 returns a vector with the input
@@ -163,7 +163,7 @@ func BroadcastUint32x8(x uint32) Uint32x8 {
// Emulated, CPU Feature: AVX2
func BroadcastUint64x4(x uint64) Uint64x4 {
var z Uint64x2
return z.SetElem(0, x).Broadcast256()
return z.SetElem(0, x).Broadcast1To4()
}
// BroadcastFloat32x8 returns a vector with the input
@@ -172,7 +172,7 @@ func BroadcastUint64x4(x uint64) Uint64x4 {
// Emulated, CPU Feature: AVX2
func BroadcastFloat32x8(x float32) Float32x8 {
var z Float32x4
return z.SetElem(0, x).Broadcast256()
return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastFloat64x4 returns a vector with the input
@@ -181,7 +181,7 @@ func BroadcastFloat32x8(x float32) Float32x8 {
// Emulated, CPU Feature: AVX2
func BroadcastFloat64x4(x float64) Float64x4 {
var z Float64x2
return z.SetElem(0, x).Broadcast256()
return z.SetElem(0, x).Broadcast1To4()
}
// BroadcastInt8x64 returns a vector with the input
@@ -190,7 +190,7 @@ func BroadcastFloat64x4(x float64) Float64x4 {
// Emulated, CPU Feature: AVX512BW
func BroadcastInt8x64(x int8) Int8x64 {
var z Int8x16
return z.SetElem(0, x).Broadcast512()
return z.SetElem(0, x).Broadcast1To64()
}
// BroadcastInt16x32 returns a vector with the input
@@ -199,7 +199,7 @@ func BroadcastInt8x64(x int8) Int8x64 {
// Emulated, CPU Feature: AVX512BW
func BroadcastInt16x32(x int16) Int16x32 {
var z Int16x8
return z.SetElem(0, x).Broadcast512()
return z.SetElem(0, x).Broadcast1To32()
}
// BroadcastInt32x16 returns a vector with the input
@@ -208,7 +208,7 @@ func BroadcastInt16x32(x int16) Int16x32 {
// Emulated, CPU Feature: AVX512F
func BroadcastInt32x16(x int32) Int32x16 {
var z Int32x4
return z.SetElem(0, x).Broadcast512()
return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastInt64x8 returns a vector with the input
@@ -217,7 +217,7 @@ func BroadcastInt32x16(x int32) Int32x16 {
// Emulated, CPU Feature: AVX512F
func BroadcastInt64x8(x int64) Int64x8 {
var z Int64x2
return z.SetElem(0, x).Broadcast512()
return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastUint8x64 returns a vector with the input
@@ -226,7 +226,7 @@ func BroadcastInt64x8(x int64) Int64x8 {
// Emulated, CPU Feature: AVX512BW
func BroadcastUint8x64(x uint8) Uint8x64 {
var z Uint8x16
return z.SetElem(0, x).Broadcast512()
return z.SetElem(0, x).Broadcast1To64()
}
// BroadcastUint16x32 returns a vector with the input
@@ -235,7 +235,7 @@ func BroadcastUint8x64(x uint8) Uint8x64 {
// Emulated, CPU Feature: AVX512BW
func BroadcastUint16x32(x uint16) Uint16x32 {
var z Uint16x8
return z.SetElem(0, x).Broadcast512()
return z.SetElem(0, x).Broadcast1To32()
}
// BroadcastUint32x16 returns a vector with the input
@@ -244,7 +244,7 @@ func BroadcastUint16x32(x uint16) Uint16x32 {
// Emulated, CPU Feature: AVX512F
func BroadcastUint32x16(x uint32) Uint32x16 {
var z Uint32x4
return z.SetElem(0, x).Broadcast512()
return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastUint64x8 returns a vector with the input
@@ -253,7 +253,7 @@ func BroadcastUint32x16(x uint32) Uint32x16 {
// Emulated, CPU Feature: AVX512F
func BroadcastUint64x8(x uint64) Uint64x8 {
var z Uint64x2
return z.SetElem(0, x).Broadcast512()
return z.SetElem(0, x).Broadcast1To8()
}
// BroadcastFloat32x16 returns a vector with the input
@@ -262,7 +262,7 @@ func BroadcastUint64x8(x uint64) Uint64x8 {
// Emulated, CPU Feature: AVX512F
func BroadcastFloat32x16(x float32) Float32x16 {
var z Float32x4
return z.SetElem(0, x).Broadcast512()
return z.SetElem(0, x).Broadcast1To16()
}
// BroadcastFloat64x8 returns a vector with the input
@@ -271,7 +271,7 @@ func BroadcastFloat32x16(x float32) Float32x16 {
// Emulated, CPU Feature: AVX512F
func BroadcastFloat64x8(x float64) Float64x8 {
var z Float64x2
return z.SetElem(0, x).Broadcast512()
return z.SetElem(0, x).Broadcast1To8()
}
// ToMask converts from Int8x16 to Mask8x16, mask element is set to true when the corresponding vector element is non-zero.