mirror of
https://github.com/golang/go.git
synced 2026-01-29 07:02:05 +03:00
simd/archsimd: add Grouped for 256- and 512-bit SaturateTo(U)Int16Concat, and fix type
They operate on 128-bit groups, so name them Grouped to be clear, and consistent with other grouped operations. Reword the documentation, mention the grouping only for grouped versions. Also, SaturateToUint16Concat(Grouped) is a signed int32 to unsigned uint16 saturated conversion. The receiver and the parameter should be signed. The result remains unsigned. Change-Id: I30e28bc05e07f5c28214c9c6d9d201cbbb183468 Reviewed-on: https://go-review.googlesource.com/c/go/+/731501 Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
@@ -739,12 +739,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||
ssa.OpAMD64VPRORVQMasked128,
|
||||
ssa.OpAMD64VPRORVQMasked256,
|
||||
ssa.OpAMD64VPRORVQMasked512,
|
||||
ssa.OpAMD64VPACKSSDWMasked128,
|
||||
ssa.OpAMD64VPACKSSDWMasked256,
|
||||
ssa.OpAMD64VPACKSSDWMasked512,
|
||||
ssa.OpAMD64VPACKUSDWMasked128,
|
||||
ssa.OpAMD64VPACKSSDWMasked128,
|
||||
ssa.OpAMD64VPACKUSDWMasked256,
|
||||
ssa.OpAMD64VPACKUSDWMasked512,
|
||||
ssa.OpAMD64VPACKUSDWMasked128,
|
||||
ssa.OpAMD64VSCALEFPSMasked128,
|
||||
ssa.OpAMD64VSCALEFPSMasked256,
|
||||
ssa.OpAMD64VSCALEFPSMasked512,
|
||||
@@ -1575,12 +1575,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||
ssa.OpAMD64VPRORVQMasked128Merging,
|
||||
ssa.OpAMD64VPRORVQMasked256Merging,
|
||||
ssa.OpAMD64VPRORVQMasked512Merging,
|
||||
ssa.OpAMD64VPACKSSDWMasked128Merging,
|
||||
ssa.OpAMD64VPACKSSDWMasked256Merging,
|
||||
ssa.OpAMD64VPACKSSDWMasked512Merging,
|
||||
ssa.OpAMD64VPACKUSDWMasked128Merging,
|
||||
ssa.OpAMD64VPACKSSDWMasked128Merging,
|
||||
ssa.OpAMD64VPACKUSDWMasked256Merging,
|
||||
ssa.OpAMD64VPACKUSDWMasked512Merging,
|
||||
ssa.OpAMD64VPACKUSDWMasked128Merging,
|
||||
ssa.OpAMD64VSCALEFPSMasked128Merging,
|
||||
ssa.OpAMD64VSCALEFPSMasked256Merging,
|
||||
ssa.OpAMD64VSCALEFPSMasked512Merging,
|
||||
@@ -2162,12 +2162,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||
ssa.OpAMD64VPRORVQMasked128load,
|
||||
ssa.OpAMD64VPRORVQMasked256load,
|
||||
ssa.OpAMD64VPRORVQMasked512load,
|
||||
ssa.OpAMD64VPACKSSDWMasked128load,
|
||||
ssa.OpAMD64VPACKSSDWMasked256load,
|
||||
ssa.OpAMD64VPACKSSDWMasked512load,
|
||||
ssa.OpAMD64VPACKUSDWMasked128load,
|
||||
ssa.OpAMD64VPACKSSDWMasked128load,
|
||||
ssa.OpAMD64VPACKUSDWMasked256load,
|
||||
ssa.OpAMD64VPACKUSDWMasked512load,
|
||||
ssa.OpAMD64VPACKUSDWMasked128load,
|
||||
ssa.OpAMD64VSCALEFPSMasked128load,
|
||||
ssa.OpAMD64VSCALEFPSMasked256load,
|
||||
ssa.OpAMD64VSCALEFPSMasked512load,
|
||||
@@ -3439,12 +3439,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||
ssa.OpAMD64VPMOVSQBMasked128_128,
|
||||
ssa.OpAMD64VPMOVSQBMasked128_256,
|
||||
ssa.OpAMD64VPMOVSQBMasked128_512,
|
||||
ssa.OpAMD64VPACKSSDWMasked128,
|
||||
ssa.OpAMD64VPACKSSDWMasked128load,
|
||||
ssa.OpAMD64VPACKSSDWMasked256,
|
||||
ssa.OpAMD64VPACKSSDWMasked256load,
|
||||
ssa.OpAMD64VPACKSSDWMasked512,
|
||||
ssa.OpAMD64VPACKSSDWMasked512load,
|
||||
ssa.OpAMD64VPACKSSDWMasked128,
|
||||
ssa.OpAMD64VPACKSSDWMasked128load,
|
||||
ssa.OpAMD64VPMOVSDWMasked128_128,
|
||||
ssa.OpAMD64VPMOVSDWMasked128_256,
|
||||
ssa.OpAMD64VPMOVSDWMasked256,
|
||||
@@ -3463,12 +3463,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||
ssa.OpAMD64VPMOVUSQBMasked128_128,
|
||||
ssa.OpAMD64VPMOVUSQBMasked128_256,
|
||||
ssa.OpAMD64VPMOVUSQBMasked128_512,
|
||||
ssa.OpAMD64VPACKUSDWMasked128,
|
||||
ssa.OpAMD64VPACKUSDWMasked128load,
|
||||
ssa.OpAMD64VPACKUSDWMasked256,
|
||||
ssa.OpAMD64VPACKUSDWMasked256load,
|
||||
ssa.OpAMD64VPACKUSDWMasked512,
|
||||
ssa.OpAMD64VPACKUSDWMasked512load,
|
||||
ssa.OpAMD64VPACKUSDWMasked128,
|
||||
ssa.OpAMD64VPACKUSDWMasked128load,
|
||||
ssa.OpAMD64VPMOVUSDWMasked128_128,
|
||||
ssa.OpAMD64VPMOVUSDWMasked128_256,
|
||||
ssa.OpAMD64VPMOVUSDWMasked256,
|
||||
|
||||
@@ -914,8 +914,8 @@
|
||||
(SaturateToInt16Int64x4 ...) => (VPMOVSQW128_256 ...)
|
||||
(SaturateToInt16Int64x8 ...) => (VPMOVSQW128_512 ...)
|
||||
(SaturateToInt16ConcatInt32x4 ...) => (VPACKSSDW128 ...)
|
||||
(SaturateToInt16ConcatInt32x8 ...) => (VPACKSSDW256 ...)
|
||||
(SaturateToInt16ConcatInt32x16 ...) => (VPACKSSDW512 ...)
|
||||
(SaturateToInt16ConcatGroupedInt32x8 ...) => (VPACKSSDW256 ...)
|
||||
(SaturateToInt16ConcatGroupedInt32x16 ...) => (VPACKSSDW512 ...)
|
||||
(SaturateToInt32Int64x2 ...) => (VPMOVSQD128_128 ...)
|
||||
(SaturateToInt32Int64x4 ...) => (VPMOVSQD128_256 ...)
|
||||
(SaturateToInt32Int64x8 ...) => (VPMOVSQD256 ...)
|
||||
@@ -934,9 +934,9 @@
|
||||
(SaturateToUint16Uint64x2 ...) => (VPMOVUSQW128_128 ...)
|
||||
(SaturateToUint16Uint64x4 ...) => (VPMOVUSQW128_256 ...)
|
||||
(SaturateToUint16Uint64x8 ...) => (VPMOVUSQW128_512 ...)
|
||||
(SaturateToUint16ConcatUint32x4 ...) => (VPACKUSDW128 ...)
|
||||
(SaturateToUint16ConcatUint32x8 ...) => (VPACKUSDW256 ...)
|
||||
(SaturateToUint16ConcatUint32x16 ...) => (VPACKUSDW512 ...)
|
||||
(SaturateToUint16ConcatInt32x4 ...) => (VPACKUSDW128 ...)
|
||||
(SaturateToUint16ConcatGroupedInt32x8 ...) => (VPACKUSDW256 ...)
|
||||
(SaturateToUint16ConcatGroupedInt32x16 ...) => (VPACKUSDW512 ...)
|
||||
(SaturateToUint32Uint64x2 ...) => (VPMOVUSQD128_128 ...)
|
||||
(SaturateToUint32Uint64x4 ...) => (VPMOVUSQD128_256 ...)
|
||||
(SaturateToUint32Uint64x8 ...) => (VPMOVUSQD256 ...)
|
||||
@@ -1775,9 +1775,9 @@
|
||||
(VMOVDQU64Masked128 (VPMOVSQB128_128 x) mask) => (VPMOVSQBMasked128_128 x mask)
|
||||
(VMOVDQU64Masked256 (VPMOVSQB128_256 x) mask) => (VPMOVSQBMasked128_256 x mask)
|
||||
(VMOVDQU64Masked512 (VPMOVSQB128_512 x) mask) => (VPMOVSQBMasked128_512 x mask)
|
||||
(VMOVDQU32Masked128 (VPACKSSDW128 x y) mask) => (VPACKSSDWMasked128 x y mask)
|
||||
(VMOVDQU32Masked256 (VPACKSSDW256 x y) mask) => (VPACKSSDWMasked256 x y mask)
|
||||
(VMOVDQU32Masked512 (VPACKSSDW512 x y) mask) => (VPACKSSDWMasked512 x y mask)
|
||||
(VMOVDQU32Masked128 (VPACKSSDW128 x y) mask) => (VPACKSSDWMasked128 x y mask)
|
||||
(VMOVDQU32Masked128 (VPMOVSDW128_128 x) mask) => (VPMOVSDWMasked128_128 x mask)
|
||||
(VMOVDQU32Masked256 (VPMOVSDW128_256 x) mask) => (VPMOVSDWMasked128_256 x mask)
|
||||
(VMOVDQU32Masked256 (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256 x mask)
|
||||
@@ -1796,9 +1796,9 @@
|
||||
(VMOVDQU64Masked128 (VPMOVUSQB128_128 x) mask) => (VPMOVUSQBMasked128_128 x mask)
|
||||
(VMOVDQU64Masked256 (VPMOVUSQB128_256 x) mask) => (VPMOVUSQBMasked128_256 x mask)
|
||||
(VMOVDQU64Masked512 (VPMOVUSQB128_512 x) mask) => (VPMOVUSQBMasked128_512 x mask)
|
||||
(VMOVDQU32Masked128 (VPACKUSDW128 x y) mask) => (VPACKUSDWMasked128 x y mask)
|
||||
(VMOVDQU32Masked256 (VPACKUSDW256 x y) mask) => (VPACKUSDWMasked256 x y mask)
|
||||
(VMOVDQU32Masked512 (VPACKUSDW512 x y) mask) => (VPACKUSDWMasked512 x y mask)
|
||||
(VMOVDQU32Masked128 (VPACKUSDW128 x y) mask) => (VPACKUSDWMasked128 x y mask)
|
||||
(VMOVDQU32Masked128 (VPMOVUSDW128_128 x) mask) => (VPMOVUSDWMasked128_128 x mask)
|
||||
(VMOVDQU32Masked256 (VPMOVUSDW128_256 x) mask) => (VPMOVUSDWMasked128_256 x mask)
|
||||
(VMOVDQU32Masked256 (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256 x mask)
|
||||
@@ -2948,13 +2948,13 @@
|
||||
(VPRORVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVQMasked256load {sym} [off] x ptr mask mem)
|
||||
(VPRORVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVQMasked512load {sym} [off] x ptr mask mem)
|
||||
(VPACKSSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDW512load {sym} [off] x ptr mem)
|
||||
(VPACKSSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked128load {sym} [off] x ptr mask mem)
|
||||
(VPACKSSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked256load {sym} [off] x ptr mask mem)
|
||||
(VPACKSSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked512load {sym} [off] x ptr mask mem)
|
||||
(VPACKSSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked128load {sym} [off] x ptr mask mem)
|
||||
(VPACKUSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDW512load {sym} [off] x ptr mem)
|
||||
(VPACKUSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked128load {sym} [off] x ptr mask mem)
|
||||
(VPACKUSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked256load {sym} [off] x ptr mask mem)
|
||||
(VPACKUSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked512load {sym} [off] x ptr mask mem)
|
||||
(VPACKUSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked128load {sym} [off] x ptr mask mem)
|
||||
(VSCALEFPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPS128load {sym} [off] x ptr mem)
|
||||
(VSCALEFPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPS256load {sym} [off] x ptr mem)
|
||||
(VSCALEFPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPS512load {sym} [off] x ptr mem)
|
||||
|
||||
@@ -830,9 +830,9 @@ func simdGenericOps() []opData {
|
||||
{name: "SaturateToInt8Int64x2", argLength: 1, commutative: false},
|
||||
{name: "SaturateToInt8Int64x4", argLength: 1, commutative: false},
|
||||
{name: "SaturateToInt8Int64x8", argLength: 1, commutative: false},
|
||||
{name: "SaturateToInt16ConcatGroupedInt32x8", argLength: 2, commutative: false},
|
||||
{name: "SaturateToInt16ConcatGroupedInt32x16", argLength: 2, commutative: false},
|
||||
{name: "SaturateToInt16ConcatInt32x4", argLength: 2, commutative: false},
|
||||
{name: "SaturateToInt16ConcatInt32x8", argLength: 2, commutative: false},
|
||||
{name: "SaturateToInt16ConcatInt32x16", argLength: 2, commutative: false},
|
||||
{name: "SaturateToInt16Int32x4", argLength: 1, commutative: false},
|
||||
{name: "SaturateToInt16Int32x8", argLength: 1, commutative: false},
|
||||
{name: "SaturateToInt16Int32x16", argLength: 1, commutative: false},
|
||||
@@ -851,9 +851,9 @@ func simdGenericOps() []opData {
|
||||
{name: "SaturateToUint8Uint64x2", argLength: 1, commutative: false},
|
||||
{name: "SaturateToUint8Uint64x4", argLength: 1, commutative: false},
|
||||
{name: "SaturateToUint8Uint64x8", argLength: 1, commutative: false},
|
||||
{name: "SaturateToUint16ConcatUint32x4", argLength: 2, commutative: false},
|
||||
{name: "SaturateToUint16ConcatUint32x8", argLength: 2, commutative: false},
|
||||
{name: "SaturateToUint16ConcatUint32x16", argLength: 2, commutative: false},
|
||||
{name: "SaturateToUint16ConcatGroupedInt32x8", argLength: 2, commutative: false},
|
||||
{name: "SaturateToUint16ConcatGroupedInt32x16", argLength: 2, commutative: false},
|
||||
{name: "SaturateToUint16ConcatInt32x4", argLength: 2, commutative: false},
|
||||
{name: "SaturateToUint16Uint32x4", argLength: 1, commutative: false},
|
||||
{name: "SaturateToUint16Uint32x8", argLength: 1, commutative: false},
|
||||
{name: "SaturateToUint16Uint32x16", argLength: 1, commutative: false},
|
||||
|
||||
@@ -7004,9 +7004,9 @@ const (
|
||||
OpSaturateToInt8Int64x2
|
||||
OpSaturateToInt8Int64x4
|
||||
OpSaturateToInt8Int64x8
|
||||
OpSaturateToInt16ConcatGroupedInt32x8
|
||||
OpSaturateToInt16ConcatGroupedInt32x16
|
||||
OpSaturateToInt16ConcatInt32x4
|
||||
OpSaturateToInt16ConcatInt32x8
|
||||
OpSaturateToInt16ConcatInt32x16
|
||||
OpSaturateToInt16Int32x4
|
||||
OpSaturateToInt16Int32x8
|
||||
OpSaturateToInt16Int32x16
|
||||
@@ -7025,9 +7025,9 @@ const (
|
||||
OpSaturateToUint8Uint64x2
|
||||
OpSaturateToUint8Uint64x4
|
||||
OpSaturateToUint8Uint64x8
|
||||
OpSaturateToUint16ConcatUint32x4
|
||||
OpSaturateToUint16ConcatUint32x8
|
||||
OpSaturateToUint16ConcatUint32x16
|
||||
OpSaturateToUint16ConcatGroupedInt32x8
|
||||
OpSaturateToUint16ConcatGroupedInt32x16
|
||||
OpSaturateToUint16ConcatInt32x4
|
||||
OpSaturateToUint16Uint32x4
|
||||
OpSaturateToUint16Uint32x8
|
||||
OpSaturateToUint16Uint32x16
|
||||
@@ -93737,21 +93737,21 @@ var opcodeTable = [...]opInfo{
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "SaturateToInt16ConcatGroupedInt32x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "SaturateToInt16ConcatGroupedInt32x16",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "SaturateToInt16ConcatInt32x4",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "SaturateToInt16ConcatInt32x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "SaturateToInt16ConcatInt32x16",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "SaturateToInt16Int32x4",
|
||||
argLen: 1,
|
||||
@@ -93843,17 +93843,17 @@ var opcodeTable = [...]opInfo{
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "SaturateToUint16ConcatUint32x4",
|
||||
name: "SaturateToUint16ConcatGroupedInt32x8",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "SaturateToUint16ConcatUint32x8",
|
||||
name: "SaturateToUint16ConcatGroupedInt32x16",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "SaturateToUint16ConcatUint32x16",
|
||||
name: "SaturateToUint16ConcatInt32x4",
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
|
||||
@@ -5040,15 +5040,15 @@ func rewriteValueAMD64(v *Value) bool {
|
||||
case OpSHA256TwoRoundsUint32x4:
|
||||
v.Op = OpAMD64SHA256RNDS2128
|
||||
return true
|
||||
case OpSaturateToInt16ConcatInt32x16:
|
||||
case OpSaturateToInt16ConcatGroupedInt32x16:
|
||||
v.Op = OpAMD64VPACKSSDW512
|
||||
return true
|
||||
case OpSaturateToInt16ConcatGroupedInt32x8:
|
||||
v.Op = OpAMD64VPACKSSDW256
|
||||
return true
|
||||
case OpSaturateToInt16ConcatInt32x4:
|
||||
v.Op = OpAMD64VPACKSSDW128
|
||||
return true
|
||||
case OpSaturateToInt16ConcatInt32x8:
|
||||
v.Op = OpAMD64VPACKSSDW256
|
||||
return true
|
||||
case OpSaturateToInt16Int32x16:
|
||||
v.Op = OpAMD64VPMOVSDW256
|
||||
return true
|
||||
@@ -5103,15 +5103,15 @@ func rewriteValueAMD64(v *Value) bool {
|
||||
case OpSaturateToInt8Int64x8:
|
||||
v.Op = OpAMD64VPMOVSQB128_512
|
||||
return true
|
||||
case OpSaturateToUint16ConcatUint32x16:
|
||||
case OpSaturateToUint16ConcatGroupedInt32x16:
|
||||
v.Op = OpAMD64VPACKUSDW512
|
||||
return true
|
||||
case OpSaturateToUint16ConcatUint32x4:
|
||||
v.Op = OpAMD64VPACKUSDW128
|
||||
return true
|
||||
case OpSaturateToUint16ConcatUint32x8:
|
||||
case OpSaturateToUint16ConcatGroupedInt32x8:
|
||||
v.Op = OpAMD64VPACKUSDW256
|
||||
return true
|
||||
case OpSaturateToUint16ConcatInt32x4:
|
||||
v.Op = OpAMD64VPACKUSDW128
|
||||
return true
|
||||
case OpSaturateToUint16Uint32x16:
|
||||
v.Op = OpAMD64VPMOVUSDW256
|
||||
return true
|
||||
|
||||
@@ -926,8 +926,8 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||
addF(simdPackage, "Int64x4.SaturateToInt16", opLen1(ssa.OpSaturateToInt16Int64x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x8.SaturateToInt16", opLen1(ssa.OpSaturateToInt16Int64x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.SaturateToInt16Concat", opLen2(ssa.OpSaturateToInt16ConcatInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.SaturateToInt16Concat", opLen2(ssa.OpSaturateToInt16ConcatInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.SaturateToInt16Concat", opLen2(ssa.OpSaturateToInt16ConcatInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.SaturateToInt16ConcatGrouped", opLen2(ssa.OpSaturateToInt16ConcatGroupedInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.SaturateToInt16ConcatGrouped", opLen2(ssa.OpSaturateToInt16ConcatGroupedInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int64x2.SaturateToInt32", opLen1(ssa.OpSaturateToInt32Int64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x4.SaturateToInt32", opLen1(ssa.OpSaturateToInt32Int64x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int64x8.SaturateToInt32", opLen1(ssa.OpSaturateToInt32Int64x8, types.TypeVec256), sys.AMD64)
|
||||
@@ -946,9 +946,9 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||
addF(simdPackage, "Uint64x2.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x4.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint64x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x8.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint64x8, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x4.SaturateToUint16Concat", opLen2(ssa.OpSaturateToUint16ConcatUint32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.SaturateToUint16Concat", opLen2(ssa.OpSaturateToUint16ConcatUint32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x16.SaturateToUint16Concat", opLen2(ssa.OpSaturateToUint16ConcatUint32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Int32x4.SaturateToUint16Concat", opLen2(ssa.OpSaturateToUint16ConcatInt32x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.SaturateToUint16ConcatGrouped", opLen2(ssa.OpSaturateToUint16ConcatGroupedInt32x8, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Int32x16.SaturateToUint16ConcatGrouped", opLen2(ssa.OpSaturateToUint16ConcatGroupedInt32x16, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x2.SaturateToUint32", opLen1(ssa.OpSaturateToUint32Uint64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x4.SaturateToUint32", opLen1(ssa.OpSaturateToUint32Uint64x4, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x8.SaturateToUint32", opLen1(ssa.OpSaturateToUint32Uint64x8, types.TypeVec256), sys.AMD64)
|
||||
|
||||
@@ -142,7 +142,7 @@ func (o *Operation) DecodeUnified(v *unify.Value) error {
|
||||
outLanes := o.Out[0].Lanes
|
||||
if inLanes != nil && outLanes != nil && *inLanes < *outLanes {
|
||||
if (strings.Contains(o.Go, "Saturate") || strings.Contains(o.Go, "Truncate")) &&
|
||||
!strings.HasSuffix(o.Go, "Concat") {
|
||||
!strings.Contains(o.Go, "Concat") {
|
||||
o.Documentation += "\n// Results are packed to low elements in the returned vector, its upper elements are zeroed."
|
||||
}
|
||||
}
|
||||
|
||||
@@ -64,7 +64,7 @@
|
||||
regexpTag: "convert"
|
||||
documentation: !string |-
|
||||
// NAME truncates element values to int16.
|
||||
- go: "SaturateToInt16(Concat)?"
|
||||
- go: "SaturateToInt16(Concat(Grouped)?)?"
|
||||
commutative: false
|
||||
regexpTag: "convert"
|
||||
documentation: !string |-
|
||||
@@ -109,7 +109,7 @@
|
||||
regexpTag: "convert"
|
||||
documentation: !string |-
|
||||
// NAME truncates element values to uint16.
|
||||
- go: "SaturateToUint16(Concat)?"
|
||||
- go: "SaturateToUint16(Concat(Grouped)?)?"
|
||||
commutative: false
|
||||
regexpTag: "convert"
|
||||
documentation: !string |-
|
||||
|
||||
@@ -446,22 +446,48 @@
|
||||
asm: "VPACKSSDW"
|
||||
addDoc: &satDocConcat
|
||||
!string |-
|
||||
// With each 128-bit as a group:
|
||||
// The converted group from the first input vector will be packed to the lower part of the result vector,
|
||||
// the converted group from the second input vector will be packed to the upper part of the result vector.
|
||||
// The converted elements from x will be packed to the lower part of the result vector,
|
||||
// the converted elements from y will be packed to the upper part of the result vector.
|
||||
in:
|
||||
- base: int
|
||||
- base: int
|
||||
out:
|
||||
- base: int
|
||||
bits: 128
|
||||
- go: SaturateToInt16ConcatGrouped
|
||||
regexpTag: "convert"
|
||||
asm: "VPACKSSDW"
|
||||
addDoc: &satDocConcatGrouped
|
||||
!string |-
|
||||
// With each 128-bit as a group:
|
||||
// The converted elements from x will be packed to the lower part of the group in the result vector,
|
||||
// the converted elements from y will be packed to the upper part of the group in the result vector.
|
||||
in:
|
||||
- base: int
|
||||
- base: int
|
||||
out:
|
||||
- base: int
|
||||
bits: 256|512
|
||||
- go: SaturateToUint16Concat
|
||||
regexpTag: "convert"
|
||||
asm: "VPACKUSDW"
|
||||
addDoc: *satDocConcat
|
||||
in:
|
||||
- base: uint
|
||||
- base: uint
|
||||
- base: int
|
||||
- base: int
|
||||
out:
|
||||
- base: uint
|
||||
bits: 128
|
||||
- go: SaturateToUint16ConcatGrouped
|
||||
regexpTag: "convert"
|
||||
asm: "VPACKUSDW"
|
||||
addDoc: *satDocConcatGrouped
|
||||
in:
|
||||
- base: int
|
||||
- base: int
|
||||
out:
|
||||
- base: uint
|
||||
bits: 256|512
|
||||
|
||||
# low-part only conversions.
|
||||
# uint8->uint16
|
||||
|
||||
@@ -5418,28 +5418,29 @@ func (x Int64x8) SaturateToInt16() Int16x8
|
||||
/* SaturateToInt16Concat */
|
||||
|
||||
// SaturateToInt16Concat converts element values to int16 with signed saturation.
|
||||
// With each 128-bit as a group:
|
||||
// The converted group from the first input vector will be packed to the lower part of the result vector,
|
||||
// the converted group from the second input vector will be packed to the upper part of the result vector.
|
||||
// The converted elements from x will be packed to the lower part of the result vector,
|
||||
// the converted elements from y will be packed to the upper part of the result vector.
|
||||
//
|
||||
// Asm: VPACKSSDW, CPU Feature: AVX
|
||||
func (x Int32x4) SaturateToInt16Concat(y Int32x4) Int16x8
|
||||
|
||||
// SaturateToInt16Concat converts element values to int16 with signed saturation.
|
||||
/* SaturateToInt16ConcatGrouped */
|
||||
|
||||
// SaturateToInt16ConcatGrouped converts element values to int16 with signed saturation.
|
||||
// With each 128-bit as a group:
|
||||
// The converted group from the first input vector will be packed to the lower part of the result vector,
|
||||
// the converted group from the second input vector will be packed to the upper part of the result vector.
|
||||
// The converted elements from x will be packed to the lower part of the group in the result vector,
|
||||
// the converted elements from y will be packed to the upper part of the group in the result vector.
|
||||
//
|
||||
// Asm: VPACKSSDW, CPU Feature: AVX2
|
||||
func (x Int32x8) SaturateToInt16Concat(y Int32x8) Int16x16
|
||||
func (x Int32x8) SaturateToInt16ConcatGrouped(y Int32x8) Int16x16
|
||||
|
||||
// SaturateToInt16Concat converts element values to int16 with signed saturation.
|
||||
// SaturateToInt16ConcatGrouped converts element values to int16 with signed saturation.
|
||||
// With each 128-bit as a group:
|
||||
// The converted group from the first input vector will be packed to the lower part of the result vector,
|
||||
// the converted group from the second input vector will be packed to the upper part of the result vector.
|
||||
// The converted elements from x will be packed to the lower part of the group in the result vector,
|
||||
// the converted elements from y will be packed to the upper part of the group in the result vector.
|
||||
//
|
||||
// Asm: VPACKSSDW, CPU Feature: AVX512
|
||||
func (x Int32x16) SaturateToInt16Concat(y Int32x16) Int16x32
|
||||
func (x Int32x16) SaturateToInt16ConcatGrouped(y Int32x16) Int16x32
|
||||
|
||||
/* SaturateToInt32 */
|
||||
|
||||
@@ -5550,19 +5551,29 @@ func (x Uint64x8) SaturateToUint16() Uint16x8
|
||||
/* SaturateToUint16Concat */
|
||||
|
||||
// SaturateToUint16Concat converts element values to uint16 with unsigned saturation.
|
||||
// The converted elements from x will be packed to the lower part of the result vector,
|
||||
// the converted elements from y will be packed to the upper part of the result vector.
|
||||
//
|
||||
// Asm: VPACKUSDW, CPU Feature: AVX
|
||||
func (x Uint32x4) SaturateToUint16Concat(y Uint32x4) Uint16x8
|
||||
func (x Int32x4) SaturateToUint16Concat(y Int32x4) Uint16x8
|
||||
|
||||
// SaturateToUint16Concat converts element values to uint16 with unsigned saturation.
|
||||
/* SaturateToUint16ConcatGrouped */
|
||||
|
||||
// SaturateToUint16ConcatGrouped converts element values to uint16 with unsigned saturation.
|
||||
// With each 128-bit as a group:
|
||||
// The converted elements from x will be packed to the lower part of the group in the result vector,
|
||||
// the converted elements from y will be packed to the upper part of the group in the result vector.
|
||||
//
|
||||
// Asm: VPACKUSDW, CPU Feature: AVX2
|
||||
func (x Uint32x8) SaturateToUint16Concat(y Uint32x8) Uint16x16
|
||||
func (x Int32x8) SaturateToUint16ConcatGrouped(y Int32x8) Uint16x16
|
||||
|
||||
// SaturateToUint16Concat converts element values to uint16 with unsigned saturation.
|
||||
// SaturateToUint16ConcatGrouped converts element values to uint16 with unsigned saturation.
|
||||
// With each 128-bit as a group:
|
||||
// The converted elements from x will be packed to the lower part of the group in the result vector,
|
||||
// the converted elements from y will be packed to the upper part of the group in the result vector.
|
||||
//
|
||||
// Asm: VPACKUSDW, CPU Feature: AVX512
|
||||
func (x Uint32x16) SaturateToUint16Concat(y Uint32x16) Uint16x32
|
||||
func (x Int32x16) SaturateToUint16ConcatGrouped(y Int32x16) Uint16x32
|
||||
|
||||
/* SaturateToUint32 */
|
||||
|
||||
|
||||
Reference in New Issue
Block a user