mirror of
https://github.com/golang/go.git
synced 2026-02-05 18:35:03 +03:00
[dev.simd] cmd/compile: peephole simd load
Some convenient peepholes; they might not have a big impact on performance. Change-Id: I25574dba95fcf1d5fda14472175e556737b51584 Reviewed-on: https://go-review.googlesource.com/c/go/+/702997 Reviewed-by: Cherry Mui <cherryyz@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
@@ -1791,3 +1791,7 @@
|
||||
(VMOVSDf2v x:(MOVSDconst [c] )) => (VMOVSDconst [c] )
|
||||
(VMOVSSf2v x:(MOVSSconst [c] )) => (VMOVSSconst [c] )
|
||||
|
||||
// Fold address arithmetic into SIMD VMOVDQU loads and stores: the constant of
// an ADDQconst, or the offset and symbol of a LEAQ, is merged into the memory
// op's own offset/symbol. Every rule requires the combined offset to fit in
// 32 bits and the address value x to have exactly one use; the LEAQ forms
// additionally require canMergeSym(sym1, sym2).
(VMOVDQUload(128|256|512) [off1] {sym} x:(ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 => (VMOVDQUload(128|256|512) [off1+off2] {sym} ptr mem)
|
||||
(VMOVDQUstore(128|256|512) [off1] {sym} x:(ADDQconst [off2] ptr) val mem) && is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 => (VMOVDQUstore(128|256|512) [off1+off2] {sym} ptr val mem)
|
||||
(VMOVDQUload(128|256|512) [off1] {sym1} x:(LEAQ [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2) => (VMOVDQUload(128|256|512) [off1+off2] {mergeSym(sym1, sym2)} base mem)
|
||||
(VMOVDQUstore(128|256|512) [off1] {sym1} x:(LEAQ [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2) => (VMOVDQUstore(128|256|512) [off1+off2] {mergeSym(sym1, sym2)} base val mem)
|
||||
|
||||
@@ -709,6 +709,18 @@ func rewriteValueAMD64(v *Value) bool {
|
||||
return rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v)
|
||||
case OpAMD64VMOVDQU8Masked512:
|
||||
return rewriteValueAMD64_OpAMD64VMOVDQU8Masked512(v)
|
||||
case OpAMD64VMOVDQUload128:
|
||||
return rewriteValueAMD64_OpAMD64VMOVDQUload128(v)
|
||||
case OpAMD64VMOVDQUload256:
|
||||
return rewriteValueAMD64_OpAMD64VMOVDQUload256(v)
|
||||
case OpAMD64VMOVDQUload512:
|
||||
return rewriteValueAMD64_OpAMD64VMOVDQUload512(v)
|
||||
case OpAMD64VMOVDQUstore128:
|
||||
return rewriteValueAMD64_OpAMD64VMOVDQUstore128(v)
|
||||
case OpAMD64VMOVDQUstore256:
|
||||
return rewriteValueAMD64_OpAMD64VMOVDQUstore256(v)
|
||||
case OpAMD64VMOVDQUstore512:
|
||||
return rewriteValueAMD64_OpAMD64VMOVDQUstore512(v)
|
||||
case OpAMD64VMOVQ:
|
||||
return rewriteValueAMD64_OpAMD64VMOVQ(v)
|
||||
case OpAMD64VMOVSDf2v:
|
||||
@@ -32833,6 +32845,315 @@ func rewriteValueAMD64_OpAMD64VMOVDQU8Masked512(v *Value) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VMOVDQUload128(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VMOVDQUload128 [off1] {sym} x:(ADDQconst [off2] ptr) mem)
|
||||
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1
|
||||
// result: (VMOVDQUload128 [off1+off2] {sym} ptr mem)
|
||||
for {
|
||||
off1 := auxIntToInt32(v.AuxInt)
|
||||
sym := auxToSym(v.Aux)
|
||||
x := v_0
|
||||
if x.Op != OpAMD64ADDQconst {
|
||||
break
|
||||
}
|
||||
off2 := auxIntToInt32(x.AuxInt)
|
||||
ptr := x.Args[0]
|
||||
mem := v_1
|
||||
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VMOVDQUload128)
|
||||
v.AuxInt = int32ToAuxInt(off1 + off2)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg2(ptr, mem)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQUload128 [off1] {sym1} x:(LEAQ [off2] {sym2} base) mem)
|
||||
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2)
|
||||
// result: (VMOVDQUload128 [off1+off2] {mergeSym(sym1, sym2)} base mem)
|
||||
for {
|
||||
off1 := auxIntToInt32(v.AuxInt)
|
||||
sym1 := auxToSym(v.Aux)
|
||||
x := v_0
|
||||
if x.Op != OpAMD64LEAQ {
|
||||
break
|
||||
}
|
||||
off2 := auxIntToInt32(x.AuxInt)
|
||||
sym2 := auxToSym(x.Aux)
|
||||
base := x.Args[0]
|
||||
mem := v_1
|
||||
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VMOVDQUload128)
|
||||
v.AuxInt = int32ToAuxInt(off1 + off2)
|
||||
v.Aux = symToAux(mergeSym(sym1, sym2))
|
||||
v.AddArg2(base, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VMOVDQUload256(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VMOVDQUload256 [off1] {sym} x:(ADDQconst [off2] ptr) mem)
|
||||
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1
|
||||
// result: (VMOVDQUload256 [off1+off2] {sym} ptr mem)
|
||||
for {
|
||||
off1 := auxIntToInt32(v.AuxInt)
|
||||
sym := auxToSym(v.Aux)
|
||||
x := v_0
|
||||
if x.Op != OpAMD64ADDQconst {
|
||||
break
|
||||
}
|
||||
off2 := auxIntToInt32(x.AuxInt)
|
||||
ptr := x.Args[0]
|
||||
mem := v_1
|
||||
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VMOVDQUload256)
|
||||
v.AuxInt = int32ToAuxInt(off1 + off2)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg2(ptr, mem)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQUload256 [off1] {sym1} x:(LEAQ [off2] {sym2} base) mem)
|
||||
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2)
|
||||
// result: (VMOVDQUload256 [off1+off2] {mergeSym(sym1, sym2)} base mem)
|
||||
for {
|
||||
off1 := auxIntToInt32(v.AuxInt)
|
||||
sym1 := auxToSym(v.Aux)
|
||||
x := v_0
|
||||
if x.Op != OpAMD64LEAQ {
|
||||
break
|
||||
}
|
||||
off2 := auxIntToInt32(x.AuxInt)
|
||||
sym2 := auxToSym(x.Aux)
|
||||
base := x.Args[0]
|
||||
mem := v_1
|
||||
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VMOVDQUload256)
|
||||
v.AuxInt = int32ToAuxInt(off1 + off2)
|
||||
v.Aux = symToAux(mergeSym(sym1, sym2))
|
||||
v.AddArg2(base, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VMOVDQUload512(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VMOVDQUload512 [off1] {sym} x:(ADDQconst [off2] ptr) mem)
|
||||
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1
|
||||
// result: (VMOVDQUload512 [off1+off2] {sym} ptr mem)
|
||||
for {
|
||||
off1 := auxIntToInt32(v.AuxInt)
|
||||
sym := auxToSym(v.Aux)
|
||||
x := v_0
|
||||
if x.Op != OpAMD64ADDQconst {
|
||||
break
|
||||
}
|
||||
off2 := auxIntToInt32(x.AuxInt)
|
||||
ptr := x.Args[0]
|
||||
mem := v_1
|
||||
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VMOVDQUload512)
|
||||
v.AuxInt = int32ToAuxInt(off1 + off2)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg2(ptr, mem)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQUload512 [off1] {sym1} x:(LEAQ [off2] {sym2} base) mem)
|
||||
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2)
|
||||
// result: (VMOVDQUload512 [off1+off2] {mergeSym(sym1, sym2)} base mem)
|
||||
for {
|
||||
off1 := auxIntToInt32(v.AuxInt)
|
||||
sym1 := auxToSym(v.Aux)
|
||||
x := v_0
|
||||
if x.Op != OpAMD64LEAQ {
|
||||
break
|
||||
}
|
||||
off2 := auxIntToInt32(x.AuxInt)
|
||||
sym2 := auxToSym(x.Aux)
|
||||
base := x.Args[0]
|
||||
mem := v_1
|
||||
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VMOVDQUload512)
|
||||
v.AuxInt = int32ToAuxInt(off1 + off2)
|
||||
v.Aux = symToAux(mergeSym(sym1, sym2))
|
||||
v.AddArg2(base, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VMOVDQUstore128(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VMOVDQUstore128 [off1] {sym} x:(ADDQconst [off2] ptr) val mem)
|
||||
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1
|
||||
// result: (VMOVDQUstore128 [off1+off2] {sym} ptr val mem)
|
||||
for {
|
||||
off1 := auxIntToInt32(v.AuxInt)
|
||||
sym := auxToSym(v.Aux)
|
||||
x := v_0
|
||||
if x.Op != OpAMD64ADDQconst {
|
||||
break
|
||||
}
|
||||
off2 := auxIntToInt32(x.AuxInt)
|
||||
ptr := x.Args[0]
|
||||
val := v_1
|
||||
mem := v_2
|
||||
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VMOVDQUstore128)
|
||||
v.AuxInt = int32ToAuxInt(off1 + off2)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg3(ptr, val, mem)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQUstore128 [off1] {sym1} x:(LEAQ [off2] {sym2} base) val mem)
|
||||
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2)
|
||||
// result: (VMOVDQUstore128 [off1+off2] {mergeSym(sym1, sym2)} base val mem)
|
||||
for {
|
||||
off1 := auxIntToInt32(v.AuxInt)
|
||||
sym1 := auxToSym(v.Aux)
|
||||
x := v_0
|
||||
if x.Op != OpAMD64LEAQ {
|
||||
break
|
||||
}
|
||||
off2 := auxIntToInt32(x.AuxInt)
|
||||
sym2 := auxToSym(x.Aux)
|
||||
base := x.Args[0]
|
||||
val := v_1
|
||||
mem := v_2
|
||||
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VMOVDQUstore128)
|
||||
v.AuxInt = int32ToAuxInt(off1 + off2)
|
||||
v.Aux = symToAux(mergeSym(sym1, sym2))
|
||||
v.AddArg3(base, val, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VMOVDQUstore256(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VMOVDQUstore256 [off1] {sym} x:(ADDQconst [off2] ptr) val mem)
|
||||
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1
|
||||
// result: (VMOVDQUstore256 [off1+off2] {sym} ptr val mem)
|
||||
for {
|
||||
off1 := auxIntToInt32(v.AuxInt)
|
||||
sym := auxToSym(v.Aux)
|
||||
x := v_0
|
||||
if x.Op != OpAMD64ADDQconst {
|
||||
break
|
||||
}
|
||||
off2 := auxIntToInt32(x.AuxInt)
|
||||
ptr := x.Args[0]
|
||||
val := v_1
|
||||
mem := v_2
|
||||
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VMOVDQUstore256)
|
||||
v.AuxInt = int32ToAuxInt(off1 + off2)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg3(ptr, val, mem)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQUstore256 [off1] {sym1} x:(LEAQ [off2] {sym2} base) val mem)
|
||||
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2)
|
||||
// result: (VMOVDQUstore256 [off1+off2] {mergeSym(sym1, sym2)} base val mem)
|
||||
for {
|
||||
off1 := auxIntToInt32(v.AuxInt)
|
||||
sym1 := auxToSym(v.Aux)
|
||||
x := v_0
|
||||
if x.Op != OpAMD64LEAQ {
|
||||
break
|
||||
}
|
||||
off2 := auxIntToInt32(x.AuxInt)
|
||||
sym2 := auxToSym(x.Aux)
|
||||
base := x.Args[0]
|
||||
val := v_1
|
||||
mem := v_2
|
||||
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VMOVDQUstore256)
|
||||
v.AuxInt = int32ToAuxInt(off1 + off2)
|
||||
v.Aux = symToAux(mergeSym(sym1, sym2))
|
||||
v.AddArg3(base, val, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VMOVDQUstore512(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VMOVDQUstore512 [off1] {sym} x:(ADDQconst [off2] ptr) val mem)
|
||||
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1
|
||||
// result: (VMOVDQUstore512 [off1+off2] {sym} ptr val mem)
|
||||
for {
|
||||
off1 := auxIntToInt32(v.AuxInt)
|
||||
sym := auxToSym(v.Aux)
|
||||
x := v_0
|
||||
if x.Op != OpAMD64ADDQconst {
|
||||
break
|
||||
}
|
||||
off2 := auxIntToInt32(x.AuxInt)
|
||||
ptr := x.Args[0]
|
||||
val := v_1
|
||||
mem := v_2
|
||||
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VMOVDQUstore512)
|
||||
v.AuxInt = int32ToAuxInt(off1 + off2)
|
||||
v.Aux = symToAux(sym)
|
||||
v.AddArg3(ptr, val, mem)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQUstore512 [off1] {sym1} x:(LEAQ [off2] {sym2} base) val mem)
|
||||
// cond: is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2)
|
||||
// result: (VMOVDQUstore512 [off1+off2] {mergeSym(sym1, sym2)} base val mem)
|
||||
for {
|
||||
off1 := auxIntToInt32(v.AuxInt)
|
||||
sym1 := auxToSym(v.Aux)
|
||||
x := v_0
|
||||
if x.Op != OpAMD64LEAQ {
|
||||
break
|
||||
}
|
||||
off2 := auxIntToInt32(x.AuxInt)
|
||||
sym2 := auxToSym(x.Aux)
|
||||
base := x.Args[0]
|
||||
val := v_1
|
||||
mem := v_2
|
||||
if !(is32Bit(int64(off1)+int64(off2)) && x.Uses == 1 && canMergeSym(sym1, sym2)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VMOVDQUstore512)
|
||||
v.AuxInt = int32ToAuxInt(off1 + off2)
|
||||
v.Aux = symToAux(mergeSym(sym1, sym2))
|
||||
v.AddArg3(base, val, mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VMOVQ(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
|
||||
Reference in New Issue
Block a user