cmd/compile: use generated loops instead of DUFFZERO on amd64
goarch: amd64
cpu: 12th Gen Intel(R) Core(TM) i7-12700
│ base │ exp │
│ sec/op │ sec/op vs base │
MemclrKnownSize112-20 1.270n ± 14% 1.006n ± 0% -20.72% (p=0.000 n=10)
MemclrKnownSize128-20 1.266n ± 0% 1.005n ± 0% -20.58% (p=0.000 n=10)
MemclrKnownSize192-20 1.771n ± 0% 1.579n ± 1% -10.84% (p=0.000 n=10)
MemclrKnownSize248-20 4.034n ± 0% 3.520n ± 0% -12.75% (p=0.000 n=10)
MemclrKnownSize256-20 2.269n ± 0% 2.014n ± 0% -11.26% (p=0.000 n=10)
MemclrKnownSize512-20 4.280n ± 0% 4.030n ± 0% -5.84% (p=0.000 n=10)
MemclrKnownSize1024-20 8.309n ± 1% 8.057n ± 0% -3.03% (p=0.000 n=10)
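These benchmarks measure fixed-size clears, where the compiler sees a constant-size Zero op and picks the lowering changed in this CL. A minimal sketch of the shape of one such benchmark (hypothetical code, not the runtime's exact source):

	package bench

	import "testing"

	var buf [128]byte

	func BenchmarkMemclrKnownSize128(b *testing.B) {
		for i := 0; i < b.N; i++ {
			// The compiler recognizes this range-clear idiom and lowers it
			// to a known-size Zero op, so the loop measures the generated
			// zeroing sequence rather than a call into the runtime.
			for j := range buf {
				buf[j] = 0
			}
		}
	}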
Change-Id: I8f1627e2a1e981ff351dc7178932b32a2627f765
Reviewed-on: https://go-review.googlesource.com/c/go/+/678937
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
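In short: instead of calling into runtime's duffzero, the compiler now emits the 16-byte stores directly, and wraps them in a counted loop for larger sizes. An illustrative Go model of the straightline lowering, with plain Go standing in for the emitted MOVUPS instructions (a sketch only; assumes len(p) >= 16, and the real emitter works on obj.Prog values, not slices):

	func zeroStraightline(p []byte) { // models LoweredZero: 16 <= size < 192
		n := len(p)
		off := 0
		for n >= 16 {
			clear(p[off : off+16]) // one MOVUPS X15, off(ptr)
			off += 16
			n -= 16
		}
		if n != 0 {
			// Final store backs up so it ends exactly at the buffer's
			// end, overlapping bytes that were already zeroed.
			clear(p[off+n-16 : off+n])
		}
	}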
@@ -1007,26 +1007,103 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		ssagen.AddAux(&p.From, v)
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = v.Reg()
-	case ssa.OpAMD64DUFFZERO:
+	case ssa.OpAMD64LoweredZero:
 		if s.ABI != obj.ABIInternal {
 			// zero X15 manually
 			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
 		}
-		off := duffStart(v.AuxInt)
-		adj := duffAdj(v.AuxInt)
-		var p *obj.Prog
-		if adj != 0 {
-			p = s.Prog(x86.ALEAQ)
-			p.From.Type = obj.TYPE_MEM
-			p.From.Offset = adj
-			p.From.Reg = x86.REG_DI
-			p.To.Type = obj.TYPE_REG
-			p.To.Reg = x86.REG_DI
-		}
-		p = s.Prog(obj.ADUFFZERO)
-		p.To.Type = obj.TYPE_ADDR
-		p.To.Sym = ir.Syms.Duffzero
-		p.To.Offset = off
+		ptrReg := v.Args[0].Reg()
+		n := v.AuxInt
+		if n < 16 {
+			v.Fatalf("Zero too small %d", n)
+		}
+
+		zero16 := func(off int64) {
+			zero16(s, ptrReg, off)
+		}
+
+		// Generate zeroing instructions.
+		var off int64
+		for n >= 16 {
+			zero16(off)
+			off += 16
+			n -= 16
+		}
+		if n != 0 {
+			// use partially overlapped write.
+			// TODO: n <= 8, use smaller write?
+			zero16(off + n - 16)
+		}
+
+	case ssa.OpAMD64LoweredZeroLoop:
+		if s.ABI != obj.ABIInternal {
+			// zero X15 manually
+			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
+		}
+		ptrReg := v.Args[0].Reg()
+		countReg := v.RegTmp()
+		n := v.AuxInt
+		loopSize := int64(64)
+		if n < 3*loopSize {
+			// - a loop count of 0 won't work.
+			// - a loop count of 1 is useless.
+			// - a loop count of 2 is a code size ~tie
+			//     4 instructions to implement the loop
+			//     4 instructions in the loop body
+			//   vs
+			//     8 instructions in the straightline code
+			//   Might as well use straightline code.
+			v.Fatalf("ZeroLoop size too small %d", n)
+		}
+		zero16 := func(off int64) {
+			zero16(s, ptrReg, off)
+		}
+
+		// Put iteration count in a register.
+		//   MOVL $n, countReg
+		p := s.Prog(x86.AMOVL)
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = n / loopSize
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = countReg
+		cntInit := p
+
+		// Zero loopSize bytes starting at ptrReg.
+		for i := range loopSize / 16 {
+			zero16(i * 16)
+		}
+		//   ADDQ $loopSize, ptrReg
+		p = s.Prog(x86.AADDQ)
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = loopSize
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = ptrReg
+		//   DECL countReg
+		p = s.Prog(x86.ADECL)
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = countReg
+		// Jump to first instruction in loop if we're not done yet.
+		//   JNE head
+		p = s.Prog(x86.AJNE)
+		p.To.Type = obj.TYPE_BRANCH
+		p.To.SetTarget(cntInit.Link)
+
+		// Multiples of the loop size are now done.
+		n %= loopSize
+
+		// Write any fractional portion.
+		var off int64
+		for n >= 16 {
+			zero16(off)
+			off += 16
+			n -= 16
+		}
+		if n != 0 {
+			// Use partially-overlapping write.
+			// TODO: n <= 8, use smaller write?
+			zero16(off + n - 16)
+		}
+
 	case ssa.OpAMD64DUFFCOPY:
 		p := s.Prog(obj.ADUFFCOPY)
 		p.To.Type = obj.TYPE_ADDR
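A Go model of the LoweredZeroLoop emission above (a hypothetical helper; the real code emits obj.Progs). For a 200-byte clear, the loop handles bytes 0..191 in three 64-byte iterations, the fractional part is 8 bytes, and the tail store lands at offset 184, re-zeroing 184..191 on its way to covering the final 192..199:

	func zeroLoopModel(p []byte) {
		n := len(p) // at least 3*64, as the Fatalf above enforces
		off := 0
		for i := 0; i < n/64; i++ { // MOVL $(n/64), countReg ... DECL ... JNE
			clear(p[off : off+64]) // four MOVUPS stores per iteration
			off += 64              // ADDQ $64, ptrReg
		}
		n %= 64
		for n >= 16 {
			clear(p[off : off+16])
			off += 16
			n -= 16
		}
		if n != 0 {
			clear(p[off+n-16 : off+n]) // partially overlapping tail store
		}
	}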
@@ -1621,3 +1698,14 @@ func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg in
 	p.Pos = p.Pos.WithNotStmt()
 	return p
 }
+
+// zero 16 bytes at reg+off.
+func zero16(s *ssagen.State, reg int16, off int64) {
+	// MOVUPS X15, off(ptrReg)
+	p := s.Prog(x86.AMOVUPS)
+	p.From.Type = obj.TYPE_REG
+	p.From.Reg = x86.REG_X15
+	p.To.Type = obj.TYPE_MEM
+	p.To.Reg = reg
+	p.To.Offset = off
+}
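One subtlety in the two cases above: the local closure is itself named zero16, yet its body calls this package-level zero16. That works because in a short variable declaration the new identifier does not enter scope until the declaration ends, so the call inside the function literal still resolves to the package-level function. A standalone demonstration of the same scoping rule:

	package main

	import "fmt"

	func greet(who string) { fmt.Println("hello,", who) }

	func main() {
		// The right-hand side is resolved before the new "greet" enters
		// scope, so the inner call refers to the package-level greet.
		greet := func() { greet("gopher") }
		greet() // prints "hello, gopher"
	}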
@@ -375,34 +375,17 @@
 	(MOVQstoreconst [makeValAndOff(0,int32(s-8))] destptr
 		(MOVQstoreconst [makeValAndOff(0,0)] destptr mem))
 
-// Adjust zeros to be a multiple of 16 bytes.
-(Zero [s] destptr mem) && s%16 != 0 && s > 16 =>
-	(Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
-		(MOVOstoreconst [makeValAndOff(0,0)] destptr mem))
+// Zeroing up to 192 bytes uses straightline code.
+(Zero [s] destptr mem) && s >= 16 && s < 192 => (LoweredZero [s] destptr mem)
 
-(Zero [16] destptr mem) =>
-	(MOVOstoreconst [makeValAndOff(0,0)] destptr mem)
-(Zero [32] destptr mem) =>
-	(MOVOstoreconst [makeValAndOff(0,16)] destptr
-		(MOVOstoreconst [makeValAndOff(0,0)] destptr mem))
-(Zero [48] destptr mem) =>
-	(MOVOstoreconst [makeValAndOff(0,32)] destptr
-		(MOVOstoreconst [makeValAndOff(0,16)] destptr
-			(MOVOstoreconst [makeValAndOff(0,0)] destptr mem)))
-(Zero [64] destptr mem) =>
-	(MOVOstoreconst [makeValAndOff(0,48)] destptr
-		(MOVOstoreconst [makeValAndOff(0,32)] destptr
-			(MOVOstoreconst [makeValAndOff(0,16)] destptr
-				(MOVOstoreconst [makeValAndOff(0,0)] destptr mem))))
-
-// Medium zeroing uses a duff device.
-(Zero [s] destptr mem)
-	&& s > 64 && s <= 1024 && s%16 == 0 =>
-	(DUFFZERO [s] destptr mem)
+// Zeroing up to ~1KB uses a small loop.
+(Zero [s] destptr mem) && s >= 192 && s <= repZeroThreshold => (LoweredZeroLoop [s] destptr mem)
 
 // Large zeroing uses REP STOSQ.
-(Zero [s] destptr mem)
-	&& s > 1024 && s%8 == 0 =>
+(Zero [s] destptr mem) && s > repZeroThreshold && s%8 != 0 =>
+	(Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8])
+		(MOVOstoreconst [makeValAndOff(0,0)] destptr mem))
+(Zero [s] destptr mem) && s > repZeroThreshold && s%8 == 0 =>
 	(REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem)
 
 // Lowering constants
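Taken together, the rules above split constant-size zeroing into three classes; note that repZeroThreshold (1408, defined later in this CL) is exactly 22 iterations of the 64-byte loop body. A hedged Go restatement of the selection logic (sizes below 16 are handled by the smaller constant-size rules earlier in the file):

	func zeroStrategy(s int64) string {
		const repZeroThreshold = 1408
		switch {
		case s >= 16 && s < 192:
			return "LoweredZero: straightline 16-byte stores"
		case s >= 192 && s <= repZeroThreshold:
			return "LoweredZeroLoop: 64 bytes per loop iteration"
		case s%8 == 0:
			return "REPSTOSQ"
		default:
			// Zero the first 16 bytes, then retry with the 8-byte-aligned
			// remainder: Zero [s-s%8] at destptr+s%8.
			return "peel s%8 bytes, then REPSTOSQ"
		}
	}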
@@ -889,15 +889,30 @@ func init() {
 		// auxint = # of bytes to zero
 		// returns mem
 		{
-			name:      "DUFFZERO",
+			name:      "LoweredZero",
 			aux:       "Int64",
 			argLength: 2,
 			reg: regInfo{
-				inputs:   []regMask{buildReg("DI")},
-				clobbers: buildReg("DI"),
+				inputs: []regMask{gp},
 			},
-			//faultOnNilArg0: true, // Note: removed for 73748. TODO: reenable at some point
-			unsafePoint: true, // FP maintenance around DUFFCOPY can be clobbered by interrupts
+			faultOnNilArg0: true,
 		},
 
+		// arg0 = pointer to start of memory to zero
+		// arg1 = mem
+		// auxint = # of bytes to zero
+		// returns mem
+		{
+			name:      "LoweredZeroLoop",
+			aux:       "Int64",
+			argLength: 2,
+			reg: regInfo{
+				inputs:       []regMask{gp},
+				clobbersArg0: true,
+			},
+			clobberFlags:   true,
+			faultOnNilArg0: true,
+			needIntTemp:    true,
+		},
+
 		// arg0 = address of memory to zero
@@ -1051,7 +1051,8 @@ const (
 	OpAMD64MOVLstoreconstidx4
 	OpAMD64MOVQstoreconstidx1
 	OpAMD64MOVQstoreconstidx8
-	OpAMD64DUFFZERO
+	OpAMD64LoweredZero
+	OpAMD64LoweredZeroLoop
 	OpAMD64REPSTOSQ
 	OpAMD64CALLstatic
 	OpAMD64CALLtail
@@ -13873,15 +13874,28 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	{
-		name:        "DUFFZERO",
-		auxType:     auxInt64,
-		argLen:      2,
-		unsafePoint: true,
+		name:           "LoweredZero",
+		auxType:        auxInt64,
+		argLen:         2,
+		faultOnNilArg0: true,
 		reg: regInfo{
 			inputs: []inputInfo{
-				{0, 128}, // DI
+				{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
 			},
-			clobbers: 128, // DI
 		},
 	},
+	{
+		name:           "LoweredZeroLoop",
+		auxType:        auxInt64,
+		argLen:         2,
+		clobberFlags:   true,
+		needIntTemp:    true,
+		faultOnNilArg0: true,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+			},
+			clobbersArg0: true,
+		},
+	},
 	{
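The register masks in this generated table are bit sets over amd64's register file in its fixed numbering (assumed here from the generated comments): bit 4 is SP and bit 14 is R14, the g register under ABIInternal, which is why both are absent from 49135. A small decoder to cross-check:

	func decodeMask(m uint64) []string {
		names := []string{"AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI",
			"R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15"}
		var regs []string
		for i, name := range names {
			if m&(1<<uint(i)) != 0 {
				regs = append(regs, name)
			}
		}
		return regs
	}

decodeMask(49135) yields AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15, matching the comment above, while decodeMask(128) yields just DI, the old DUFFZERO constraint.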
@@ -6,6 +6,7 @@ package ssa
 
 import (
 	"cmd/compile/internal/types"
+	"fmt"
 	"testing"
 )
 
@@ -218,10 +219,37 @@ func TestSpillMove2(t *testing.T) {
 
 }
 
+func TestClobbersArg0(t *testing.T) {
+	c := testConfig(t)
+	f := c.Fun("entry",
+		Bloc("entry",
+			Valu("mem", OpInitMem, types.TypeMem, 0, nil),
+			Valu("ptr", OpArg, c.config.Types.Int64.PtrTo(), 0, c.Temp(c.config.Types.Int64.PtrTo())),
+			Valu("dst", OpArg, c.config.Types.Int64.PtrTo().PtrTo(), 0, c.Temp(c.config.Types.Int64.PtrTo().PtrTo())),
+			Valu("zero", OpAMD64LoweredZeroLoop, types.TypeMem, 256, nil, "ptr", "mem"),
+			Valu("store", OpAMD64MOVQstore, types.TypeMem, 0, nil, "dst", "ptr", "zero"),
+			Exit("store")))
+	flagalloc(f.f)
+	regalloc(f.f)
+	checkFunc(f.f)
+	// LoweredZeroLoop clobbers its argument, so there must be a copy of "ptr" somewhere
+	// so we still have that value available at "store".
+	if n := numCopies(f.blocks["entry"]); n != 1 {
+		fmt.Printf("%s\n", f.f.String())
+		t.Errorf("got %d copies, want 1", n)
+	}
+}
+
 func numSpills(b *Block) int {
+	return numOps(b, OpStoreReg)
+}
+func numCopies(b *Block) int {
+	return numOps(b, OpCopy)
+}
+func numOps(b *Block, op Op) int {
 	n := 0
 	for _, v := range b.Values {
-		if v.Op == OpStoreReg {
+		if v.Op == op {
 			n++
 		}
 	}
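The shape the test expects after allocation, sketched with hypothetical value names: because LoweredZeroLoop advances the pointer register in place (clobbersArg0), the allocator must preserve ptr in another register before the loop runs, and that is the single OpCopy the assertion counts.

	v1 = Copy ptr                        // the one copy numCopies finds
	v2 = LoweredZeroLoop [256] ptr mem   // destroys the register holding ptr
	v3 = MOVQstore dst v1 v2             // still needs ptr's original value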
@@ -29,6 +29,8 @@ type deadValueChoice bool
 const (
 	leaveDeadValues  deadValueChoice = false
 	removeDeadValues                 = true
+
+	repZeroThreshold = 1408 // size beyond which we use REP STOS for zeroing
 )
 
 // deadcode indicates whether rewrite should try to remove any values that become dead.
@@ -30025,19 +30025,49 @@ func rewriteValueAMD64_OpZero(v *Value) bool {
 		return true
 	}
 	// match: (Zero [s] destptr mem)
-	// cond: s%16 != 0 && s > 16
-	// result: (Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16]) (MOVOstoreconst [makeValAndOff(0,0)] destptr mem))
+	// cond: s >= 16 && s < 192
+	// result: (LoweredZero [s] destptr mem)
 	for {
 		s := auxIntToInt64(v.AuxInt)
 		destptr := v_0
 		mem := v_1
-		if !(s%16 != 0 && s > 16) {
+		if !(s >= 16 && s < 192) {
 			break
 		}
+		v.reset(OpAMD64LoweredZero)
+		v.AuxInt = int64ToAuxInt(s)
+		v.AddArg2(destptr, mem)
+		return true
+	}
+	// match: (Zero [s] destptr mem)
+	// cond: s >= 192 && s <= repZeroThreshold
+	// result: (LoweredZeroLoop [s] destptr mem)
+	for {
+		s := auxIntToInt64(v.AuxInt)
+		destptr := v_0
+		mem := v_1
+		if !(s >= 192 && s <= repZeroThreshold) {
+			break
+		}
+		v.reset(OpAMD64LoweredZeroLoop)
+		v.AuxInt = int64ToAuxInt(s)
+		v.AddArg2(destptr, mem)
+		return true
+	}
+	// match: (Zero [s] destptr mem)
+	// cond: s > repZeroThreshold && s%8 != 0
+	// result: (Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8]) (MOVOstoreconst [makeValAndOff(0,0)] destptr mem))
+	for {
+		s := auxIntToInt64(v.AuxInt)
+		destptr := v_0
+		mem := v_1
+		if !(s > repZeroThreshold && s%8 != 0) {
+			break
+		}
 		v.reset(OpZero)
-		v.AuxInt = int64ToAuxInt(s - s%16)
+		v.AuxInt = int64ToAuxInt(s - s%8)
 		v0 := b.NewValue0(v.Pos, OpOffPtr, destptr.Type)
-		v0.AuxInt = int64ToAuxInt(s % 16)
+		v0.AuxInt = int64ToAuxInt(s % 8)
 		v0.AddArg(destptr)
 		v1 := b.NewValue0(v.Pos, OpAMD64MOVOstoreconst, types.TypeMem)
 		v1.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0))
@@ -30045,99 +30075,14 @@ func rewriteValueAMD64_OpZero(v *Value) bool {
 		v.AddArg2(v0, v1)
 		return true
 	}
-	// match: (Zero [16] destptr mem)
-	// result: (MOVOstoreconst [makeValAndOff(0,0)] destptr mem)
-	for {
-		if auxIntToInt64(v.AuxInt) != 16 {
-			break
-		}
-		destptr := v_0
-		mem := v_1
-		v.reset(OpAMD64MOVOstoreconst)
-		v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0))
-		v.AddArg2(destptr, mem)
-		return true
-	}
-	// match: (Zero [32] destptr mem)
-	// result: (MOVOstoreconst [makeValAndOff(0,16)] destptr (MOVOstoreconst [makeValAndOff(0,0)] destptr mem))
-	for {
-		if auxIntToInt64(v.AuxInt) != 32 {
-			break
-		}
-		destptr := v_0
-		mem := v_1
-		v.reset(OpAMD64MOVOstoreconst)
-		v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 16))
-		v0 := b.NewValue0(v.Pos, OpAMD64MOVOstoreconst, types.TypeMem)
-		v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0))
-		v0.AddArg2(destptr, mem)
-		v.AddArg2(destptr, v0)
-		return true
-	}
-	// match: (Zero [48] destptr mem)
-	// result: (MOVOstoreconst [makeValAndOff(0,32)] destptr (MOVOstoreconst [makeValAndOff(0,16)] destptr (MOVOstoreconst [makeValAndOff(0,0)] destptr mem)))
-	for {
-		if auxIntToInt64(v.AuxInt) != 48 {
-			break
-		}
-		destptr := v_0
-		mem := v_1
-		v.reset(OpAMD64MOVOstoreconst)
-		v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 32))
-		v0 := b.NewValue0(v.Pos, OpAMD64MOVOstoreconst, types.TypeMem)
-		v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 16))
-		v1 := b.NewValue0(v.Pos, OpAMD64MOVOstoreconst, types.TypeMem)
-		v1.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0))
-		v1.AddArg2(destptr, mem)
-		v0.AddArg2(destptr, v1)
-		v.AddArg2(destptr, v0)
-		return true
-	}
-	// match: (Zero [64] destptr mem)
-	// result: (MOVOstoreconst [makeValAndOff(0,48)] destptr (MOVOstoreconst [makeValAndOff(0,32)] destptr (MOVOstoreconst [makeValAndOff(0,16)] destptr (MOVOstoreconst [makeValAndOff(0,0)] destptr mem))))
-	for {
-		if auxIntToInt64(v.AuxInt) != 64 {
-			break
-		}
-		destptr := v_0
-		mem := v_1
-		v.reset(OpAMD64MOVOstoreconst)
-		v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 48))
-		v0 := b.NewValue0(v.Pos, OpAMD64MOVOstoreconst, types.TypeMem)
-		v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 32))
-		v1 := b.NewValue0(v.Pos, OpAMD64MOVOstoreconst, types.TypeMem)
-		v1.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 16))
-		v2 := b.NewValue0(v.Pos, OpAMD64MOVOstoreconst, types.TypeMem)
-		v2.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0))
-		v2.AddArg2(destptr, mem)
-		v1.AddArg2(destptr, v2)
-		v0.AddArg2(destptr, v1)
-		v.AddArg2(destptr, v0)
-		return true
-	}
-	// match: (Zero [s] destptr mem)
-	// cond: s > 64 && s <= 1024 && s%16 == 0
-	// result: (DUFFZERO [s] destptr mem)
-	for {
-		s := auxIntToInt64(v.AuxInt)
-		destptr := v_0
-		mem := v_1
-		if !(s > 64 && s <= 1024 && s%16 == 0) {
-			break
-		}
-		v.reset(OpAMD64DUFFZERO)
-		v.AuxInt = int64ToAuxInt(s)
-		v.AddArg2(destptr, mem)
-		return true
-	}
 	// match: (Zero [s] destptr mem)
-	// cond: s > 1024 && s%8 == 0
+	// cond: s > repZeroThreshold && s%8 == 0
 	// result: (REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem)
 	for {
 		s := auxIntToInt64(v.AuxInt)
 		destptr := v_0
 		mem := v_1
-		if !(s > 1024 && s%8 == 0) {
+		if !(s > repZeroThreshold && s%8 == 0) {
 			break
 		}
 		v.reset(OpAMD64REPSTOSQ)
@@ -17,31 +17,31 @@ type T struct {
 
 func (t *T) f() {
 	// amd64:-".*runtime.memclrNoHeapPointers"
-	// amd64:"DUFFZERO"
+	// amd64:`MOVUPS\tX15,`
 	for i := range t.a {
 		t.a[i] = 0
 	}
 
 	// amd64:-".*runtime.memclrNoHeapPointers"
-	// amd64:"DUFFZERO"
+	// amd64:`MOVUPS\tX15,`
 	for i := range *t.a {
 		t.a[i] = 0
 	}
 
 	// amd64:-".*runtime.memclrNoHeapPointers"
-	// amd64:"DUFFZERO"
+	// amd64:`MOVUPS\tX15,`
 	for i := range t.a {
 		(*t.a)[i] = 0
 	}
 
 	// amd64:-".*runtime.memclrNoHeapPointers"
-	// amd64:"DUFFZERO"
+	// amd64:`MOVUPS\tX15,`
 	for i := range *t.a {
 		(*t.a)[i] = 0
 	}
 
 	// amd64:-".*runtime.memclrNoHeapPointers"
-	// amd64:"DUFFZERO"
+	// amd64:`MOVUPS\tX15,`
 	for i := range t.b {
 		t.b[i] = 0
 	}