cmd/compile/internal/bloop: avoid extraneous heap allocs in b.Loop body due to autotmp variable scope

The motivating example I created for #73137 still seems
to heap allocate in go1.26rc2 when used in a b.Loop body.

                    │   go1.25    │            go1.26rc2               │
                    │  allocs/op  │ allocs/op   vs base                │
NewX/b.Loop-basic-4   1.000 ± 0%     1.000 ± 0%       ~ (p=1.000 n=10) ¹

I suspect it is because the temps are by default declared
outside the loop body, which escape analysis will determine is
an escaping value and result in a heap allocation. (I've seen
this problem before, including in my older CL 546023 that attempts
to help PGO with a similar issue.)

This is an attempt to address that by placing ODCLs within the
b.Loop body for the temps that are created so that they can be
marked keepalive.

There are two cases handled in the CL: function return values
and function arguments. The first case is what affects my example
from #73137, and is also illustrated via the NewX test case in
the new test/escape_bloop.go file.

Without this CL, the NewX call in the BenchmarkBloop test is inlined,
which is an improvement over Go 1.25, but the slice still escapes
because the temporary used for the return value is declared outside
the loop body.

With this CL, the slice does not escape.

The second case is illustrated via the new BenchmarkBLoopFunctionArg
test, which shows a function argument that escapes without this CL
but does not escape with this CL.

We can also make the two new b.Loop tests in testing/benchmark_test.go
individually pass or fail as expected based on individually
reverting the two changes in this CL.

While we are here, we add a note to typecheck.TempAt to help
make people aware of this behavior.

Updates #73137
Fixes #77339

Change-Id: I69abe978367a8d3a931430aec5d85c9c54b42c1f
Reviewed-on: https://go-review.googlesource.com/c/go/+/738822
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
thepudds
2026-01-23 15:59:40 -05:00
committed by t hepudds
parent c08f6a5b26
commit bae9486d07
4 changed files with 140 additions and 2 deletions

View File

@@ -155,6 +155,10 @@ func preserveCallResults(curFn *ir.Func, call *ir.CallExpr) ir.Node {
assign := typecheck.AssignExpr(ir.NewAssignListStmt(call.Pos(), ir.OAS2, lhs, ir.Nodes{call})).(*ir.AssignListStmt)
assign.Def = true
for _, tmp := range lhs {
// Place temp declarations in the loop body to help escape analysis.
assign.PtrInit().Append(typecheck.Stmt(ir.NewDecl(assign.Pos(), ir.ODCL, tmp.(*ir.Name))))
}
return keepAliveAt(ns, assign)
}
@@ -164,7 +168,11 @@ func preserveCallArgs(curFn *ir.Func, call *ir.CallExpr) ir.Node {
var names ir.Nodes
preserveTmp := func(pos src.XPos, n ir.Node) ir.Node {
tmp := typecheck.TempAt(pos, curFn, n.Type())
argTmps = append(argTmps, typecheck.AssignExpr(ir.NewAssignStmt(pos, tmp, n)))
assign := ir.NewAssignStmt(pos, tmp, n)
assign.Def = true
// Place temp declarations in the loop body to help escape analysis.
assign.PtrInit().Append(typecheck.Stmt(ir.NewDecl(assign.Pos(), ir.ODCL, tmp)))
argTmps = append(argTmps, typecheck.AssignExpr(assign))
names = append(names, tmp)
if base.Flag.LowerM > 1 {
base.WarnfAt(call.Pos(), "function arg will be kept alive")

View File

@@ -42,7 +42,12 @@ func CheckFuncStack() {
}
}
// make a new Node off the books.
// TempAt makes a new Node off the books.
//
// N.B., the new Node is a function-local variable defaulting to function scope.
// It helps in some cases if an ODCL is also created and placed in a narrower scope,
// such as if the variable can be used in a loop body and potentially escape.
// TODO: Consider some mechanism to more conveniently create a block scoped temporary.
func TempAt(pos src.XPos, curfn *ir.Func, typ *types.Type) *ir.Name {
if curfn == nil {
base.FatalfAt(pos, "no curfn for TempAt")

View File

@@ -9,6 +9,10 @@ import (
"cmp"
"context"
"errors"
"internal/asan"
"internal/msan"
"internal/race"
"internal/testenv"
"runtime"
"slices"
"strings"
@@ -157,6 +161,63 @@ func TestBenchmarkContext(t *testing.T) {
})
}
// Some auxiliary functions for measuring allocations in a b.Loop benchmark below,
// where in this case mid-stack inlining allows stack allocation of a slice.
// This is based on the example in go.dev/issue/73137.
func newX() []byte {
out := make([]byte, 8)
return use1(out)
}
//go:noinline
func use1(out []byte) []byte {
return out
}
// An auxiliary function for measuring allocations with a simple function argument
// in the b.Loop body.
//go:noinline
func use2(x any) {}
func TestBenchmarkBLoopAllocs(t *testing.T) {
testenv.SkipIfOptimizationOff(t)
if race.Enabled || asan.Enabled || msan.Enabled {
t.Skip("skipping in case sanitizers alter allocation behavior")
}
t.Run("call-result", func(t *testing.T) {
bRet := testing.Benchmark(func(b *testing.B) {
b.ReportAllocs()
for b.Loop() {
newX()
}
})
if bRet.N == 0 {
t.Fatalf("benchmark reported 0 iterations")
}
if bRet.AllocsPerOp() != 0 {
t.Errorf("want 0 allocs, got %d", bRet.AllocsPerOp())
}
})
t.Run("call-arg", func(t *testing.T) {
bRet := testing.Benchmark(func(b *testing.B) {
b.ReportAllocs()
for b.Loop() {
use2(make([]byte, 1000))
}
})
if bRet.N == 0 {
t.Fatalf("benchmark reported 0 iterations")
}
if bRet.AllocsPerOp() != 0 {
t.Errorf("want 0 allocs, got %d", bRet.AllocsPerOp())
}
})
}
func ExampleB_RunParallel() {
// Parallel benchmark for text/template.Template.Execute on a single object.
testing.Benchmark(func(b *testing.B) {

64
test/escape_bloop.go Normal file
View File

@@ -0,0 +1,64 @@
// errorcheck -0 -m
// Copyright 2026 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Test b.Loop escape analysis behavior.
package bloop
import (
"testing"
)
// An example where mid-stack inlining allows stack allocation of a slice.
// This is from the example in go.dev/issue/73137.
func NewX(x int) []byte { // ERROR "can inline NewX"
out := make([]byte, 8) // ERROR "make\(\[\]byte, 8\) escapes to heap"
return use1(out)
}
//go:noinline
func use1(out []byte) []byte { // ERROR "leaking param: out to result ~r0 level=0"
return out
}
//go:noinline
func BenchmarkBloop(b *testing.B) { // ERROR "leaking param: b"
for b.Loop() { // ERROR "inlining call to testing.\(\*B\).Loop"
NewX(42) // ERROR "make\(\[\]byte, 8\) does not escape" "inlining call to NewX"
}
}
// A traditional b.N benchmark using a sink variable for comparison,
// also from the example in go.dev/issue/73137.
var sink byte
//go:noinline
func BenchmarkBN(b *testing.B) { // ERROR "b does not escape"
for i := 0; i < b.N; i++ {
out := NewX(42) // ERROR "make\(\[\]byte, 8\) does not escape" "inlining call to NewX"
sink = out[0]
}
}
// An example showing behavior of a simple function argument in the b.Loop body.
//go:noinline
func use2(x any) {} // ERROR "x does not escape"
//go:noinline
func BenchmarkBLoopFunctionArg(b *testing.B) { // ERROR "leaking param: b"
for b.Loop() { // ERROR "inlining call to testing.\(\*B\).Loop"
use2(42) // ERROR "42 does not escape"
}
}
// A similar call outside of b.Loop for comparison.
func simpleFunctionArg() { // ERROR "can inline simpleFunctionArg"
use2(42) // ERROR "42 does not escape"
}