runtime: don't count nGsyscallNoP for extra Ms in C

In #76435, it turns out that the new metric
/sched/goroutines/not-in-go:goroutines counts C threads that have called
into Go before (on Linux) as not-in-go goroutines. The reason for this
is that the M is still attached to the C thread on Linux as an
optimization, so we don't go through all the trouble of detaching the M
and, of course, decrementing nGsyscallNoP.

There's an easy fix to this accounting issue. The flag on the M,
isExtraInC, says whether a thread with an extra M attached no longer has
any Go on its (logical) stack. When we take the P from an M in this
state, we simply just don't increment nGsyscallNoP. When it calls back
into Go, we similarly skip the decrement to nGsyscallNoP.

This is more efficient than alternatives, like always updating
nGsyscallNoP in cgocallbackg, since that would add a new
read-modify-write atomic onto that fast path. It does mean we count
threads in C with a P still attached as not-in-go, but this transient in
most real programs, assuming the thread indeed does not call back into
Go any time soon.

Fixes #76435.

Change-Id: Id05563bacbe35d3fae17d67fb5ed45fa43fa0548
Reviewed-on: https://go-review.googlesource.com/c/go/+/726964
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
This commit is contained in:
Michael Anthony Knyszek
2025-12-04 23:27:03 +00:00
committed by Michael Knyszek
parent f3d572d96a
commit 745349712e
4 changed files with 169 additions and 10 deletions

View File

@@ -1584,3 +1584,18 @@ func TestReadMetricsSched(t *testing.T) {
t.Fatalf("output:\n%s\n\nwanted:\n%s", output, want)
}
}
func TestNotInGoMetricCallback(t *testing.T) {
switch runtime.GOOS {
case "windows", "plan9":
t.Skip("unsupported on Windows and Plan9")
}
// This test is run in a subprocess to prevent other tests from polluting the metrics
// and because we need to make some cgo callbacks.
output := runTestProg(t, "testprogcgo", "NotInGoMetricCallback")
want := "OK\n"
if output != want {
t.Fatalf("output:\n%s\n\nwanted:\n%s", output, want)
}
}

View File

@@ -2433,7 +2433,7 @@ func needm(signal bool) {
sp := sys.GetCallerSP()
callbackUpdateSystemStack(mp, sp, signal)
// Should mark we are already in Go now.
// We must mark that we are already in Go now.
// Otherwise, we may call needm again when we get a signal, before cgocallbackg1,
// which means the extram list may be empty, that will cause a deadlock.
mp.isExtraInC = false
@@ -2455,7 +2455,8 @@ func needm(signal bool) {
// mp.curg is now a real goroutine.
casgstatus(mp.curg, _Gdeadextra, _Gsyscall)
sched.ngsys.Add(-1)
sched.nGsyscallNoP.Add(1)
// N.B. We do not update nGsyscallNoP, because isExtraInC threads are not
// counted as real goroutines while they're in C.
if !signal {
if trace.ok() {
@@ -2590,7 +2591,7 @@ func dropm() {
casgstatus(mp.curg, _Gsyscall, _Gdeadextra)
mp.curg.preemptStop = false
sched.ngsys.Add(1)
sched.nGsyscallNoP.Add(-1)
decGSyscallNoP(mp)
if !mp.isExtraInSig {
if trace.ok() {
@@ -4732,7 +4733,7 @@ func entersyscallHandleGCWait(trace traceLocker) {
if trace.ok() {
trace.ProcStop(pp)
}
sched.nGsyscallNoP.Add(1)
addGSyscallNoP(gp.m) // We gave up our P voluntarily.
pp.gcStopTime = nanotime()
pp.syscalltick++
if sched.stopwait--; sched.stopwait == 0 {
@@ -4763,7 +4764,7 @@ func entersyscallblock() {
gp.m.syscalltick = gp.m.p.ptr().syscalltick
gp.m.p.ptr().syscalltick++
sched.nGsyscallNoP.Add(1)
addGSyscallNoP(gp.m) // We're going to give up our P.
// Leave SP around for GC and traceback.
pc := sys.GetCallerPC()
@@ -5001,8 +5002,8 @@ func exitsyscallTryGetP(oldp *p) *p {
if oldp != nil {
if thread, ok := setBlockOnExitSyscall(oldp); ok {
thread.takeP()
addGSyscallNoP(thread.mp) // takeP does the opposite, but this is a net zero change.
thread.resume()
sched.nGsyscallNoP.Add(-1) // takeP adds 1.
return oldp
}
}
@@ -5017,7 +5018,7 @@ func exitsyscallTryGetP(oldp *p) *p {
}
unlock(&sched.lock)
if pp != nil {
sched.nGsyscallNoP.Add(-1)
decGSyscallNoP(getg().m) // We got a P for ourselves.
return pp
}
}
@@ -5043,7 +5044,7 @@ func exitsyscallNoP(gp *g) {
trace.GoSysExit(true)
traceRelease(trace)
}
sched.nGsyscallNoP.Add(-1)
decGSyscallNoP(getg().m)
dropg()
lock(&sched.lock)
var pp *p
@@ -5081,6 +5082,41 @@ func exitsyscallNoP(gp *g) {
schedule() // Never returns.
}
// addGSyscallNoP must be called when a goroutine in a syscall loses its P.
// This function updates all relevant accounting.
//
// nosplit because it's called on the syscall paths.
//
//go:nosplit
func addGSyscallNoP(mp *m) {
// It's safe to read isExtraInC here because it's only mutated
// outside of _Gsyscall, and we know this thread is attached
// to a goroutine in _Gsyscall and blocked from exiting.
if !mp.isExtraInC {
// Increment nGsyscallNoP since we're taking away a P
// from a _Gsyscall goroutine, but only if isExtraInC
// is not set on the M. If it is, then this thread is
// back to being a full C thread, and will just inflate
// the count of not-in-go goroutines. See go.dev/issue/76435.
sched.nGsyscallNoP.Add(1)
}
}
// decGSsyscallNoP must be called whenever a goroutine in a syscall without
// a P exits the system call. This function updates all relevant accounting.
//
// nosplit because it's called from dropm.
//
//go:nosplit
func decGSyscallNoP(mp *m) {
// Update nGsyscallNoP, but only if this is not a thread coming
// out of C. See the comment in addGSyscallNoP. This logic must match,
// to avoid unmatched increments and decrements.
if !mp.isExtraInC {
sched.nGsyscallNoP.Add(-1)
}
}
// Called from syscall package before fork.
//
// syscall_runtime_BeforeFork is for package syscall,
@@ -6758,7 +6794,7 @@ func (s syscallingThread) releaseP(state uint32) {
trace.ProcSteal(s.pp)
traceRelease(trace)
}
sched.nGsyscallNoP.Add(1)
addGSyscallNoP(s.mp)
s.pp.syscalltick++
}

View File

@@ -945,7 +945,7 @@ type schedt struct {
nmfreed int64 // cumulative number of freed m's
ngsys atomic.Int32 // number of system goroutines
nGsyscallNoP atomic.Int32 // number of goroutines in syscalls without a P
nGsyscallNoP atomic.Int32 // number of goroutines in syscalls without a P but whose M is not isExtraInC
pidle puintptr // idle p's
npidle atomic.Int32

View File

@@ -0,0 +1,108 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !plan9 && !windows
// +build !plan9,!windows
package main
/*
#include <stdatomic.h>
#include <stddef.h>
#include <pthread.h>
extern void Ready();
static int spinning;
static int released;
static void* enterGoThenSpinTwice(void* arg __attribute__ ((unused))) {
Ready();
atomic_fetch_add(&spinning, 1);
while(atomic_load(&released) == 0) {};
Ready();
atomic_fetch_add(&spinning, 1);
while(1) {};
return NULL;
}
static void SpinTwiceInNewCThread() {
pthread_t tid;
pthread_create(&tid, NULL, enterGoThenSpinTwice, NULL);
}
static int Spinning() {
return atomic_load(&spinning);
}
static void Release() {
atomic_store(&spinning, 0);
atomic_store(&released, 1);
}
*/
import "C"
import (
"os"
"runtime"
"runtime/metrics"
)
func init() {
register("NotInGoMetricCallback", NotInGoMetricCallback)
}
func NotInGoMetricCallback() {
const N = 10
s := []metrics.Sample{{Name: "/sched/goroutines/not-in-go:goroutines"}}
// Create N new C threads that have called into Go at least once.
for range N {
C.SpinTwiceInNewCThread()
}
// Synchronize with spinning threads twice.
//
// This helps catch bad accounting by taking at least a couple other
// codepaths which would cause the accounting to change.
for i := range 2 {
// Make sure they pass through Go.
// N.B. Ready is called twice by the new threads.
for j := range N {
<-readyCh
if j == 2 {
// Try to trigger an update in the immediate STW handoff case.
runtime.ReadMemStats(&m)
}
}
// Make sure they're back in C.
for C.Spinning() < N {
}
// Do something that stops the world to take all the Ps back.
runtime.ReadMemStats(&m)
if i == 0 {
C.Release()
}
}
// Read not-in-go.
metrics.Read(s)
if n := s[0].Value.Uint64(); n != 0 {
println("expected 0 not-in-go goroutines, found", n)
os.Exit(2)
}
println("OK")
}
var m runtime.MemStats
var readyCh = make(chan bool)
//export Ready
func Ready() {
readyCh <- true
}