From 549d8d407e2bbcaecdee0b52cbf3a513dda637fb Mon Sep 17 00:00:00 2001 From: Gavin Lam Date: Fri, 9 Jan 2026 23:33:38 -0500 Subject: [PATCH] runtime: speed up cheaprand and cheaprand64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current cheaprand performs 128-bit multiplication on 64-bit numbers and truncate the result to 32 bits, which is inefficient. A 32-bit specific implementation is more performant because it performs 64-bit multiplication on 32-bit numbers instead. The current cheaprand64 involves two cheaprand calls. Implementing it as 64-bit wyrand is significantly faster. Since cheaprand64 discards one bit, I have preserved this behavior. The underlying uint64 function is made available as cheaprandu64. │ old │ new │ │ sec/op │ sec/op vs base │ Cheaprand-8 1.358n ± 0% 1.218n ± 0% -10.31% (n=100) Cheaprand64-8 2.424n ± 0% 1.391n ± 0% -42.62% (n=100) Blocksampled-8 8.347n ± 0% 2.022n ± 0% -75.78% (n=100) Fixes #77149 --- src/runtime/export_test.go | 4 ++++ src/runtime/lock_spinbit.go | 12 ++-------- src/runtime/mprof.go | 4 ++-- src/runtime/mprof_test.go | 16 +++++++++++++ src/runtime/rand.go | 47 ++++++++++++++++++++++++++++--------- src/runtime/rand_test.go | 12 ++++++++++ src/runtime/runtime2.go | 5 ++-- 7 files changed, 75 insertions(+), 25 deletions(-) create mode 100644 src/runtime/mprof_test.go diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 4f6ef9a3f26..fb0bfd3904a 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -273,6 +273,10 @@ func CountPagesInUse() (pagesInUse, counted uintptr) { return } +func Blocksampled(cycles, rate int64) bool { return blocksampled(cycles, rate) } + +func Cheaprand() uint32 { return cheaprand() } +func Cheaprand64() int64 { return cheaprand64() } func Fastrand() uint32 { return uint32(rand()) } func Fastrand64() uint64 { return rand() } func Fastrandn(n uint32) uint32 { return randn(n) } diff --git a/src/runtime/lock_spinbit.go b/src/runtime/lock_spinbit.go index 039ea6f5656..590be8a0c51 100644 --- a/src/runtime/lock_spinbit.go +++ b/src/runtime/lock_spinbit.go @@ -327,16 +327,8 @@ func unlock2(l *mutex) { // mutexSampleContention returns whether the current mutex operation should // report any contention it discovers. func mutexSampleContention() bool { - if rate := int64(atomic.Load64(&mutexprofilerate)); rate <= 0 { - return false - } else { - // TODO: have SetMutexProfileFraction do the clamping - rate32 := uint32(rate) - if int64(rate32) != rate { - rate32 = ^uint32(0) - } - return cheaprandn(rate32) == 0 - } + rate := atomic.Load64(&mutexprofilerate) + return rate > 0 && cheaprandu64()%rate == 0 } // unlock2Wake updates the list of Ms waiting on l, waking an M if necessary. diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go index f1703a7ebab..ccc952701fa 100644 --- a/src/runtime/mprof.go +++ b/src/runtime/mprof.go @@ -696,8 +696,8 @@ func (prof *mLockProfile) recordUnlock(cycles int64) { if cycles == 0 { return } - prevScore := uint64(cheaprand64()) % uint64(prev) - thisScore := uint64(cheaprand64()) % uint64(cycles) + prevScore := cheaprandu64() % uint64(prev) + thisScore := cheaprandu64() % uint64(cycles) if prevScore > thisScore { prof.cyclesLost += cycles return diff --git a/src/runtime/mprof_test.go b/src/runtime/mprof_test.go new file mode 100644 index 00000000000..d819c11d1d0 --- /dev/null +++ b/src/runtime/mprof_test.go @@ -0,0 +1,16 @@ +// Copyright 2026 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime_test + +import ( + . "runtime" + "testing" +) + +func BenchmarkBlocksampled(b *testing.B) { + for b.Loop() { + Blocksampled(42, 1337) + } +} diff --git a/src/runtime/rand.go b/src/runtime/rand.go index a30845b585e..353e0e0f424 100644 --- a/src/runtime/rand.go +++ b/src/runtime/rand.go @@ -192,7 +192,8 @@ func mrandinit(mp *m) { } bootstrapRandReseed() // erase key we just extracted mp.chacha8.Init64(seed) - mp.cheaprand = rand() + mp.cheaprand = uint32(rand()) + mp.cheaprand64 = rand() } // randn is like rand() % n but faster. @@ -227,14 +228,12 @@ func randn(n uint32) uint32 { func cheaprand() uint32 { mp := getg().m // Implement wyrand: https://github.com/wangyi-fudan/wyhash - // Only the platform that bits.Mul64 can be lowered - // by the compiler should be in this list. - if goarch.IsAmd64|goarch.IsArm64|goarch.IsPpc64| - goarch.IsPpc64le|goarch.IsMips64|goarch.IsMips64le| - goarch.IsS390x|goarch.IsRiscv64|goarch.IsLoong64 == 1 { - mp.cheaprand += 0xa0761d6478bd642f - hi, lo := bits.Mul64(mp.cheaprand, mp.cheaprand^0xe7037ed1a0b428db) - return uint32(hi ^ lo) + // Only the platform that supports 64-bit multiplication + // natively should be allowed. + if bits.UintSize == 64 { + mp.cheaprand += 0x53c5ca59 + hi, lo := bits.Mul32(mp.cheaprand, mp.cheaprand^0x74743c1b) + return hi ^ lo } // Implement xorshift64+: 2 32-bit xorshift sequences added together. @@ -242,7 +241,7 @@ func cheaprand() uint32 { // Xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf // This generator passes the SmallCrush suite, part of TestU01 framework: // http://simul.iro.umontreal.ca/testu01/tu01.html - t := (*[2]uint32)(unsafe.Pointer(&mp.cheaprand)) + t := (*[2]uint32)(unsafe.Pointer(&mp.cheaprand64)) s1, s0 := t[0], t[1] s1 ^= s1 << 17 s1 = s1 ^ s0 ^ s1>>7 ^ s0>>16 @@ -269,7 +268,33 @@ func cheaprand() uint32 { //go:linkname cheaprand64 //go:nosplit func cheaprand64() int64 { - return int64(cheaprand())<<31 ^ int64(cheaprand()) + return int64(cheaprandu64() & ^(uint64(1) << 63)) +} + +// cheaprandu64 is a non-cryptographic-quality 64-bit random generator +// suitable for calling at very high frequency (such as during sampling decisions). +// it is "cheap" in the sense of both expense and quality. +// +// cheaprandu64 must not be exported to other packages: +// the rule is that other packages using runtime-provided +// randomness must always use rand. +// +//go:nosplit +func cheaprandu64() uint64 { + // Implement wyrand: https://github.com/wangyi-fudan/wyhash + // Only the platform that bits.Mul64 can be lowered + // by the compiler should be in this list. + if goarch.IsAmd64|goarch.IsArm64|goarch.IsPpc64| + goarch.IsPpc64le|goarch.IsMips64|goarch.IsMips64le| + goarch.IsS390x|goarch.IsRiscv64|goarch.IsLoong64 == 1 { + mp := getg().m + // Implement wyrand: https://github.com/wangyi-fudan/wyhash + mp.cheaprand64 += 0xa0761d6478bd642f + hi, lo := bits.Mul64(mp.cheaprand64, mp.cheaprand64^0xe7037ed1a0b428db) + return hi ^ lo + } + + return uint64(cheaprand())<<32 | uint64(cheaprand()) } // cheaprandn is like cheaprand() % n but faster. diff --git a/src/runtime/rand_test.go b/src/runtime/rand_test.go index baecb6984d0..79b888fcfac 100644 --- a/src/runtime/rand_test.go +++ b/src/runtime/rand_test.go @@ -22,6 +22,18 @@ func TestReadRandom(t *testing.T) { } } +func BenchmarkCheaprand(b *testing.B) { + for b.Loop() { + Cheaprand() + } +} + +func BenchmarkCheaprand64(b *testing.B) { + for b.Loop() { + Cheaprand64() + } +} + func BenchmarkFastrand(b *testing.B) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index be33932b24d..bf47955d0d6 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -715,8 +715,9 @@ type m struct { mOS - chacha8 chacha8rand.State - cheaprand uint64 + chacha8 chacha8rand.State + cheaprand uint32 + cheaprand64 uint64 // Up to 10 locks held by this m, maintained by the lock ranking code. locksHeldLen int