cmd/compile: use equality signatures in hash function generation

There aren't a huge number of generated hash functions, so this probably
won't save a whole lot of memory. But it means we can clean up a bunch
of code by basing equality and hashing on the same underlying infrastructure.

Change-Id: I36ed1e49044fecb33120d8736f1c0403a4a2554e
Reviewed-on: https://go-review.googlesource.com/c/go/+/727500
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cuong Manh Le <cuong.manhle.vn@gmail.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Keith Randall
2025-12-04 17:27:02 -08:00
parent 6eec9bcdb2
commit 0da8979210
2 changed files with 145 additions and 127 deletions

View File

@@ -11,7 +11,6 @@ import (
"strings"
"cmd/compile/internal/base"
"cmd/compile/internal/compare"
"cmd/compile/internal/ir"
"cmd/compile/internal/objw"
"cmd/compile/internal/typecheck"
@@ -50,105 +49,91 @@ func AlgType(t *types.Type) types.AlgKind {
// genhash returns a symbol which is the closure used to compute
// the hash of a value of type t.
// Note: the generated function must match runtime.typehash exactly.
func genhash(t *types.Type) *obj.LSym {
switch AlgType(t) {
default:
// genhash is only called for types that have equality
base.Fatalf("genhash %v", t)
case types.AMEM0:
return genhashSig(eqSignature(t))
}
func genhashSig(sig string) *obj.LSym {
if len(sig) > 0 && sig[0] == sigAlign {
_, sig = parseNum(sig[1:])
}
switch sig {
case "":
return sysClosure("memhash0")
case types.AMEM8:
case string(sigMemory) + "1":
return sysClosure("memhash8")
case types.AMEM16:
case string(sigMemory) + "2":
return sysClosure("memhash16")
case types.AMEM32:
case string(sigMemory) + "4":
return sysClosure("memhash32")
case types.AMEM64:
case string(sigMemory) + "8":
return sysClosure("memhash64")
case types.AMEM128:
case string(sigMemory) + "16":
return sysClosure("memhash128")
case types.ASTRING:
case string(sigString):
return sysClosure("strhash")
case types.AINTER:
case string(sigIface):
return sysClosure("interhash")
case types.ANILINTER:
case string(sigEface):
return sysClosure("nilinterhash")
case types.AFLOAT32:
case string(sigFloat32):
return sysClosure("f32hash")
case types.AFLOAT64:
case string(sigFloat64):
return sysClosure("f64hash")
case types.ACPLX64:
case string(sigFloat32) + string(sigFloat32):
return sysClosure("c64hash")
case types.ACPLX128:
case string(sigFloat64) + string(sigFloat64):
return sysClosure("c128hash")
case types.AMEM:
// For other sizes of plain memory, we build a closure
// that calls memhash_varlen. The size of the memory is
// encoded in the first slot of the closure.
closure := TypeLinksymLookup(fmt.Sprintf(".hashfunc%d", t.Size()))
if len(closure.P) > 0 { // already generated
return closure
}
if memhashvarlen == nil {
memhashvarlen = typecheck.LookupRuntimeFunc("memhash_varlen")
}
ot := 0
ot = objw.SymPtr(closure, ot, memhashvarlen, 0)
ot = objw.Uintptr(closure, ot, uint64(t.Size())) // size encoded in closure
objw.Global(closure, int32(ot), obj.DUPOK|obj.RODATA)
return closure
case types.ASPECIAL:
break
}
closure := TypeLinksymPrefix(".hashfunc", t)
closure := TypeLinksymLookup(".hashfunc." + sig)
if len(closure.P) > 0 { // already generated
return closure
}
// Generate hash functions for subtypes.
// There are cases where we might not use these hashes,
// but in that case they will get dead-code eliminated.
// (And the closure generated by genhash will also get
// dead-code eliminated, as we call the subtype hashers
// directly.)
switch t.Kind() {
case types.TARRAY:
genhash(t.Elem())
case types.TSTRUCT:
for _, f := range t.Fields() {
genhash(f.Type)
if sig[0] == sigMemory {
n, rest := parseNum(sig[1:])
if rest == "" {
// Just M%d. We can make a memhash_varlen closure.
// The size of the memory region to hash is encoded in the closure.
if memhashvarlen == nil {
memhashvarlen = typecheck.LookupRuntimeFunc("memhash_varlen")
}
ot := 0
ot = objw.SymPtr(closure, ot, memhashvarlen, 0)
ot = objw.Uintptr(closure, ot, uint64(n)) // size encoded in closure
objw.Global(closure, int32(ot), obj.DUPOK|obj.RODATA)
return closure
}
}
if base.Flag.LowerR != 0 {
fmt.Printf("genhash %v %v\n", closure, t)
fmt.Printf("genhash %s\n", sig)
}
fn := hashFunc(t)
fn := hashFunc(sig)
// Build closure. It doesn't close over any variables, so
// it contains just the function pointer.
objw.SymPtr(closure, 0, fn.Linksym(), 0)
objw.Global(closure, int32(types.PtrSize), obj.DUPOK|obj.RODATA)
return closure
}
func hashFunc(t *types.Type) *ir.Func {
sym := TypeSymPrefix(".hash", t)
func hashFunc(sig string) *ir.Func {
sym := types.TypeSymLookup(".hash." + sig)
if sym.Def != nil {
return sym.Def.(*ir.Name).Func
}
sig0 := sig
pos := base.AutogeneratedPos // less confusing than end of input
base.Pos = pos
// func sym(p *T, h uintptr) uintptr
// func sym(p unsafe.Pointer, h uintptr) uintptr
fn := ir.NewFunc(pos, pos, sym, types.NewSignature(nil,
[]*types.Field{
types.NewField(pos, typecheck.Lookup("p"), types.NewPtr(t)),
types.NewField(pos, typecheck.Lookup("p"), types.Types[types.TUNSAFEPTR]),
types.NewField(pos, typecheck.Lookup("h"), types.Types[types.TUINTPTR]),
},
[]*types.Field{
@@ -157,81 +142,121 @@ func hashFunc(t *types.Type) *ir.Func {
))
sym.Def = fn.Nname
fn.Pragma |= ir.Noinline // TODO(mdempsky): We need to emit this during the unified frontend instead, to allow inlining.
typecheck.DeclFunc(fn)
np := fn.Dcl[0]
nh := fn.Dcl[1]
switch t.Kind() {
case types.TARRAY:
// An array of pure memory would be handled by the
// standard algorithm, so the element type must not be
// pure memory.
hashel := hashfor(t.Elem())
// Skip alignment, hash functions can handle unaligned data.
if len(sig) > 0 && sig[0] == sigAlign {
_, sig = parseNum(sig[1:])
}
// for i := 0; i < nelem; i++
ni := typecheck.TempAt(base.Pos, ir.CurFunc, types.Types[types.TINT])
init := ir.NewAssignStmt(base.Pos, ni, ir.NewInt(base.Pos, 0))
cond := ir.NewBinaryExpr(base.Pos, ir.OLT, ni, ir.NewInt(base.Pos, t.NumElem()))
post := ir.NewAssignStmt(base.Pos, ni, ir.NewBinaryExpr(base.Pos, ir.OADD, ni, ir.NewInt(base.Pos, 1)))
loop := ir.NewForStmt(base.Pos, nil, cond, post, nil, false)
loop.PtrInit().Append(init)
// offset from np that we're currently working on
var off int64
// h = hashel(&p[i], h)
call := ir.NewCallExpr(base.Pos, ir.OCALL, hashel, nil)
// Return np+off cast to a t (t must be a pointer-y type).
ptr := func(t *types.Type) ir.Node {
c := ir.NewBasicLit(pos, types.Types[types.TUINTPTR], constant.MakeInt64(off))
p := ir.NewBinaryExpr(pos, ir.OUNSAFEADD, np, c)
return ir.NewConvExpr(pos, ir.OCONVNOP, t, p)
}
// hash data of type t at np+off.
// Increment off by the size of t.
hash := func(t *types.Type) {
p := ptr(t.PtrTo())
hashFn := hashfor(t)
call := ir.NewCallExpr(pos, ir.OCALL, hashFn, []ir.Node{p, nh})
fn.Body.Append(ir.NewAssignStmt(pos, nh, call))
off += t.Size()
}
nx := ir.NewIndexExpr(base.Pos, np, ni)
nx.SetBounded(true)
na := typecheck.NodAddr(nx)
call.Args.Append(na)
call.Args.Append(nh)
loop.Body.Append(ir.NewAssignStmt(base.Pos, nh, call))
fn.Body.Append(loop)
case types.TSTRUCT:
// Walk the struct using memhash for runs of AMEM
// and calling specific hash functions for the others.
for i, fields := 0, t.Fields(); i < len(fields); {
f := fields[i]
// Skip blank fields.
if f.Sym.IsBlank() {
i++
continue
for len(sig) > 0 {
kind := sig[0]
sig = sig[1:]
switch kind {
case sigMemory:
var n int64
n, sig = parseNum(sig)
switch {
case n == 4:
p := ptr(types.Types[types.TUNSAFEPTR])
memhash := typecheck.LookupRuntime("memhash32")
call := ir.NewCallExpr(pos, ir.OCALL, memhash, []ir.Node{p, nh})
fn.Body.Append(ir.NewAssignStmt(pos, nh, call))
case n == 8:
p := ptr(types.Types[types.TUNSAFEPTR])
memhash := typecheck.LookupRuntime("memhash64")
call := ir.NewCallExpr(pos, ir.OCALL, memhash, []ir.Node{p, nh})
fn.Body.Append(ir.NewAssignStmt(pos, nh, call))
default:
p := ptr(types.Types[types.TUINT8].PtrTo())
memhash := typecheck.LookupRuntime("memhash", types.Types[types.TUINT8])
size := ir.NewBasicLit(pos, types.Types[types.TUINTPTR], constant.MakeInt64(n))
call := ir.NewCallExpr(pos, ir.OCALL, memhash, []ir.Node{p, nh, size})
fn.Body.Append(ir.NewAssignStmt(pos, nh, call))
}
// Hash non-memory fields with appropriate hash function.
if !compare.IsRegularMemory(f.Type) {
hashel := hashfor(f.Type)
call := ir.NewCallExpr(base.Pos, ir.OCALL, hashel, nil)
na := typecheck.NodAddr(typecheck.DotField(base.Pos, np, i))
call.Args.Append(na)
call.Args.Append(nh)
fn.Body.Append(ir.NewAssignStmt(base.Pos, nh, call))
off += n
case sigFloat32:
hash(types.Types[types.TFLOAT32])
case sigFloat64:
hash(types.Types[types.TFLOAT64])
case sigString:
hash(types.Types[types.TSTRING])
case sigEface:
hash(types.NewInterface(nil))
case sigIface:
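// Kinda hacky: fabricate a throwaway non-empty interface type (a single
// dummy member named A) just to have something to pass to hash.
// TODO: clean this up.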
hash(types.NewInterface([]*types.Field{types.NewField(pos, typecheck.Lookup("A"), types.Types[types.TBOOL])}))
case sigSkip:
var n int64
n, sig = parseNum(sig)
off += n
case sigArrayStart:
var n int64
n, sig = parseNum(sig)
// Find matching closing brace.
i := 0
depth := 1
findEndSquareBracket:
for {
if i == len(sig) {
base.Fatalf("mismatched brackets in %s", sig0)
}
switch sig[i] {
case sigArrayStart:
depth++
case sigArrayEnd:
depth--
if depth == 0 {
break findEndSquareBracket
}
}
i++
continue
}
elemSig := sig[:i]
elemSize := sigSize(elemSig)
sig = sig[i+1:] // remaining signature after array
// Otherwise, hash a maximal length run of raw memory.
size, next := compare.Memrun(t, i)
// Loop N times, calling hash function for the element.
// for i := off; i < off + N*elemSize; i += elemSize {
// h = elemfn(p+i, h)
// }
elemFn := hashFunc(elemSig).Nname
idx := typecheck.TempAt(pos, ir.CurFunc, types.Types[types.TUINTPTR])
init := ir.NewAssignStmt(pos, idx, ir.NewInt(pos, off))
cond := ir.NewBinaryExpr(pos, ir.OLT, idx, ir.NewInt(pos, off+n*elemSize))
post := ir.NewAssignStmt(pos, idx, ir.NewBinaryExpr(pos, ir.OADD, idx, ir.NewInt(pos, elemSize)))
// h = hashel(&p.first, size, h)
hashel := hashmem(f.Type)
call := ir.NewCallExpr(base.Pos, ir.OCALL, hashel, nil)
na := typecheck.NodAddr(typecheck.DotField(base.Pos, np, i))
call.Args.Append(na)
call.Args.Append(nh)
call.Args.Append(ir.NewInt(base.Pos, size))
fn.Body.Append(ir.NewAssignStmt(base.Pos, nh, call))
i = next
p := ir.NewBinaryExpr(pos, ir.OUNSAFEADD, np, idx)
call := typecheck.Call(pos, elemFn, []ir.Node{p, nh}, false)
as := ir.NewAssignStmt(pos, nh, call)
loop := ir.NewForStmt(pos, init, cond, post, []ir.Node{as}, false)
fn.Body.Append(loop)
off += n * elemSize
}
}
r := ir.NewReturnStmt(base.Pos, nil)
r.Results.Append(nh)
fn.Body.Append(r)
fn.Body.Append(ir.NewReturnStmt(pos, []ir.Node{nh}))
if base.Flag.LowerR != 0 {
ir.DumpList("genhash body", fn.Body)
@@ -246,7 +271,6 @@ func hashFunc(t *types.Type) *ir.Func {
})
fn.SetNilCheckDisabled(true)
return fn
}
@@ -257,8 +281,9 @@ func runtimeHashFor(name string, t *types.Type) *ir.Name {
// hashfor returns the function to compute the hash of a value of type t.
func hashfor(t *types.Type) *ir.Name {
switch types.AlgType(t) {
case types.AMEM:
base.Fatalf("hashfor with AMEM type")
default:
base.Fatalf("hashfor with bad type %v", t)
return nil
case types.AINTER:
return runtimeHashFor("interhash", t)
case types.ANILINTER:
@@ -274,9 +299,6 @@ func hashfor(t *types.Type) *ir.Name {
case types.ACPLX128:
return runtimeHashFor("c128hash", t)
}
fn := hashFunc(t)
return fn.Nname
}
// sysClosure returns a closure which will call the
@@ -383,8 +405,6 @@ func geneqSig(sig string) *obj.LSym {
return closure
}
// TODO: generate hash function from signatures also?
// They are slightly different, at least at the moment.
func eqFunc(sig string) *ir.Func {
sym := types.TypeSymLookup(".eq." + sig)
if sym.Def != nil {

View File

@@ -199,8 +199,6 @@ func nilinterhash(p unsafe.Pointer, h uintptr) uintptr {
// is slower but more general and is used for hashing interface types
// (called from interhash or nilinterhash, above) or for hashing in
// maps generated by reflect.MapOf (reflect_typehash, below).
// Note: this function must match the compiler generated
// functions exactly. See issue 37716.
//
// typehash should be an internal detail,
// but widely used packages access it using linkname.