cmd/compile: use equality signatures in hash function generation

There aren't a huge number of generated hash functions, so this probably won't save a whole lot of memory. But it means we can clean up a bunch of code by basing equality and hashing on the same underlying infrastructure.

Change-Id: I36ed1e49044fecb33120d8736f1c0403a4a2554e
Reviewed-on: https://go-review.googlesource.com/c/go/+/727500
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cuong Manh Le <cuong.manhle.vn@gmail.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2026-01-29 07:02:05 +03:00 · 2025-12-04 17:27:02 -08:00
parent 6eec9bcdb2
commit 0da8979210
2 changed files with 145 additions and 127 deletions
--- a/src/cmd/compile/internal/reflectdata/alg.go
+++ b/src/cmd/compile/internal/reflectdata/alg.go
@@ -11,7 +11,6 @@ import (
 	"strings"

 	"cmd/compile/internal/base"
-	"cmd/compile/internal/compare"
 	"cmd/compile/internal/ir"
 	"cmd/compile/internal/objw"
 	"cmd/compile/internal/typecheck"
@@ -50,105 +49,91 @@ func AlgType(t *types.Type) types.AlgKind {

 // genhash returns a symbol which is the closure used to compute
 // the hash of a value of type t.
-// Note: the generated function must match runtime.typehash exactly.
 func genhash(t *types.Type) *obj.LSym {
-	switch AlgType(t) {
-	default:
-		// genhash is only called for types that have equality
-		base.Fatalf("genhash %v", t)
-	case types.AMEM0:
+	return genhashSig(eqSignature(t))
+}
+
+func genhashSig(sig string) *obj.LSym {
+	if len(sig) > 0 && sig[0] == sigAlign {
+		_, sig = parseNum(sig[1:])
+	}
+	switch sig {
+	case "":
 		return sysClosure("memhash0")
-	case types.AMEM8:
+	case string(sigMemory) + "1":
 		return sysClosure("memhash8")
-	case types.AMEM16:
+	case string(sigMemory) + "2":
 		return sysClosure("memhash16")
-	case types.AMEM32:
+	case string(sigMemory) + "4":
 		return sysClosure("memhash32")
-	case types.AMEM64:
+	case string(sigMemory) + "8":
 		return sysClosure("memhash64")
-	case types.AMEM128:
+	case string(sigMemory) + "16":
 		return sysClosure("memhash128")
-	case types.ASTRING:
+	case string(sigString):
 		return sysClosure("strhash")
-	case types.AINTER:
+	case string(sigIface):
 		return sysClosure("interhash")
-	case types.ANILINTER:
+	case string(sigEface):
 		return sysClosure("nilinterhash")
-	case types.AFLOAT32:
+	case string(sigFloat32):
 		return sysClosure("f32hash")
-	case types.AFLOAT64:
+	case string(sigFloat64):
 		return sysClosure("f64hash")
-	case types.ACPLX64:
+	case string(sigFloat32) + string(sigFloat32):
 		return sysClosure("c64hash")
-	case types.ACPLX128:
+	case string(sigFloat64) + string(sigFloat64):
 		return sysClosure("c128hash")
-	case types.AMEM:
-		// For other sizes of plain memory, we build a closure
-		// that calls memhash_varlen. The size of the memory is
-		// encoded in the first slot of the closure.
-		closure := TypeLinksymLookup(fmt.Sprintf(".hashfunc%d", t.Size()))
-		if len(closure.P) > 0 { // already generated
-			return closure
-		}
-		if memhashvarlen == nil {
-			memhashvarlen = typecheck.LookupRuntimeFunc("memhash_varlen")
-		}
-		ot := 0
-		ot = objw.SymPtr(closure, ot, memhashvarlen, 0)
-		ot = objw.Uintptr(closure, ot, uint64(t.Size())) // size encoded in closure
-		objw.Global(closure, int32(ot), obj.DUPOK|obj.RODATA)
-		return closure
-	case types.ASPECIAL:
-		break
 	}

-	closure := TypeLinksymPrefix(".hashfunc", t)
+	closure := TypeLinksymLookup(".hashfunc." + sig)
 	if len(closure.P) > 0 { // already generated
 		return closure
 	}

-	// Generate hash functions for subtypes.
-	// There are cases where we might not use these hashes,
-	// but in that case they will get dead-code eliminated.
-	// (And the closure generated by genhash will also get
-	// dead-code eliminated, as we call the subtype hashers
-	// directly.)
-	switch t.Kind() {
-	case types.TARRAY:
-		genhash(t.Elem())
-	case types.TSTRUCT:
-		for _, f := range t.Fields() {
-			genhash(f.Type)
+	if sig[0] == sigMemory {
+		n, rest := parseNum(sig[1:])
+		if rest == "" {
+			// Just M%d. We can make a memhash_varlen closure.
+			// The size of the memory region to hash is encoded in the closure.
+			if memhashvarlen == nil {
+				memhashvarlen = typecheck.LookupRuntimeFunc("memhash_varlen")
+			}
+			ot := 0
+			ot = objw.SymPtr(closure, ot, memhashvarlen, 0)
+			ot = objw.Uintptr(closure, ot, uint64(n)) // size encoded in closure
+			objw.Global(closure, int32(ot), obj.DUPOK|obj.RODATA)
+			return closure
 		}
 	}

 	if base.Flag.LowerR != 0 {
-		fmt.Printf("genhash %v %v\n", closure, t)
+		fmt.Printf("genhash %s\n", sig)
 	}

-	fn := hashFunc(t)
+	fn := hashFunc(sig)

 	// Build closure. It doesn't close over any variables, so
 	// it contains just the function pointer.
 	objw.SymPtr(closure, 0, fn.Linksym(), 0)
 	objw.Global(closure, int32(types.PtrSize), obj.DUPOK|obj.RODATA)
-
 	return closure
 }

-func hashFunc(t *types.Type) *ir.Func {
-	sym := TypeSymPrefix(".hash", t)
+func hashFunc(sig string) *ir.Func {
+	sym := types.TypeSymLookup(".hash." + sig)
 	if sym.Def != nil {
 		return sym.Def.(*ir.Name).Func
 	}
+	sig0 := sig

 	pos := base.AutogeneratedPos // less confusing than end of input
 	base.Pos = pos

-	// func sym(p *T, h uintptr) uintptr
+	// func sym(p unsafe.Pointer, h uintptr) uintptr
 	fn := ir.NewFunc(pos, pos, sym, types.NewSignature(nil,
 		[]*types.Field{
-			types.NewField(pos, typecheck.Lookup("p"), types.NewPtr(t)),
+			types.NewField(pos, typecheck.Lookup("p"), types.Types[types.TUNSAFEPTR]),
 			types.NewField(pos, typecheck.Lookup("h"), types.Types[types.TUINTPTR]),
 		},
 		[]*types.Field{
@@ -157,81 +142,121 @@ func hashFunc(t *types.Type) *ir.Func {
 	))
 	sym.Def = fn.Nname
 	fn.Pragma |= ir.Noinline // TODO(mdempsky): We need to emit this during the unified frontend instead, to allow inlining.
-
 	typecheck.DeclFunc(fn)
 	np := fn.Dcl[0]
 	nh := fn.Dcl[1]

-	switch t.Kind() {
-	case types.TARRAY:
-		// An array of pure memory would be handled by the
-		// standard algorithm, so the element type must not be
-		// pure memory.
-		hashel := hashfor(t.Elem())
+	// Skip alignment, hash functions can handle unaligned data.
+	if len(sig) > 0 && sig[0] == sigAlign {
+		_, sig = parseNum(sig[1:])
+	}

-		// for i := 0; i < nelem; i++
-		ni := typecheck.TempAt(base.Pos, ir.CurFunc, types.Types[types.TINT])
-		init := ir.NewAssignStmt(base.Pos, ni, ir.NewInt(base.Pos, 0))
-		cond := ir.NewBinaryExpr(base.Pos, ir.OLT, ni, ir.NewInt(base.Pos, t.NumElem()))
-		post := ir.NewAssignStmt(base.Pos, ni, ir.NewBinaryExpr(base.Pos, ir.OADD, ni, ir.NewInt(base.Pos, 1)))
-		loop := ir.NewForStmt(base.Pos, nil, cond, post, nil, false)
-		loop.PtrInit().Append(init)
+	// offset from np that we're currently working on
+	var off int64

-		// h = hashel(&p[i], h)
-		call := ir.NewCallExpr(base.Pos, ir.OCALL, hashel, nil)
+	// Return np+off cast to a t (t must be a pointer-y type).
+	ptr := func(t *types.Type) ir.Node {
+		c := ir.NewBasicLit(pos, types.Types[types.TUINTPTR], constant.MakeInt64(off))
+		p := ir.NewBinaryExpr(pos, ir.OUNSAFEADD, np, c)
+		return ir.NewConvExpr(pos, ir.OCONVNOP, t, p)
+	}
+	// hash data of type t at np+off.
+	// Increment off by the size of t.
+	hash := func(t *types.Type) {
+		p := ptr(t.PtrTo())
+		hashFn := hashfor(t)
+		call := ir.NewCallExpr(pos, ir.OCALL, hashFn, []ir.Node{p, nh})
+		fn.Body.Append(ir.NewAssignStmt(pos, nh, call))
+		off += t.Size()
+	}

-		nx := ir.NewIndexExpr(base.Pos, np, ni)
-		nx.SetBounded(true)
-		na := typecheck.NodAddr(nx)
-		call.Args.Append(na)
-		call.Args.Append(nh)
-		loop.Body.Append(ir.NewAssignStmt(base.Pos, nh, call))
-
-		fn.Body.Append(loop)
-
-	case types.TSTRUCT:
-		// Walk the struct using memhash for runs of AMEM
-		// and calling specific hash functions for the others.
-		for i, fields := 0, t.Fields(); i < len(fields); {
-			f := fields[i]
-
-			// Skip blank fields.
-			if f.Sym.IsBlank() {
-				i++
-				continue
+	for len(sig) > 0 {
+		kind := sig[0]
+		sig = sig[1:]
+		switch kind {
+		case sigMemory:
+			var n int64
+			n, sig = parseNum(sig)
+			switch {
+			case n == 4:
+				p := ptr(types.Types[types.TUNSAFEPTR])
+				memhash := typecheck.LookupRuntime("memhash32")
+				call := ir.NewCallExpr(pos, ir.OCALL, memhash, []ir.Node{p, nh})
+				fn.Body.Append(ir.NewAssignStmt(pos, nh, call))
+			case n == 8:
+				p := ptr(types.Types[types.TUNSAFEPTR])
+				memhash := typecheck.LookupRuntime("memhash64")
+				call := ir.NewCallExpr(pos, ir.OCALL, memhash, []ir.Node{p, nh})
+				fn.Body.Append(ir.NewAssignStmt(pos, nh, call))
+			default:
+				p := ptr(types.Types[types.TUINT8].PtrTo())
+				memhash := typecheck.LookupRuntime("memhash", types.Types[types.TUINT8])
+				size := ir.NewBasicLit(pos, types.Types[types.TUINTPTR], constant.MakeInt64(n))
+				call := ir.NewCallExpr(pos, ir.OCALL, memhash, []ir.Node{p, nh, size})
+				fn.Body.Append(ir.NewAssignStmt(pos, nh, call))
 			}
-
-			// Hash non-memory fields with appropriate hash function.
-			if !compare.IsRegularMemory(f.Type) {
-				hashel := hashfor(f.Type)
-				call := ir.NewCallExpr(base.Pos, ir.OCALL, hashel, nil)
-				na := typecheck.NodAddr(typecheck.DotField(base.Pos, np, i))
-				call.Args.Append(na)
-				call.Args.Append(nh)
-				fn.Body.Append(ir.NewAssignStmt(base.Pos, nh, call))
+			off += n
+		case sigFloat32:
+			hash(types.Types[types.TFLOAT32])
+		case sigFloat64:
+			hash(types.Types[types.TFLOAT64])
+		case sigString:
+			hash(types.Types[types.TSTRING])
+		case sigEface:
+			hash(types.NewInterface(nil))
+		case sigIface:
+			// arg kinda hacky. TODO: clean this up.
+			hash(types.NewInterface([]*types.Field{types.NewField(pos, typecheck.Lookup("A"), types.Types[types.TBOOL])}))
+		case sigSkip:
+			var n int64
+			n, sig = parseNum(sig)
+			off += n
+		case sigArrayStart:
+			var n int64
+			n, sig = parseNum(sig)
+			// Find matching closing bracket.
+			i := 0
+			depth := 1
+		findEndSquareBracket:
+			for {
+				if i == len(sig) {
+					base.Fatalf("mismatched brackets in %s", sig0)
+				}
+				switch sig[i] {
+				case sigArrayStart:
+					depth++
+				case sigArrayEnd:
+					depth--
+					if depth == 0 {
+						break findEndSquareBracket
+					}
+				}
 				i++
-				continue
 			}
+			elemSig := sig[:i]
+			elemSize := sigSize(elemSig)
+			sig = sig[i+1:] // remaining signature after array

-			// Otherwise, hash a maximal length run of raw memory.
-			size, next := compare.Memrun(t, i)
+			// Loop N times, calling hash function for the element.
+			//     for i := off; i < off + N*elemSize; i += elemSize {
+			//         h = elemfn(p+i, h)
+			//     }
+			elemFn := hashFunc(elemSig).Nname
+			idx := typecheck.TempAt(pos, ir.CurFunc, types.Types[types.TUINTPTR])
+			init := ir.NewAssignStmt(pos, idx, ir.NewInt(pos, off))
+			cond := ir.NewBinaryExpr(pos, ir.OLT, idx, ir.NewInt(pos, off+n*elemSize))
+			post := ir.NewAssignStmt(pos, idx, ir.NewBinaryExpr(pos, ir.OADD, idx, ir.NewInt(pos, elemSize)))

-			// h = hashel(&p.first, size, h)
-			hashel := hashmem(f.Type)
-			call := ir.NewCallExpr(base.Pos, ir.OCALL, hashel, nil)
-			na := typecheck.NodAddr(typecheck.DotField(base.Pos, np, i))
-			call.Args.Append(na)
-			call.Args.Append(nh)
-			call.Args.Append(ir.NewInt(base.Pos, size))
-			fn.Body.Append(ir.NewAssignStmt(base.Pos, nh, call))
-
-			i = next
+			p := ir.NewBinaryExpr(pos, ir.OUNSAFEADD, np, idx)
+			call := typecheck.Call(pos, elemFn, []ir.Node{p, nh}, false)
+			as := ir.NewAssignStmt(pos, nh, call)
+			loop := ir.NewForStmt(pos, init, cond, post, []ir.Node{as}, false)
+			fn.Body.Append(loop)
+			off += n * elemSize
 		}
 	}

-	r := ir.NewReturnStmt(base.Pos, nil)
-	r.Results.Append(nh)
-	fn.Body.Append(r)
+	fn.Body.Append(ir.NewReturnStmt(pos, []ir.Node{nh}))

 	if base.Flag.LowerR != 0 {
 		ir.DumpList("genhash body", fn.Body)
@@ -246,7 +271,6 @@ func hashFunc(t *types.Type) *ir.Func {
 	})

 	fn.SetNilCheckDisabled(true)
-
 	return fn
 }

@@ -257,8 +281,9 @@ func runtimeHashFor(name string, t *types.Type) *ir.Name {
 // hashfor returns the function to compute the hash of a value of type t.
 func hashfor(t *types.Type) *ir.Name {
 	switch types.AlgType(t) {
-	case types.AMEM:
-		base.Fatalf("hashfor with AMEM type")
+	default:
+		base.Fatalf("hashfor with bad type %v", t)
+		return nil
 	case types.AINTER:
 		return runtimeHashFor("interhash", t)
 	case types.ANILINTER:
@@ -274,9 +299,6 @@ func hashfor(t *types.Type) *ir.Name {
 	case types.ACPLX128:
 		return runtimeHashFor("c128hash", t)
 	}
-
-	fn := hashFunc(t)
-	return fn.Nname
 }

 // sysClosure returns a closure which will call the
@@ -383,8 +405,6 @@ func geneqSig(sig string) *obj.LSym {
 	return closure
 }

-// TODO: generate hash function from signatures also?
-// They are slightly different, at least at the moment.
 func eqFunc(sig string) *ir.Func {
 	sym := types.TypeSymLookup(".eq." + sig)
 	if sym.Def != nil {
--- a/src/runtime/alg.go
+++ b/src/runtime/alg.go
@@ -199,8 +199,6 @@ func nilinterhash(p unsafe.Pointer, h uintptr) uintptr {
 // is slower but more general and is used for hashing interface types
 // (called from interhash or nilinterhash, above) or for hashing in
 // maps generated by reflect.MapOf (reflect_typehash, below).
-// Note: this function must match the compiler generated
-// functions exactly. See issue 37716.
 //
 // typehash should be an internal detail,
 // but widely used packages access it using linkname.