cmd/compile: use equality signatures in hash function generation
There aren't a huge number of generated hash functions, so this probably
won't save a whole lot of memory. But it means we can clean up a bunch of
code by basing equality and hashing on the same underlying infrastructure.

Change-Id: I36ed1e49044fecb33120d8736f1c0403a4a2554e
Reviewed-on: https://go-review.googlesource.com/c/go/+/727500
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cuong Manh Le <cuong.manhle.vn@gmail.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
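In other words: a type's hashable layout is summarized as a compact signature string (runs of plain memory with a byte count, floats, strings, interfaces, nested arrays), and the generated hash functions and closures are now keyed by that signature, just as the generated equality functions already are, so layout-identical types share a single symbol. The standalone sketch below only illustrates that idea; the signature spelling ("M8FS"), the parseNum helper, and the offsets are assumptions made for the example, not the compiler's actual encoding.

// Toy model of signature-based hash generation. The kind characters
// ('M', 'F', 'S') stand in for the real sigMemory, sigFloat64, sigString
// constants, which live in cmd/compile/internal/reflectdata and are not
// shown in this diff.
package main

import (
	"fmt"
	"strconv"
)

// parseNum splits a leading decimal count off a signature string,
// mirroring what the compiler's parseNum is used for in the diff.
func parseNum(sig string) (int64, string) {
	i := 0
	for i < len(sig) && sig[i] >= '0' && sig[i] <= '9' {
		i++
	}
	n, _ := strconv.ParseInt(sig[:i], 10, 64)
	return n, sig[i:]
}

// describe walks a layout signature the same way a generated hasher
// would, reporting which runtime hash helper handles each chunk.
func describe(sig string) {
	var off int64
	for len(sig) > 0 {
		kind := sig[0]
		sig = sig[1:]
		switch kind {
		case 'M': // plain memory: hash n bytes
			var n int64
			n, sig = parseNum(sig)
			fmt.Printf("offset %2d: memhash %d bytes\n", off, n)
			off += n
		case 'F': // float64: needs NaN/-0 aware hashing
			fmt.Printf("offset %2d: f64hash\n", off)
			off += 8
		case 'S': // string
			fmt.Printf("offset %2d: strhash\n", off)
			off += 16 // assumed 64-bit string header size
		default:
			panic("unknown signature byte")
		}
	}
}

func main() {
	// Two distinct struct types with the same layout, say
	//   struct { a uint64; b float64; s string }
	// produce the same signature and therefore share one generated
	// hash function (and one generated equality function).
	describe("M8FS")
}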
@@ -11,7 +11,6 @@ import (
 	"strings"

 	"cmd/compile/internal/base"
-	"cmd/compile/internal/compare"
 	"cmd/compile/internal/ir"
 	"cmd/compile/internal/objw"
 	"cmd/compile/internal/typecheck"
@@ -50,105 +49,91 @@ func AlgType(t *types.Type) types.AlgKind {

 // genhash returns a symbol which is the closure used to compute
 // the hash of a value of type t.
 // Note: the generated function must match runtime.typehash exactly.
 func genhash(t *types.Type) *obj.LSym {
-	switch AlgType(t) {
-	default:
-		// genhash is only called for types that have equality
-		base.Fatalf("genhash %v", t)
-	case types.AMEM0:
+	return genhashSig(eqSignature(t))
+}
+
+func genhashSig(sig string) *obj.LSym {
+	if len(sig) > 0 && sig[0] == sigAlign {
+		_, sig = parseNum(sig[1:])
+	}
+	switch sig {
+	case "":
 		return sysClosure("memhash0")
-	case types.AMEM8:
+	case string(sigMemory) + "1":
 		return sysClosure("memhash8")
-	case types.AMEM16:
+	case string(sigMemory) + "2":
 		return sysClosure("memhash16")
-	case types.AMEM32:
+	case string(sigMemory) + "4":
 		return sysClosure("memhash32")
-	case types.AMEM64:
+	case string(sigMemory) + "8":
 		return sysClosure("memhash64")
-	case types.AMEM128:
+	case string(sigMemory) + "16":
 		return sysClosure("memhash128")
-	case types.ASTRING:
+	case string(sigString):
 		return sysClosure("strhash")
-	case types.AINTER:
+	case string(sigIface):
 		return sysClosure("interhash")
-	case types.ANILINTER:
+	case string(sigEface):
 		return sysClosure("nilinterhash")
-	case types.AFLOAT32:
+	case string(sigFloat32):
 		return sysClosure("f32hash")
-	case types.AFLOAT64:
+	case string(sigFloat64):
 		return sysClosure("f64hash")
-	case types.ACPLX64:
+	case string(sigFloat32) + string(sigFloat32):
 		return sysClosure("c64hash")
-	case types.ACPLX128:
+	case string(sigFloat64) + string(sigFloat64):
 		return sysClosure("c128hash")
-	case types.AMEM:
-		// For other sizes of plain memory, we build a closure
-		// that calls memhash_varlen. The size of the memory is
-		// encoded in the first slot of the closure.
-		closure := TypeLinksymLookup(fmt.Sprintf(".hashfunc%d", t.Size()))
-		if len(closure.P) > 0 { // already generated
-			return closure
-		}
-		if memhashvarlen == nil {
-			memhashvarlen = typecheck.LookupRuntimeFunc("memhash_varlen")
-		}
-		ot := 0
-		ot = objw.SymPtr(closure, ot, memhashvarlen, 0)
-		ot = objw.Uintptr(closure, ot, uint64(t.Size())) // size encoded in closure
-		objw.Global(closure, int32(ot), obj.DUPOK|obj.RODATA)
-		return closure
-	case types.ASPECIAL:
-		break
 	}

-	closure := TypeLinksymPrefix(".hashfunc", t)
+	closure := TypeLinksymLookup(".hashfunc." + sig)
 	if len(closure.P) > 0 { // already generated
 		return closure
 	}

-	// Generate hash functions for subtypes.
-	// There are cases where we might not use these hashes,
-	// but in that case they will get dead-code eliminated.
-	// (And the closure generated by genhash will also get
-	// dead-code eliminated, as we call the subtype hashers
-	// directly.)
-	switch t.Kind() {
-	case types.TARRAY:
-		genhash(t.Elem())
-	case types.TSTRUCT:
-		for _, f := range t.Fields() {
-			genhash(f.Type)
+	if sig[0] == sigMemory {
+		n, rest := parseNum(sig[1:])
+		if rest == "" {
+			// Just M%d. We can make a memhash_varlen closure.
+			// The size of the memory region to hash is encoded in the closure.
+			if memhashvarlen == nil {
+				memhashvarlen = typecheck.LookupRuntimeFunc("memhash_varlen")
+			}
+			ot := 0
+			ot = objw.SymPtr(closure, ot, memhashvarlen, 0)
+			ot = objw.Uintptr(closure, ot, uint64(n)) // size encoded in closue
+			objw.Global(closure, int32(ot), obj.DUPOK|obj.RODATA)
+			return closure
 		}
 	}

 	if base.Flag.LowerR != 0 {
-		fmt.Printf("genhash %v %v\n", closure, t)
+		fmt.Printf("genhash %s\n", sig)
 	}

-	fn := hashFunc(t)
+	fn := hashFunc(sig)

 	// Build closure. It doesn't close over any variables, so
 	// it contains just the function pointer.
 	objw.SymPtr(closure, 0, fn.Linksym(), 0)
 	objw.Global(closure, int32(types.PtrSize), obj.DUPOK|obj.RODATA)

 	return closure
 }

-func hashFunc(t *types.Type) *ir.Func {
-	sym := TypeSymPrefix(".hash", t)
+func hashFunc(sig string) *ir.Func {
+	sym := types.TypeSymLookup(".hash." + sig)
 	if sym.Def != nil {
 		return sym.Def.(*ir.Name).Func
 	}
+	sig0 := sig

 	pos := base.AutogeneratedPos // less confusing than end of input
 	base.Pos = pos

-	// func sym(p *T, h uintptr) uintptr
+	// func sym(p unsafe.Pointer, h uintptr) uintptr
 	fn := ir.NewFunc(pos, pos, sym, types.NewSignature(nil,
 		[]*types.Field{
-			types.NewField(pos, typecheck.Lookup("p"), types.NewPtr(t)),
+			types.NewField(pos, typecheck.Lookup("p"), types.Types[types.TUNSAFEPTR]),
 			types.NewField(pos, typecheck.Lookup("h"), types.Types[types.TUINTPTR]),
 		},
 		[]*types.Field{
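The ".hashfunc" closures built above are small pieces of static data: a pointer to the hash routine and, for the plain-memory case, the byte count that memhash_varlen reads back out of its own closure slot. The sketch below models that idea with an ordinary Go closure in place of the objw-emitted symbol; memhashVarlen here is an invented stand-in for illustration, not the runtime's memhash_varlen.

package main

import (
	"fmt"
	"unsafe"
)

// memhashVarlen models a hash routine whose length is not an argument but
// a value captured alongside the code pointer, which is what the
// objw.SymPtr + objw.Uintptr pair lays out in the emitted closure.
func memhashVarlen(size uintptr) func(p unsafe.Pointer, h uintptr) uintptr {
	return func(p unsafe.Pointer, h uintptr) uintptr {
		// Not the runtime's algorithm, just a stand-in byte mixer.
		for i := uintptr(0); i < size; i++ {
			h = (h ^ uintptr(*(*byte)(unsafe.Add(p, i)))) * 16777619
		}
		return h
	}
}

func main() {
	// A 12-byte plain-memory type gets one closure keyed by its
	// signature rather than a per-type symbol.
	x := [3]uint32{1, 2, 3}
	hashfunc := memhashVarlen(unsafe.Sizeof(x))
	fmt.Printf("%#x\n", hashfunc(unsafe.Pointer(&x), 0))
}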
@@ -157,81 +142,121 @@ func hashFunc(t *types.Type) *ir.Func {
 	))
 	sym.Def = fn.Nname
 	fn.Pragma |= ir.Noinline // TODO(mdempsky): We need to emit this during the unified frontend instead, to allow inlining.

 	typecheck.DeclFunc(fn)
 	np := fn.Dcl[0]
 	nh := fn.Dcl[1]

-	switch t.Kind() {
-	case types.TARRAY:
-		// An array of pure memory would be handled by the
-		// standard algorithm, so the element type must not be
-		// pure memory.
-		hashel := hashfor(t.Elem())
+	// Skip alignment, hash functions can handle unaligned data.
+	if len(sig) > 0 && sig[0] == sigAlign {
+		_, sig = parseNum(sig[1:])
+	}

-		// for i := 0; i < nelem; i++
-		ni := typecheck.TempAt(base.Pos, ir.CurFunc, types.Types[types.TINT])
-		init := ir.NewAssignStmt(base.Pos, ni, ir.NewInt(base.Pos, 0))
-		cond := ir.NewBinaryExpr(base.Pos, ir.OLT, ni, ir.NewInt(base.Pos, t.NumElem()))
-		post := ir.NewAssignStmt(base.Pos, ni, ir.NewBinaryExpr(base.Pos, ir.OADD, ni, ir.NewInt(base.Pos, 1)))
-		loop := ir.NewForStmt(base.Pos, nil, cond, post, nil, false)
-		loop.PtrInit().Append(init)
+	// offset from np that we're currently working on
+	var off int64

-		// h = hashel(&p[i], h)
-		call := ir.NewCallExpr(base.Pos, ir.OCALL, hashel, nil)
+	// Return np+off cast to a t (t must be a pointer-y type).
+	ptr := func(t *types.Type) ir.Node {
+		c := ir.NewBasicLit(pos, types.Types[types.TUINTPTR], constant.MakeInt64(off))
+		p := ir.NewBinaryExpr(pos, ir.OUNSAFEADD, np, c)
+		return ir.NewConvExpr(pos, ir.OCONVNOP, t, p)
+	}
+	// hash data of type t at np+off.
+	// Increment off by the size of t.
+	hash := func(t *types.Type) {
+		p := ptr(t.PtrTo())
+		hashFn := hashfor(t)
+		call := ir.NewCallExpr(pos, ir.OCALL, hashFn, []ir.Node{p, nh})
+		fn.Body.Append(ir.NewAssignStmt(pos, nh, call))
+		off += t.Size()
+	}

-		nx := ir.NewIndexExpr(base.Pos, np, ni)
-		nx.SetBounded(true)
-		na := typecheck.NodAddr(nx)
-		call.Args.Append(na)
-		call.Args.Append(nh)
-		loop.Body.Append(ir.NewAssignStmt(base.Pos, nh, call))
-
-		fn.Body.Append(loop)
-
-	case types.TSTRUCT:
-		// Walk the struct using memhash for runs of AMEM
-		// and calling specific hash functions for the others.
-		for i, fields := 0, t.Fields(); i < len(fields); {
-			f := fields[i]
-
-			// Skip blank fields.
-			if f.Sym.IsBlank() {
-				i++
-				continue
+	for len(sig) > 0 {
+		kind := sig[0]
+		sig = sig[1:]
+		switch kind {
+		case sigMemory:
+			var n int64
+			n, sig = parseNum(sig)
+			switch {
+			case n == 4:
+				p := ptr(types.Types[types.TUNSAFEPTR])
+				memhash := typecheck.LookupRuntime("memhash32")
+				call := ir.NewCallExpr(pos, ir.OCALL, memhash, []ir.Node{p, nh})
+				fn.Body.Append(ir.NewAssignStmt(pos, nh, call))
+			case n == 8:
+				p := ptr(types.Types[types.TUNSAFEPTR])
+				memhash := typecheck.LookupRuntime("memhash64")
+				call := ir.NewCallExpr(pos, ir.OCALL, memhash, []ir.Node{p, nh})
+				fn.Body.Append(ir.NewAssignStmt(pos, nh, call))
+			default:
+				p := ptr(types.Types[types.TUINT8].PtrTo())
+				memhash := typecheck.LookupRuntime("memhash", types.Types[types.TUINT8])
+				size := ir.NewBasicLit(pos, types.Types[types.TUINTPTR], constant.MakeInt64(n))
+				call := ir.NewCallExpr(pos, ir.OCALL, memhash, []ir.Node{p, nh, size})
+				fn.Body.Append(ir.NewAssignStmt(pos, nh, call))
 			}

-			// Hash non-memory fields with appropriate hash function.
-			if !compare.IsRegularMemory(f.Type) {
-				hashel := hashfor(f.Type)
-				call := ir.NewCallExpr(base.Pos, ir.OCALL, hashel, nil)
-				na := typecheck.NodAddr(typecheck.DotField(base.Pos, np, i))
-				call.Args.Append(na)
-				call.Args.Append(nh)
-				fn.Body.Append(ir.NewAssignStmt(base.Pos, nh, call))
+			off += n
+		case sigFloat32:
+			hash(types.Types[types.TFLOAT32])
+		case sigFloat64:
+			hash(types.Types[types.TFLOAT64])
+		case sigString:
+			hash(types.Types[types.TSTRING])
+		case sigEface:
+			hash(types.NewInterface(nil))
+		case sigIface:
+			// arg kinda hacky. TODO: clean this up.
+			hash(types.NewInterface([]*types.Field{types.NewField(pos, typecheck.Lookup("A"), types.Types[types.TBOOL])}))
+		case sigSkip:
+			var n int64
+			n, sig = parseNum(sig)
+			off += n
+		case sigArrayStart:
+			var n int64
+			n, sig = parseNum(sig)
+			// Find matching closing brace.
+			i := 0
+			depth := 1
+		findEndSquareBracket:
+			for {
+				if i == len(sig) {
+					base.Fatalf("mismatched brackets in %s", sig0)
+				}
+				switch sig[i] {
+				case sigArrayStart:
+					depth++
+				case sigArrayEnd:
+					depth--
+					if depth == 0 {
+						break findEndSquareBracket
+					}
+				}
 				i++
-				continue
 			}
+			elemSig := sig[:i]
+			elemSize := sigSize(elemSig)
+			sig = sig[i+1:] // remaining signature after array

-			// Otherwise, hash a maximal length run of raw memory.
-			size, next := compare.Memrun(t, i)
+			// Loop N times, calling hash function for the element.
+			// for i := off; i < off + N*elemSize; i += elemSize {
+			// h = elemfn(p+i, h)
+			// }
+			elemFn := hashFunc(elemSig).Nname
+			idx := typecheck.TempAt(pos, ir.CurFunc, types.Types[types.TUINTPTR])
+			init := ir.NewAssignStmt(pos, idx, ir.NewInt(pos, off))
+			cond := ir.NewBinaryExpr(pos, ir.OLT, idx, ir.NewInt(pos, off+n*elemSize))
+			post := ir.NewAssignStmt(pos, idx, ir.NewBinaryExpr(pos, ir.OADD, idx, ir.NewInt(pos, elemSize)))

-			// h = hashel(&p.first, size, h)
-			hashel := hashmem(f.Type)
-			call := ir.NewCallExpr(base.Pos, ir.OCALL, hashel, nil)
-			na := typecheck.NodAddr(typecheck.DotField(base.Pos, np, i))
-			call.Args.Append(na)
-			call.Args.Append(nh)
-			call.Args.Append(ir.NewInt(base.Pos, size))
-			fn.Body.Append(ir.NewAssignStmt(base.Pos, nh, call))
-
-			i = next
+			p := ir.NewBinaryExpr(pos, ir.OUNSAFEADD, np, idx)
+			call := typecheck.Call(pos, elemFn, []ir.Node{p, nh}, false)
+			as := ir.NewAssignStmt(pos, nh, call)
+			loop := ir.NewForStmt(pos, init, cond, post, []ir.Node{as}, false)
+			fn.Body.Append(loop)
+			off += n * elemSize
 		}
 	}

-	r := ir.NewReturnStmt(base.Pos, nil)
-	r.Results.Append(nh)
-	fn.Body.Append(r)
+	fn.Body.Append(ir.NewReturnStmt(pos, []ir.Node{nh}))

 	if base.Flag.LowerR != 0 {
 		ir.DumpList("genhash body", fn.Body)
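To make the generated body concrete: for a struct whose signature mixes a plain-memory run, a float64, and a string, the emitted ".hash.<sig>" function is essentially a straight-line sequence of hash calls at increasing offsets from p, threading h through each call. The sketch below imitates only that shape; its helper hashes are invented for the example and are not the runtime's memhash64, f64hash, or strhash, which the real generated code calls and must match (see runtime.typehash).

package main

import (
	"fmt"
	"math"
	"unsafe"
)

type T struct {
	X uint64
	F float64
	S string
}

var zero T // used only for unsafe.Offsetof below

// mix is a stand-in for a runtime hash primitive, not the real algorithm.
func mix(h, v uintptr) uintptr { return (h ^ v) * 16777619 }

func memhash64(p unsafe.Pointer, h uintptr) uintptr {
	return mix(h, uintptr(*(*uint64)(p)))
}

func f64hash(p unsafe.Pointer, h uintptr) uintptr {
	f := *(*float64)(p)
	if f == 0 {
		f = 0 // +0 and -0 must hash the same
	}
	if f != f {
		return mix(h, 12345) // the runtime hashes NaNs randomly; a constant keeps the sketch short
	}
	return mix(h, uintptr(math.Float64bits(f)))
}

func strhash(p unsafe.Pointer, h uintptr) uintptr {
	s := *(*string)(p)
	for i := 0; i < len(s); i++ {
		h = mix(h, uintptr(s[i]))
	}
	return h
}

// hashT mirrors the shape of a generated ".hash.<sig>" body: one call per
// signature element at increasing offsets from p, threading h through.
func hashT(p unsafe.Pointer, h uintptr) uintptr {
	h = memhash64(unsafe.Add(p, 0), h)                      // memory run: X
	h = f64hash(unsafe.Add(p, unsafe.Offsetof(zero.F)), h)  // float64: F
	h = strhash(unsafe.Add(p, unsafe.Offsetof(zero.S)), h)  // string: S
	return h
}

func main() {
	a := T{X: 1, F: math.Copysign(0, -1), S: "go"}
	b := T{X: 1, F: 0, S: "go"}
	// Equal values must hash equal, so -0 and +0 agree.
	fmt.Println(hashT(unsafe.Pointer(&a), 0) == hashT(unsafe.Pointer(&b), 0)) // true
}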
@@ -246,7 +271,6 @@ func hashFunc(t *types.Type) *ir.Func {
 	})

 	fn.SetNilCheckDisabled(true)

 	return fn
 }

@@ -257,8 +281,9 @@ func runtimeHashFor(name string, t *types.Type) *ir.Name {

 // hashfor returns the function to compute the hash of a value of type t.
 func hashfor(t *types.Type) *ir.Name {
 	switch types.AlgType(t) {
-	case types.AMEM:
-		base.Fatalf("hashfor with AMEM type")
+	default:
+		base.Fatalf("hashfor with bad type %v", t)
+		return nil
 	case types.AINTER:
 		return runtimeHashFor("interhash", t)
 	case types.ANILINTER:
@@ -274,9 +299,6 @@ func hashfor(t *types.Type) *ir.Name {
 	case types.ACPLX128:
 		return runtimeHashFor("c128hash", t)
 	}
-
-	fn := hashFunc(t)
-	return fn.Nname
 }

 // sysClosure returns a closure which will call the
@@ -383,8 +405,6 @@ func geneqSig(sig string) *obj.LSym {
 	return closure
 }

-// TODO: generate hash function from signatures also?
-// They are slightly different, at least at the moment.
 func eqFunc(sig string) *ir.Func {
 	sym := types.TypeSymLookup(".eq." + sig)
 	if sym.Def != nil {
@@ -199,8 +199,6 @@ func nilinterhash(p unsafe.Pointer, h uintptr) uintptr {
 // is slower but more general and is used for hashing interface types
 // (called from interhash or nilinterhash, above) or for hashing in
 // maps generated by reflect.MapOf (reflect_typehash, below).
 // Note: this function must match the compiler generated
 // functions exactly. See issue 37716.
 //
 // typehash should be an internal detail,
 // but widely used packages access it using linkname.