1 // Copyright 2020 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package intern lets you make smaller comparable values by boxing 6 // a larger comparable value (such as a 16 byte string header) down 7 // into a globally unique 8 byte pointer. 8 // 9 // The globally unique pointers are garbage collected with weak 10 // references and finalizers. This package hides that. 11 package intern 12 13 import ( 14 "internal/godebug" 15 "runtime" 16 "sync" 17 "unsafe" 18 ) 19 20 // A Value pointer is the handle to an underlying comparable value. 21 // See func Get for how Value pointers may be used. 22 type Value struct { 23 _ [0]func() // prevent people from accidentally using value type as comparable 24 cmpVal any 25 // resurrected is guarded by mu (for all instances of Value). 26 // It is set true whenever v is synthesized from a uintptr. 27 resurrected bool 28 } 29 30 // Get returns the comparable value passed to the Get func 31 // that returned v. 32 func (v *Value) Get() any { return v.cmpVal } 33 34 // key is a key in our global value map. 35 // It contains type-specialized fields to avoid allocations 36 // when converting common types to empty interfaces. 37 type key struct { 38 s string 39 cmpVal any 40 // isString reports whether key contains a string. 41 // Without it, the zero value of key is ambiguous. 42 isString bool 43 } 44 45 // keyFor returns a key to use with cmpVal. 46 func keyFor(cmpVal any) key { 47 if s, ok := cmpVal.(string); ok { 48 return key{s: s, isString: true} 49 } 50 return key{cmpVal: cmpVal} 51 } 52 53 // Value returns a *Value built from k. 54 func (k key) Value() *Value { 55 if k.isString { 56 return &Value{cmpVal: k.s} 57 } 58 return &Value{cmpVal: k.cmpVal} 59 } 60 61 var ( 62 // mu guards valMap, a weakref map of *Value by underlying value. 63 // It also guards the resurrected field of all *Values. 64 mu sync.Mutex 65 valMap = map[key]uintptr{} // to uintptr(*Value) 66 valSafe = safeMap() // non-nil in safe+leaky mode 67 ) 68 69 var intern = godebug.New("#intern") 70 71 // safeMap returns a non-nil map if we're in safe-but-leaky mode, 72 // as controlled by GODEBUG=intern=leaky 73 func safeMap() map[key]*Value { 74 if intern.Value() == "leaky" { 75 return map[key]*Value{} 76 } 77 return nil 78 } 79 80 // Get returns a pointer representing the comparable value cmpVal. 81 // 82 // The returned pointer will be the same for Get(v) and Get(v2) 83 // if and only if v == v2, and can be used as a map key. 84 func Get(cmpVal any) *Value { 85 return get(keyFor(cmpVal)) 86 } 87 88 // GetByString is identical to Get, except that it is specialized for strings. 89 // This avoids an allocation from putting a string into an interface{} 90 // to pass as an argument to Get. 91 func GetByString(s string) *Value { 92 return get(key{s: s, isString: true}) 93 } 94 95 // We play unsafe games that violate Go's rules (and assume a non-moving 96 // collector). So we quiet Go here. 97 // See the comment below Get for more implementation details. 98 // 99 //go:nocheckptr 100 func get(k key) *Value { 101 mu.Lock() 102 defer mu.Unlock() 103 104 var v *Value 105 if valSafe != nil { 106 v = valSafe[k] 107 } else if addr, ok := valMap[k]; ok { 108 v = (*Value)(unsafe.Pointer(addr)) 109 v.resurrected = true 110 } 111 if v != nil { 112 return v 113 } 114 v = k.Value() 115 if valSafe != nil { 116 valSafe[k] = v 117 } else { 118 // SetFinalizer before uintptr conversion (theoretical concern; 119 // see https://github.com/go4org/intern/issues/13) 120 runtime.SetFinalizer(v, finalize) 121 valMap[k] = uintptr(unsafe.Pointer(v)) 122 } 123 return v 124 } 125 126 func finalize(v *Value) { 127 mu.Lock() 128 defer mu.Unlock() 129 if v.resurrected { 130 // We lost the race. Somebody resurrected it while we 131 // were about to finalize it. Try again next round. 132 v.resurrected = false 133 runtime.SetFinalizer(v, finalize) 134 return 135 } 136 delete(valMap, keyFor(v.cmpVal)) 137 } 138 139 // Interning is simple if you don't require that unused values be 140 // garbage collectable. But we do require that; we don't want to be 141 // DOS vector. We do this by using a uintptr to hide the pointer from 142 // the garbage collector, and using a finalizer to eliminate the 143 // pointer when no other code is using it. 144 // 145 // The obvious implementation of this is to use a 146 // map[interface{}]uintptr-of-*interface{}, and set up a finalizer to 147 // delete from the map. Unfortunately, this is racy. Because pointers 148 // are being created in violation of Go's unsafety rules, it's 149 // possible to create a pointer to a value concurrently with the GC 150 // concluding that the value can be collected. There are other races 151 // that break the equality invariant as well, but the use-after-free 152 // will cause a runtime crash. 153 // 154 // To make this work, the finalizer needs to know that no references 155 // have been unsafely created since the finalizer was set up. To do 156 // this, values carry a "resurrected" sentinel, which gets set 157 // whenever a pointer is unsafely created. If the finalizer encounters 158 // the sentinel, it clears the sentinel and delays collection for one 159 // additional GC cycle, by re-installing itself as finalizer. This 160 // ensures that the unsafely created pointer is visible to the GC, and 161 // will correctly prevent collection. 162 // 163 // This technique does mean that interned values that get reused take 164 // at least 3 GC cycles to fully collect (1 to clear the sentinel, 1 165 // to clean up the unsafe map, 1 to be actually deleted). 166 // 167 // @ianlancetaylor commented in 168 // https://github.com/golang/go/issues/41303#issuecomment-717401656 169 // that it is possible to implement weak references in terms of 170 // finalizers without unsafe. Unfortunately, the approach he outlined 171 // does not work here, for two reasons. First, there is no way to 172 // construct a strong pointer out of a weak pointer; our map stores 173 // weak pointers, but we must return strong pointers to callers. 174 // Second, and more fundamentally, we must return not just _a_ strong 175 // pointer to callers, but _the same_ strong pointer to callers. In 176 // order to return _the same_ strong pointer to callers, we must track 177 // it, which is exactly what we cannot do with strong pointers. 178 // 179 // See https://github.com/inetaf/netaddr/issues/53 for more 180 // discussion, and https://github.com/go4org/intern/issues/2 for an 181 // illustration of the subtleties at play. 182