1
2
3
4
5
6
7
8
9
10
11
12 package build
13
14 import (
15 "fmt"
16 "hash/fnv"
17 "io"
18 "reflect"
19 )
20
21 const (
22 blockSize = 64
23 blockOffset = 2
24 )
25
26 type trieHandle struct {
27 lookupStart uint16
28 valueStart uint16
29 }
30
31 type trie struct {
32 index []uint16
33 values []uint32
34 }
35
36
37 type trieNode struct {
38 index []*trieNode
39 value []uint32
40 b byte
41 refValue uint16
42 refIndex uint16
43 }
44
45 func newNode() *trieNode {
46 return &trieNode{
47 index: make([]*trieNode, 64),
48 value: make([]uint32, 128),
49 }
50 }
51
52 func (n *trieNode) isInternal() bool {
53 return n.value != nil
54 }
55
56 func (n *trieNode) insert(r rune, value uint32) {
57 const maskx = 0x3F
58 str := string(r)
59 if len(str) == 1 {
60 n.value[str[0]] = value
61 return
62 }
63 for i := 0; i < len(str)-1; i++ {
64 b := str[i] & maskx
65 if n.index == nil {
66 n.index = make([]*trieNode, blockSize)
67 }
68 nn := n.index[b]
69 if nn == nil {
70 nn = &trieNode{}
71 nn.b = b
72 n.index[b] = nn
73 }
74 n = nn
75 }
76 if n.value == nil {
77 n.value = make([]uint32, blockSize)
78 }
79 b := str[len(str)-1] & maskx
80 n.value[b] = value
81 }
82
83 type trieBuilder struct {
84 t *trie
85
86 roots []*trieHandle
87
88 lookupBlocks []*trieNode
89 valueBlocks []*trieNode
90
91 lookupBlockIdx map[uint32]*trieNode
92 valueBlockIdx map[uint32]*trieNode
93 }
94
95 func newTrieBuilder() *trieBuilder {
96 index := &trieBuilder{}
97 index.lookupBlocks = make([]*trieNode, 0)
98 index.valueBlocks = make([]*trieNode, 0)
99 index.lookupBlockIdx = make(map[uint32]*trieNode)
100 index.valueBlockIdx = make(map[uint32]*trieNode)
101
102
103 index.lookupBlocks = append(index.lookupBlocks, nil, nil, nil)
104 index.t = &trie{}
105 return index
106 }
107
108 func (b *trieBuilder) computeOffsets(n *trieNode) *trieNode {
109 hasher := fnv.New32()
110 if n.index != nil {
111 for i, nn := range n.index {
112 var vi, vv uint16
113 if nn != nil {
114 nn = b.computeOffsets(nn)
115 n.index[i] = nn
116 vi = nn.refIndex
117 vv = nn.refValue
118 }
119 hasher.Write([]byte{byte(vi >> 8), byte(vi)})
120 hasher.Write([]byte{byte(vv >> 8), byte(vv)})
121 }
122 h := hasher.Sum32()
123 nn, ok := b.lookupBlockIdx[h]
124 if !ok {
125 n.refIndex = uint16(len(b.lookupBlocks)) - blockOffset
126 b.lookupBlocks = append(b.lookupBlocks, n)
127 b.lookupBlockIdx[h] = n
128 } else {
129 n = nn
130 }
131 } else {
132 for _, v := range n.value {
133 hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)})
134 }
135 h := hasher.Sum32()
136 nn, ok := b.valueBlockIdx[h]
137 if !ok {
138 n.refValue = uint16(len(b.valueBlocks)) - blockOffset
139 n.refIndex = n.refValue
140 b.valueBlocks = append(b.valueBlocks, n)
141 b.valueBlockIdx[h] = n
142 } else {
143 n = nn
144 }
145 }
146 return n
147 }
148
149 func (b *trieBuilder) addStartValueBlock(n *trieNode) uint16 {
150 hasher := fnv.New32()
151 for _, v := range n.value[:2*blockSize] {
152 hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)})
153 }
154 h := hasher.Sum32()
155 nn, ok := b.valueBlockIdx[h]
156 if !ok {
157 n.refValue = uint16(len(b.valueBlocks))
158 n.refIndex = n.refValue
159 b.valueBlocks = append(b.valueBlocks, n)
160
161 b.valueBlocks = append(b.valueBlocks, nil)
162 b.valueBlockIdx[h] = n
163 } else {
164 n = nn
165 }
166 return n.refValue
167 }
168
169 func genValueBlock(t *trie, n *trieNode) {
170 if n != nil {
171 for _, v := range n.value {
172 t.values = append(t.values, v)
173 }
174 }
175 }
176
177 func genLookupBlock(t *trie, n *trieNode) {
178 for _, nn := range n.index {
179 v := uint16(0)
180 if nn != nil {
181 if n.index != nil {
182 v = nn.refIndex
183 } else {
184 v = nn.refValue
185 }
186 }
187 t.index = append(t.index, v)
188 }
189 }
190
191 func (b *trieBuilder) addTrie(n *trieNode) *trieHandle {
192 h := &trieHandle{}
193 b.roots = append(b.roots, h)
194 h.valueStart = b.addStartValueBlock(n)
195 if len(b.roots) == 1 {
196
197
198
199
200 b.valueBlocks = append(b.valueBlocks, nil)
201 }
202 n = b.computeOffsets(n)
203
204 h.lookupStart = n.refIndex - 1
205 return h
206 }
207
208
209 func (b *trieBuilder) generate() (t *trie, err error) {
210 t = b.t
211 if len(b.valueBlocks) >= 1<<16 {
212 return nil, fmt.Errorf("maximum number of value blocks exceeded (%d > %d)", len(b.valueBlocks), 1<<16)
213 }
214 if len(b.lookupBlocks) >= 1<<16 {
215 return nil, fmt.Errorf("maximum number of lookup blocks exceeded (%d > %d)", len(b.lookupBlocks), 1<<16)
216 }
217 genValueBlock(t, b.valueBlocks[0])
218 genValueBlock(t, &trieNode{value: make([]uint32, 64)})
219 for i := 2; i < len(b.valueBlocks); i++ {
220 genValueBlock(t, b.valueBlocks[i])
221 }
222 n := &trieNode{index: make([]*trieNode, 64)}
223 genLookupBlock(t, n)
224 genLookupBlock(t, n)
225 genLookupBlock(t, n)
226 for i := 3; i < len(b.lookupBlocks); i++ {
227 genLookupBlock(t, b.lookupBlocks[i])
228 }
229 return b.t, nil
230 }
231
232 func (t *trie) printArrays(w io.Writer, name string) (n, size int, err error) {
233 p := func(f string, a ...interface{}) {
234 nn, e := fmt.Fprintf(w, f, a...)
235 n += nn
236 if err == nil {
237 err = e
238 }
239 }
240 nv := len(t.values)
241 p("// %sValues: %d entries, %d bytes\n", name, nv, nv*4)
242 p("// Block 2 is the null block.\n")
243 p("var %sValues = [%d]uint32 {", name, nv)
244 var printnewline bool
245 for i, v := range t.values {
246 if i%blockSize == 0 {
247 p("\n\t// Block %#x, offset %#x", i/blockSize, i)
248 }
249 if i%4 == 0 {
250 printnewline = true
251 }
252 if v != 0 {
253 if printnewline {
254 p("\n\t")
255 printnewline = false
256 }
257 p("%#04x:%#08x, ", i, v)
258 }
259 }
260 p("\n}\n\n")
261 ni := len(t.index)
262 p("// %sLookup: %d entries, %d bytes\n", name, ni, ni*2)
263 p("// Block 0 is the null block.\n")
264 p("var %sLookup = [%d]uint16 {", name, ni)
265 printnewline = false
266 for i, v := range t.index {
267 if i%blockSize == 0 {
268 p("\n\t// Block %#x, offset %#x", i/blockSize, i)
269 }
270 if i%8 == 0 {
271 printnewline = true
272 }
273 if v != 0 {
274 if printnewline {
275 p("\n\t")
276 printnewline = false
277 }
278 p("%#03x:%#02x, ", i, v)
279 }
280 }
281 p("\n}\n\n")
282 return n, nv*4 + ni*2, err
283 }
284
285 func (t *trie) printStruct(w io.Writer, handle *trieHandle, name string) (n, sz int, err error) {
286 const msg = "trie{ %sLookup[%d:], %sValues[%d:], %sLookup[:], %sValues[:]}"
287 n, err = fmt.Fprintf(w, msg, name, handle.lookupStart*blockSize, name, handle.valueStart*blockSize, name, name)
288 sz += int(reflect.TypeOf(trie{}).Size())
289 return
290 }
291
View as plain text