1
2
3
4
5 package build
6
7 import (
8 "fmt"
9 "log"
10 "sort"
11 "strings"
12 "unicode"
13
14 "golang.org/x/text/internal/colltab"
15 "golang.org/x/text/unicode/norm"
16 )
17
18 type logicalAnchor int
19
20 const (
21 firstAnchor logicalAnchor = -1
22 noAnchor = 0
23 lastAnchor = 1
24 )
25
26
27
28
29
30 type entry struct {
31 str string
32 runes []rune
33 elems []rawCE
34 extend string
35 before bool
36 lock bool
37
38
39 prev, next *entry
40 level colltab.Level
41 skipRemove bool
42
43 decompose bool
44 exclude bool
45 implicit bool
46 modified bool
47 logical logicalAnchor
48
49 expansionIndex int
50 contractionHandle ctHandle
51 contractionIndex int
52 }
53
54 func (e *entry) String() string {
55 return fmt.Sprintf("%X (%q) -> %X (ch:%x; ci:%d, ei:%d)",
56 e.runes, e.str, e.elems, e.contractionHandle, e.contractionIndex, e.expansionIndex)
57 }
58
59 func (e *entry) skip() bool {
60 return e.contraction()
61 }
62
63 func (e *entry) expansion() bool {
64 return !e.decompose && len(e.elems) > 1
65 }
66
67 func (e *entry) contraction() bool {
68 return len(e.runes) > 1
69 }
70
71 func (e *entry) contractionStarter() bool {
72 return e.contractionHandle.n != 0
73 }
74
75
76
77
78
79
80 func (e *entry) nextIndexed() (*entry, colltab.Level) {
81 level := e.level
82 for e = e.next; e != nil && (e.exclude || len(e.elems) == 0); e = e.next {
83 if e.level < level {
84 level = e.level
85 }
86 }
87 return e, level
88 }
89
90
91
92
93
94 func (e *entry) remove() {
95 if e.logical != noAnchor {
96 log.Fatalf("may not remove anchor %q", e.str)
97 }
98
99 e.elems = nil
100 if !e.skipRemove {
101 if e.prev != nil {
102 e.prev.next = e.next
103 }
104 if e.next != nil {
105 e.next.prev = e.prev
106 }
107 }
108 e.skipRemove = false
109 }
110
111
112 func (e *entry) insertAfter(n *entry) {
113 if e == n {
114 panic("e == anchor")
115 }
116 if e == nil {
117 panic("unexpected nil anchor")
118 }
119 n.remove()
120 n.decompose = false
121
122 n.next = e.next
123 n.prev = e
124 if e.next != nil {
125 e.next.prev = n
126 }
127 e.next = n
128 }
129
130
131 func (e *entry) insertBefore(n *entry) {
132 if e == n {
133 panic("e == anchor")
134 }
135 if e == nil {
136 panic("unexpected nil anchor")
137 }
138 n.remove()
139 n.decompose = false
140
141 n.prev = e.prev
142 n.next = e
143 if e.prev != nil {
144 e.prev.next = n
145 }
146 e.prev = n
147 }
148
149 func (e *entry) encodeBase() (ce uint32, err error) {
150 switch {
151 case e.expansion():
152 ce, err = makeExpandIndex(e.expansionIndex)
153 default:
154 if e.decompose {
155 log.Fatal("decompose should be handled elsewhere")
156 }
157 ce, err = makeCE(e.elems[0])
158 }
159 return
160 }
161
162 func (e *entry) encode() (ce uint32, err error) {
163 if e.skip() {
164 log.Fatal("cannot build colElem for entry that should be skipped")
165 }
166 switch {
167 case e.decompose:
168 t1 := e.elems[0].w[2]
169 t2 := 0
170 if len(e.elems) > 1 {
171 t2 = e.elems[1].w[2]
172 }
173 ce, err = makeDecompose(t1, t2)
174 case e.contractionStarter():
175 ce, err = makeContractIndex(e.contractionHandle, e.contractionIndex)
176 default:
177 if len(e.runes) > 1 {
178 log.Fatal("colElem: contractions are handled in contraction trie")
179 }
180 ce, err = e.encodeBase()
181 }
182 return
183 }
184
185
186 func entryLess(a, b *entry) bool {
187 if res, _ := compareWeights(a.elems, b.elems); res != 0 {
188 return res == -1
189 }
190 if a.logical != noAnchor {
191 return a.logical == firstAnchor
192 }
193 if b.logical != noAnchor {
194 return b.logical == lastAnchor
195 }
196 return a.str < b.str
197 }
198
199 type sortedEntries []*entry
200
201 func (s sortedEntries) Len() int {
202 return len(s)
203 }
204
205 func (s sortedEntries) Swap(i, j int) {
206 s[i], s[j] = s[j], s[i]
207 }
208
209 func (s sortedEntries) Less(i, j int) bool {
210 return entryLess(s[i], s[j])
211 }
212
213 type ordering struct {
214 id string
215 entryMap map[string]*entry
216 ordered []*entry
217 handle *trieHandle
218 }
219
220
221
222
223 func (o *ordering) insert(e *entry) {
224 if e.logical == noAnchor {
225 o.entryMap[e.str] = e
226 } else {
227
228 o.entryMap[fmt.Sprintf("[%s]", e.str)] = e
229
230 o.entryMap[fmt.Sprintf("<%s/>", strings.Replace(e.str, " ", "_", -1))] = e
231 }
232 o.ordered = append(o.ordered, e)
233 }
234
235
236
237 func (o *ordering) newEntry(s string, ces []rawCE) *entry {
238 e := &entry{
239 runes: []rune(s),
240 elems: ces,
241 str: s,
242 }
243 o.insert(e)
244 return e
245 }
246
247
248
249
250 func (o *ordering) find(str string) *entry {
251 e := o.entryMap[str]
252 if e == nil {
253 r := []rune(str)
254 if len(r) == 1 {
255 const (
256 firstHangul = 0xAC00
257 lastHangul = 0xD7A3
258 )
259 if r[0] >= firstHangul && r[0] <= lastHangul {
260 ce := []rawCE{}
261 nfd := norm.NFD.String(str)
262 for _, r := range nfd {
263 ce = append(ce, o.find(string(r)).elems...)
264 }
265 e = o.newEntry(nfd, ce)
266 } else {
267 e = o.newEntry(string(r[0]), []rawCE{
268 {w: []int{
269 implicitPrimary(r[0]),
270 defaultSecondary,
271 defaultTertiary,
272 int(r[0]),
273 },
274 },
275 })
276 e.modified = true
277 }
278 e.exclude = true
279 }
280 }
281 return e
282 }
283
284
285
286
287
288
289 func makeRootOrdering() ordering {
290 const max = unicode.MaxRune
291 o := ordering{
292 entryMap: make(map[string]*entry),
293 }
294 insert := func(typ logicalAnchor, s string, ce []int) {
295 e := &entry{
296 elems: []rawCE{{w: ce}},
297 str: s,
298 exclude: true,
299 logical: typ,
300 }
301 o.insert(e)
302 }
303 insert(firstAnchor, "first tertiary ignorable", []int{0, 0, 0, 0})
304 insert(lastAnchor, "last tertiary ignorable", []int{0, 0, 0, max})
305 insert(lastAnchor, "last primary ignorable", []int{0, defaultSecondary, defaultTertiary, max})
306 insert(lastAnchor, "last non ignorable", []int{maxPrimary, defaultSecondary, defaultTertiary, max})
307 insert(lastAnchor, "__END__", []int{1 << maxPrimaryBits, defaultSecondary, defaultTertiary, max})
308 return o
309 }
310
311
312
313
314
315 func (o *ordering) patchForInsert() {
316 for i := 0; i < len(o.ordered)-1; {
317 e := o.ordered[i]
318 lev := e.level
319 n := e.next
320 for ; n != nil && len(n.elems) > 1; n = n.next {
321 if n.level < lev {
322 lev = n.level
323 }
324 n.skipRemove = true
325 }
326 for ; o.ordered[i] != n; i++ {
327 o.ordered[i].level = lev
328 o.ordered[i].next = n
329 o.ordered[i+1].prev = e
330 }
331 }
332 }
333
334
335 func (o *ordering) clone() *ordering {
336 o.sort()
337 oo := ordering{
338 entryMap: make(map[string]*entry),
339 }
340 for _, e := range o.ordered {
341 ne := &entry{
342 runes: e.runes,
343 elems: e.elems,
344 str: e.str,
345 decompose: e.decompose,
346 exclude: e.exclude,
347 logical: e.logical,
348 }
349 oo.insert(ne)
350 }
351 oo.sort()
352 oo.patchForInsert()
353 return &oo
354 }
355
356
357
358 func (o *ordering) front() *entry {
359 e := o.ordered[0]
360 if e.prev != nil {
361 log.Panicf("unexpected first entry: %v", e)
362 }
363
364 e, _ = e.nextIndexed()
365 return e
366 }
367
368
369
370 func (o *ordering) sort() {
371 sort.Sort(sortedEntries(o.ordered))
372 l := o.ordered
373 for i := 1; i < len(l); i++ {
374 k := i - 1
375 l[k].next = l[i]
376 _, l[k].level = compareWeights(l[k].elems, l[i].elems)
377 l[i].prev = l[k]
378 }
379 }
380
381
382
383 func (o *ordering) genColElems(str string) []rawCE {
384 elems := []rawCE{}
385 for _, r := range []rune(str) {
386 for _, ce := range o.find(string(r)).elems {
387 if ce.w[0] != 0 || ce.w[1] != 0 || ce.w[2] != 0 {
388 elems = append(elems, ce)
389 }
390 }
391 }
392 return elems
393 }
394
View as plain text