1
2
3
4
5 package build
6
7 import (
8 "fmt"
9 "io"
10 "log"
11 "sort"
12 "strings"
13 "unicode/utf8"
14
15 "golang.org/x/text/internal/colltab"
16 "golang.org/x/text/language"
17 "golang.org/x/text/unicode/norm"
18 )
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37 type Builder struct {
38 index *trieBuilder
39 root ordering
40 locale []*Tailoring
41 t *table
42 err error
43 built bool
44
45 minNonVar int
46 varTop int
47
48
49 expIndex map[string]int
50 ctHandle map[string]ctHandle
51 ctElem map[string]int
52 }
53
54
55
56
57
58
59 type Tailoring struct {
60 id string
61 builder *Builder
62 index *ordering
63
64 anchor *entry
65 before bool
66 }
67
68
69 func NewBuilder() *Builder {
70 return &Builder{
71 index: newTrieBuilder(),
72 root: makeRootOrdering(),
73 expIndex: make(map[string]int),
74 ctHandle: make(map[string]ctHandle),
75 ctElem: make(map[string]int),
76 }
77 }
78
79
80
81 func (b *Builder) Tailoring(loc language.Tag) *Tailoring {
82 t := &Tailoring{
83 id: loc.String(),
84 builder: b,
85 index: b.root.clone(),
86 }
87 t.index.id = t.id
88 b.locale = append(b.locale, t)
89 return t
90 }
91
92
93
94
95
96
97
98
99
100
101 func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
102 str := string(runes)
103 elems := make([]rawCE, len(colelems))
104 for i, ce := range colelems {
105 if len(ce) == 0 {
106 break
107 }
108 elems[i] = makeRawCE(ce, 0)
109 if len(ce) == 1 {
110 elems[i].w[1] = defaultSecondary
111 }
112 if len(ce) <= 2 {
113 elems[i].w[2] = defaultTertiary
114 }
115 if len(ce) <= 3 {
116 elems[i].w[3] = ce[0]
117 }
118 }
119 for i, ce := range elems {
120 p := ce.w[0]
121 isvar := false
122 for _, j := range variables {
123 if i == j {
124 isvar = true
125 }
126 }
127 if isvar {
128 if p >= b.minNonVar && b.minNonVar > 0 {
129 return fmt.Errorf("primary value %X of variable is larger than the smallest non-variable %X", p, b.minNonVar)
130 }
131 if p > b.varTop {
132 b.varTop = p
133 }
134 } else if p > 1 {
135 if p <= b.varTop {
136 return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", p, b.varTop)
137 }
138 if b.minNonVar == 0 || p < b.minNonVar {
139 b.minNonVar = p
140 }
141 }
142 }
143 elems, err := convertLargeWeights(elems)
144 if err != nil {
145 return err
146 }
147 cccs := []uint8{}
148 nfd := norm.NFD.String(str)
149 for i := range nfd {
150 cccs = append(cccs, norm.NFD.PropertiesString(nfd[i:]).CCC())
151 }
152 if len(cccs) < len(elems) {
153 if len(cccs) > 2 {
154 return fmt.Errorf("number of decomposed characters should be greater or equal to the number of collation elements for len(colelems) > 3 (%d < %d)", len(cccs), len(elems))
155 }
156 p := len(elems) - 1
157 for ; p > 0 && elems[p].w[0] == 0; p-- {
158 elems[p].ccc = cccs[len(cccs)-1]
159 }
160 for ; p >= 0; p-- {
161 elems[p].ccc = cccs[0]
162 }
163 } else {
164 for i := range elems {
165 elems[i].ccc = cccs[i]
166 }
167 }
168
169 if len(elems) > 1 && len(cccs) > 1 && cccs[0] != 0 && cccs[0] != cccs[len(cccs)-1] {
170 return fmt.Errorf("incompatible CCC values for expansion %X (%d)", runes, cccs)
171 }
172 b.root.newEntry(str, elems)
173 return nil
174 }
175
176 func (t *Tailoring) setAnchor(anchor string) error {
177 anchor = norm.NFC.String(anchor)
178 a := t.index.find(anchor)
179 if a == nil {
180 a = t.index.newEntry(anchor, nil)
181 a.implicit = true
182 a.modified = true
183 for _, r := range []rune(anchor) {
184 e := t.index.find(string(r))
185 e.lock = true
186 }
187 }
188 t.anchor = a
189 return nil
190 }
191
192
193
194
195
196
197
198 func (t *Tailoring) SetAnchor(anchor string) error {
199 if err := t.setAnchor(anchor); err != nil {
200 return err
201 }
202 t.before = false
203 return nil
204 }
205
206
207
208 func (t *Tailoring) SetAnchorBefore(anchor string) error {
209 if err := t.setAnchor(anchor); err != nil {
210 return err
211 }
212 t.before = true
213 return nil
214 }
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259 func (t *Tailoring) Insert(level colltab.Level, str, extend string) error {
260 if t.anchor == nil {
261 return fmt.Errorf("%s:Insert: no anchor point set for tailoring of %s", t.id, str)
262 }
263 str = norm.NFC.String(str)
264 e := t.index.find(str)
265 if e == nil {
266 e = t.index.newEntry(str, nil)
267 } else if e.logical != noAnchor {
268 return fmt.Errorf("%s:Insert: cannot reinsert logical reset position %q", t.id, e.str)
269 }
270 if e.lock {
271 return fmt.Errorf("%s:Insert: cannot reinsert element %q", t.id, e.str)
272 }
273 a := t.anchor
274
275
276
277 e.before = t.before
278 if t.before {
279 t.before = false
280 if a.prev == nil {
281 a.insertBefore(e)
282 } else {
283 for a = a.prev; a.level > level; a = a.prev {
284 }
285 a.insertAfter(e)
286 }
287 e.level = level
288 } else {
289 for ; a.level > level; a = a.next {
290 }
291 e.level = a.level
292 if a != e {
293 a.insertAfter(e)
294 a.level = level
295 } else {
296
297
298
299 a.prev.level = level
300 }
301 }
302 e.extend = norm.NFD.String(extend)
303 e.exclude = false
304 e.modified = true
305 e.elems = nil
306 t.anchor = e
307 return nil
308 }
309
310 func (o *ordering) getWeight(e *entry) []rawCE {
311 if len(e.elems) == 0 && e.logical == noAnchor {
312 if e.implicit {
313 for _, r := range e.runes {
314 e.elems = append(e.elems, o.getWeight(o.find(string(r)))...)
315 }
316 } else if e.before {
317 count := [colltab.Identity + 1]int{}
318 a := e
319 for ; a.elems == nil && !a.implicit; a = a.next {
320 count[a.level]++
321 }
322 e.elems = []rawCE{makeRawCE(a.elems[0].w, a.elems[0].ccc)}
323 for i := colltab.Primary; i < colltab.Quaternary; i++ {
324 if count[i] != 0 {
325 e.elems[0].w[i] -= count[i]
326 break
327 }
328 }
329 if e.prev != nil {
330 o.verifyWeights(e.prev, e, e.prev.level)
331 }
332 } else {
333 prev := e.prev
334 e.elems = nextWeight(prev.level, o.getWeight(prev))
335 o.verifyWeights(e, e.next, e.level)
336 }
337 }
338 return e.elems
339 }
340
341 func (o *ordering) addExtension(e *entry) {
342 if ex := o.find(e.extend); ex != nil {
343 e.elems = append(e.elems, ex.elems...)
344 } else {
345 for _, r := range []rune(e.extend) {
346 e.elems = append(e.elems, o.find(string(r)).elems...)
347 }
348 }
349 e.extend = ""
350 }
351
352 func (o *ordering) verifyWeights(a, b *entry, level colltab.Level) error {
353 if level == colltab.Identity || b == nil || b.elems == nil || a.elems == nil {
354 return nil
355 }
356 for i := colltab.Primary; i < level; i++ {
357 if a.elems[0].w[i] < b.elems[0].w[i] {
358 return nil
359 }
360 }
361 if a.elems[0].w[level] >= b.elems[0].w[level] {
362 err := fmt.Errorf("%s:overflow: collation elements of %q (%X) overflows those of %q (%X) at level %d (%X >= %X)", o.id, a.str, a.runes, b.str, b.runes, level, a.elems, b.elems)
363 log.Println(err)
364
365 }
366 return nil
367 }
368
369 func (b *Builder) error(e error) {
370 if e != nil {
371 b.err = e
372 }
373 }
374
375 func (b *Builder) errorID(locale string, e error) {
376 if e != nil {
377 b.err = fmt.Errorf("%s:%v", locale, e)
378 }
379 }
380
381
382 func (o *ordering) patchNorm() {
383
384 for _, e := range o.ordered {
385 nfd := norm.NFD.String(e.str)
386 if nfd != e.str {
387 if e0 := o.find(nfd); e0 != nil && !e0.modified {
388 e0.elems = e.elems
389 } else if e.modified && !equalCEArrays(o.genColElems(nfd), e.elems) {
390 e := o.newEntry(nfd, e.elems)
391 e.modified = true
392 }
393 }
394 }
395
396 for _, e := range o.ordered {
397 nfd := norm.NFD.String(e.str)
398 if e.modified || nfd == e.str {
399 continue
400 }
401 if e0 := o.find(nfd); e0 != nil {
402 e.elems = e0.elems
403 } else {
404 e.elems = o.genColElems(nfd)
405 if norm.NFD.LastBoundary([]byte(nfd)) == 0 {
406 r := []rune(nfd)
407 head := string(r[0])
408 tail := ""
409 for i := 1; i < len(r); i++ {
410 s := norm.NFC.String(head + string(r[i]))
411 if e0 := o.find(s); e0 != nil && e0.modified {
412 head = s
413 } else {
414 tail += string(r[i])
415 }
416 }
417 e.elems = append(o.genColElems(head), o.genColElems(tail)...)
418 }
419 }
420 }
421
422 for _, e := range o.ordered {
423 if len(e.runes) > 1 && equalCEArrays(o.genColElems(e.str), e.elems) {
424 e.exclude = true
425 }
426 }
427 }
428
429 func (b *Builder) buildOrdering(o *ordering) {
430 for _, e := range o.ordered {
431 o.getWeight(e)
432 }
433 for _, e := range o.ordered {
434 o.addExtension(e)
435 }
436 o.patchNorm()
437 o.sort()
438 simplify(o)
439 b.processExpansions(o)
440 b.processContractions(o)
441
442 t := newNode()
443 for e := o.front(); e != nil; e, _ = e.nextIndexed() {
444 if !e.skip() {
445 ce, err := e.encode()
446 b.errorID(o.id, err)
447 t.insert(e.runes[0], ce)
448 }
449 }
450 o.handle = b.index.addTrie(t)
451 }
452
453 func (b *Builder) build() (*table, error) {
454 if b.built {
455 return b.t, b.err
456 }
457 b.built = true
458 b.t = &table{
459 Table: colltab.Table{
460 MaxContractLen: utf8.UTFMax,
461 VariableTop: uint32(b.varTop),
462 },
463 }
464
465 b.buildOrdering(&b.root)
466 b.t.root = b.root.handle
467 for _, t := range b.locale {
468 b.buildOrdering(t.index)
469 if b.err != nil {
470 break
471 }
472 }
473 i, err := b.index.generate()
474 b.t.trie = *i
475 b.t.Index = colltab.Trie{
476 Index: i.index,
477 Values: i.values,
478 Index0: i.index[blockSize*b.t.root.lookupStart:],
479 Values0: i.values[blockSize*b.t.root.valueStart:],
480 }
481 b.error(err)
482 return b.t, b.err
483 }
484
485
486 func (b *Builder) Build() (colltab.Weighter, error) {
487 table, err := b.build()
488 if err != nil {
489 return nil, err
490 }
491 return table, nil
492 }
493
494
495 func (t *Tailoring) Build() (colltab.Weighter, error) {
496
497 return nil, nil
498 }
499
500
501
502 func (b *Builder) Print(w io.Writer) (n int, err error) {
503 p := func(nn int, e error) {
504 n += nn
505 if err == nil {
506 err = e
507 }
508 }
509 t, err := b.build()
510 if err != nil {
511 return 0, err
512 }
513 p(fmt.Fprintf(w, `var availableLocales = "und`))
514 for _, loc := range b.locale {
515 if loc.id != "und" {
516 p(fmt.Fprintf(w, ",%s", loc.id))
517 }
518 }
519 p(fmt.Fprint(w, "\"\n\n"))
520 p(fmt.Fprintf(w, "const varTop = 0x%x\n\n", b.varTop))
521 p(fmt.Fprintln(w, "var locales = [...]tableIndex{"))
522 for _, loc := range b.locale {
523 if loc.id == "und" {
524 p(t.fprintIndex(w, loc.index.handle, loc.id))
525 }
526 }
527 for _, loc := range b.locale {
528 if loc.id != "und" {
529 p(t.fprintIndex(w, loc.index.handle, loc.id))
530 }
531 }
532 p(fmt.Fprint(w, "}\n\n"))
533 n, _, err = t.fprint(w, "main")
534 return
535 }
536
537
538
539 func reproducibleFromNFKD(e *entry, exp, nfkd []rawCE) bool {
540
541 if len(exp) != len(nfkd) {
542 return false
543 }
544 for i, ce := range exp {
545
546 if ce.w[0] != nfkd[i].w[0] || ce.w[1] != nfkd[i].w[1] {
547 return false
548 }
549
550
551
552 if i >= 2 && ce.w[2] != maxTertiary {
553 return false
554 }
555 if _, err := makeCE(ce); err != nil {
556
557 return false
558 }
559 }
560 return true
561 }
562
563 func simplify(o *ordering) {
564
565
566 keep := make(map[rune]bool)
567 for e := o.front(); e != nil; e, _ = e.nextIndexed() {
568 if len(e.runes) > 1 {
569 keep[e.runes[0]] = true
570 }
571 }
572
573 for e := o.front(); e != nil; e, _ = e.nextIndexed() {
574 s := e.str
575 nfkd := norm.NFKD.String(s)
576 nfd := norm.NFD.String(s)
577 if e.decompose || len(e.runes) > 1 || len(e.elems) == 1 || keep[e.runes[0]] || nfkd == nfd {
578 continue
579 }
580 if reproducibleFromNFKD(e, e.elems, o.genColElems(nfkd)) {
581 e.decompose = true
582 }
583 }
584 }
585
586
587
588
589 func (b *Builder) appendExpansion(e *entry) int {
590 t := b.t
591 i := len(t.ExpandElem)
592 ce := uint32(len(e.elems))
593 t.ExpandElem = append(t.ExpandElem, ce)
594 for _, w := range e.elems {
595 ce, err := makeCE(w)
596 if err != nil {
597 b.error(err)
598 return -1
599 }
600 t.ExpandElem = append(t.ExpandElem, ce)
601 }
602 return i
603 }
604
605
606
607 func (b *Builder) processExpansions(o *ordering) {
608 for e := o.front(); e != nil; e, _ = e.nextIndexed() {
609 if !e.expansion() {
610 continue
611 }
612 key := fmt.Sprintf("%v", e.elems)
613 i, ok := b.expIndex[key]
614 if !ok {
615 i = b.appendExpansion(e)
616 b.expIndex[key] = i
617 }
618 e.expansionIndex = i
619 }
620 }
621
622 func (b *Builder) processContractions(o *ordering) {
623
624 starters := []rune{}
625 cm := make(map[rune][]*entry)
626 for e := o.front(); e != nil; e, _ = e.nextIndexed() {
627 if e.contraction() {
628 if len(e.str) > b.t.MaxContractLen {
629 b.t.MaxContractLen = len(e.str)
630 }
631 r := e.runes[0]
632 if _, ok := cm[r]; !ok {
633 starters = append(starters, r)
634 }
635 cm[r] = append(cm[r], e)
636 }
637 }
638
639 for e := o.front(); e != nil; e, _ = e.nextIndexed() {
640 if !e.contraction() {
641 r := e.runes[0]
642 if _, ok := cm[r]; ok {
643 cm[r] = append(cm[r], e)
644 }
645 }
646 }
647
648 t := b.t
649 for _, r := range starters {
650 l := cm[r]
651
652
653
654 sufx := []string{}
655 hasSingle := false
656 for _, e := range l {
657 if len(e.runes) > 1 {
658 sufx = append(sufx, string(e.runes[1:]))
659 } else {
660 hasSingle = true
661 }
662 }
663 if !hasSingle {
664 b.error(fmt.Errorf("no single entry for starter rune %U found", r))
665 continue
666 }
667
668 sort.Strings(sufx)
669 key := strings.Join(sufx, "\n")
670 handle, ok := b.ctHandle[key]
671 if !ok {
672 var err error
673 handle, err = appendTrie(&t.ContractTries, sufx)
674 if err != nil {
675 b.error(err)
676 }
677 b.ctHandle[key] = handle
678 }
679
680 es := make([]*entry, len(l))
681 for _, e := range l {
682 var p, sn int
683 if len(e.runes) > 1 {
684 str := []byte(string(e.runes[1:]))
685 p, sn = lookup(&t.ContractTries, handle, str)
686 if sn != len(str) {
687 log.Fatalf("%s: processContractions: unexpected length for '%X'; len=%d; want %d", o.id, e.runes, sn, len(str))
688 }
689 }
690 if es[p] != nil {
691 log.Fatalf("%s: multiple contractions for position %d for rune %U", o.id, p, e.runes[0])
692 }
693 es[p] = e
694 }
695
696 elems := []uint32{}
697 for _, e := range es {
698 ce, err := e.encodeBase()
699 b.errorID(o.id, err)
700 elems = append(elems, ce)
701 }
702 key = fmt.Sprintf("%v", elems)
703 i, ok := b.ctElem[key]
704 if !ok {
705 i = len(t.ContractElem)
706 b.ctElem[key] = i
707 t.ContractElem = append(t.ContractElem, elems...)
708 }
709
710 es[0].contractionIndex = i
711 es[0].contractionHandle = handle
712 }
713 }
714
View as plain text