1
2
3
4
5
6
7
8
9
10
11 package main
12
13 import (
14 "bytes"
15 "encoding/binary"
16 "flag"
17 "fmt"
18 "io"
19 "log"
20 "sort"
21 "strconv"
22 "strings"
23
24 "golang.org/x/text/internal/gen"
25 "golang.org/x/text/internal/triegen"
26 "golang.org/x/text/internal/ucd"
27 )
28
29 func main() {
30 gen.Init()
31 loadUnicodeData()
32 compactCCC()
33 loadCompositionExclusions()
34 completeCharFields(FCanonical)
35 completeCharFields(FCompatibility)
36 computeNonStarterCounts()
37 verifyComputed()
38 printChars()
39 testDerived()
40 printTestdata()
41 makeTables()
42 }
43
// tablelist selects which tables makeTables emits.
var tablelist = flag.String("tables", "all",
	"comma-separated list of which tables to generate; can be 'decomp', 'recomp', 'info' and 'all'")

// test is the -test flag.
// NOTE(review): no code visible in this file reads it — confirm it is still needed.
var test = flag.Bool("test", false,
	"test existing tables against DerivedNormalizationProps and generate test data for regression testing")

// verbose makes printChars dump every valid character to stdout.
var verbose = flag.Bool("verbose", false,
	"write data to stdout as it is parsed")
56
// MaxChar is the highest code point handled by the generator; the chars table
// has MaxChar+1 entries.
const MaxChar = 0x10FFFF
58
59
60
61
62
63
// QCResult is the outcome of a normalization quick check for a rune.
type QCResult int

// Quick check outcomes. QCUnknown is the zero value.
const (
	QCUnknown QCResult = iota
	QCYes
	QCNo
	QCMaybe
)

// String returns "Yes", "No" or "Maybe" for the corresponding result and
// "***UNKNOWN***" for every other value, including QCUnknown.
func (r QCResult) String() string {
	names := [...]string{QCYes: "Yes", QCNo: "No", QCMaybe: "Maybe"}
	if r > QCUnknown && int(r) < len(names) {
		return names[r]
	}
	return "***UNKNOWN***"
}
84
// Normalization form types, used as indices into Char.forms.
// FNumberOfFormTypes is the number of form types.
const (
	FCanonical = iota
	FCompatibility
	FNumberOfFormTypes
)
90
// Normalization modes, used as indices into FormInfo.quickCheck and
// FormInfo.verified. MNumberOfModes is the number of modes.
const (
	MComposed = iota
	MDecomposed
	MNumberOfModes
)
96
97
// Char holds everything the generator knows about a single code point.
type Char struct {
	name          string // Name field from UnicodeData.txt
	codePoint     rune   // set by loadUnicodeData; stays 0 for entries that were never loaded
	ccc           uint8  // canonical combining class; replaced by its compact index in compactCCC
	origCCC       uint8  // combining class as read from the data file, saved by compactCCC
	excludeInComp bool   // listed in CompositionExclusions.txt
	compatDecomp  bool   // decomposition mapping carried a leading non-hex field that had to be skipped

	// Filled in by computeNonStarterCounts.
	nTrailingNonStarters uint8
	nLeadingNonStarters  uint8

	forms [FNumberOfFormTypes]FormInfo // per-form data, indexed by FCanonical/FCompatibility

	state State // NOTE(review): never assigned in the visible part of this file
}
113
114 var chars = make([]Char, MaxChar+1)
115 var cccMap = make(map[uint8]uint8)
116
117 func (c Char) String() string {
118 buf := new(bytes.Buffer)
119
120 fmt.Fprintf(buf, "%U [%s]:\n", c.codePoint, c.name)
121 fmt.Fprintf(buf, " ccc: %v\n", c.ccc)
122 fmt.Fprintf(buf, " excludeInComp: %v\n", c.excludeInComp)
123 fmt.Fprintf(buf, " compatDecomp: %v\n", c.compatDecomp)
124 fmt.Fprintf(buf, " state: %v\n", c.state)
125 fmt.Fprintf(buf, " NFC:\n")
126 fmt.Fprint(buf, c.forms[FCanonical])
127 fmt.Fprintf(buf, " NFKC:\n")
128 fmt.Fprint(buf, c.forms[FCompatibility])
129
130 return buf.String()
131 }
132
133
134
135
136
137
138
// State is stored in Char.state. SNormal is the zero value; Char.isValid
// treats SMissing as invalid.
// NOTE(review): SFirst and SLast presumably mark the ends of a code point
// range — nothing in the visible source uses them; confirm.
type State int

const (
	SNormal State = iota
	SFirst
	SLast
	SMissing
)
147
// lastChar starts out as U+0000.
// NOTE(review): not referenced anywhere in the visible source — confirm it is still needed.
var lastChar = rune('\u0000')
149
150 func (c Char) isValid() bool {
151 return c.codePoint != 0 && c.state != SMissing
152 }
153
// FormInfo holds the data of one rune for one normalization form type
// (canonical or compatibility).
type FormInfo struct {
	quickCheck [MNumberOfModes]QCResult // quick check result per mode; computed by completeCharFields
	verified   [MNumberOfModes]bool     // set by testDerived once the value was compared against DerivedNormalizationProps.txt

	combinesForward  bool          // set on the first rune of a two-rune decomposition that is not one-way, and on some Hangul/Jamo
	combinesBackward bool          // set on the second rune of such a decomposition, and on Jamo V and T
	isOneWay         bool          // the decomposition is never recomposed (see completeCharFields)
	inDecomp         bool          // the rune occurs in the direct decomposition of some rune
	decomp           Decomposition // direct decomposition mapping as read from UnicodeData.txt
	expandedDecomp   Decomposition // decomp expanded recursively, with runes ordered by insertOrdered
}
165
166 func (f FormInfo) String() string {
167 buf := bytes.NewBuffer(make([]byte, 0))
168
169 fmt.Fprintf(buf, " quickCheck[C]: %v\n", f.quickCheck[MComposed])
170 fmt.Fprintf(buf, " quickCheck[D]: %v\n", f.quickCheck[MDecomposed])
171 fmt.Fprintf(buf, " cmbForward: %v\n", f.combinesForward)
172 fmt.Fprintf(buf, " cmbBackward: %v\n", f.combinesBackward)
173 fmt.Fprintf(buf, " isOneWay: %v\n", f.isOneWay)
174 fmt.Fprintf(buf, " inDecomp: %v\n", f.inDecomp)
175 fmt.Fprintf(buf, " decomposition: %X\n", f.decomp)
176 fmt.Fprintf(buf, " expandedDecomp: %X\n", f.expandedDecomp)
177
178 return buf.String()
179 }
180
// Decomposition is the sequence of runes a rune decomposes into.
type Decomposition []rune
182
// parseDecomposition parses s, a space-separated list of hexadecimal code
// points, into a slice of runes. When skipfirst is set the first field is
// dropped before parsing; loadUnicodeData uses this to skip a leading tag.
//
// If any field is not a valid hexadecimal number, the error from
// strconv.ParseUint is returned together with a nil slice; no partial result
// is handed back. An input that yields no runes returns a nil slice.
func parseDecomposition(s string, skipfirst bool) (a []rune, err error) {
	// strings.Split with a non-empty separator always yields at least one
	// field, so dropping the first one is always safe.
	fields := strings.Split(s, " ")
	if skipfirst {
		fields = fields[1:]
	}
	for _, field := range fields {
		point, perr := strconv.ParseUint(field, 16, 64)
		if perr != nil {
			return nil, perr
		}
		a = append(a, rune(point))
	}
	return a, nil
}
197
198 func loadUnicodeData() {
199 f := gen.OpenUCDFile("UnicodeData.txt")
200 defer f.Close()
201 p := ucd.New(f)
202 for p.Next() {
203 r := p.Rune(ucd.CodePoint)
204 char := &chars[r]
205
206 char.ccc = uint8(p.Uint(ucd.CanonicalCombiningClass))
207 decmap := p.String(ucd.DecompMapping)
208
209 exp, err := parseDecomposition(decmap, false)
210 isCompat := false
211 if err != nil {
212 if len(decmap) > 0 {
213 exp, err = parseDecomposition(decmap, true)
214 if err != nil {
215 log.Fatalf(`%U: bad decomp |%v|: "%s"`, r, decmap, err)
216 }
217 isCompat = true
218 }
219 }
220
221 char.name = p.String(ucd.Name)
222 char.codePoint = r
223 char.forms[FCompatibility].decomp = exp
224 if !isCompat {
225 char.forms[FCanonical].decomp = exp
226 } else {
227 char.compatDecomp = true
228 }
229 if len(decmap) > 0 {
230 char.forms[FCompatibility].decomp = exp
231 }
232 }
233 if err := p.Err(); err != nil {
234 log.Fatal(err)
235 }
236 }
237
238
239
240 func compactCCC() {
241 m := make(map[uint8]uint8)
242 for i := range chars {
243 c := &chars[i]
244 m[c.ccc] = 0
245 }
246 cccs := []int{}
247 for v, _ := range m {
248 cccs = append(cccs, int(v))
249 }
250 sort.Ints(cccs)
251 for i, c := range cccs {
252 cccMap[uint8(i)] = uint8(c)
253 m[uint8(c)] = uint8(i)
254 }
255 for i := range chars {
256 c := &chars[i]
257 c.origCCC = c.ccc
258 c.ccc = m[c.ccc]
259 }
260 if len(m) >= 1<<6 {
261 log.Fatalf("too many difference CCC values: %d >= 64", len(m))
262 }
263 }
264
265
266
267
268 func loadCompositionExclusions() {
269 f := gen.OpenUCDFile("CompositionExclusions.txt")
270 defer f.Close()
271 p := ucd.New(f)
272 for p.Next() {
273 c := &chars[p.Rune(0)]
274 if c.excludeInComp {
275 log.Fatalf("%U: Duplicate entry in exclusions.", c.codePoint)
276 }
277 c.excludeInComp = true
278 }
279 if e := p.Err(); e != nil {
280 log.Fatal(e)
281 }
282 }
283
284
285
286
287 func hasCompatDecomp(r rune) bool {
288 c := &chars[r]
289 if c.compatDecomp {
290 return true
291 }
292 for _, d := range c.forms[FCompatibility].decomp {
293 if hasCompatDecomp(d) {
294 return true
295 }
296 }
297 return false
298 }
299
300
// Hangul syllable and Jamo ranges. The *End values are used as exclusive
// upper bounds by isHangul and completeCharFields.
const (
	HangulBase = 0xAC00
	HangulEnd  = 0xD7A4

	JamoLBase = 0x1100
	JamoLEnd  = 0x1113
	JamoVBase = 0x1161
	JamoVEnd  = 0x1176
	JamoTBase = 0x11A8
	JamoTEnd  = 0x11C3

	JamoLVTCount = 19 * 21 * 28 // equals HangulEnd - HangulBase
	JamoTCount   = 28           // every JamoTCount-th syllable offset is tested by isHangulWithoutJamoT
)
315
316 func isHangul(r rune) bool {
317 return HangulBase <= r && r < HangulEnd
318 }
319
320 func isHangulWithoutJamoT(r rune) bool {
321 if !isHangul(r) {
322 return false
323 }
324 r -= HangulBase
325 return r < JamoLVTCount && r%JamoTCount == 0
326 }
327
328 func ccc(r rune) uint8 {
329 return chars[r].ccc
330 }
331
332
333 func insertOrdered(b Decomposition, r rune) Decomposition {
334 n := len(b)
335 b = append(b, 0)
336 cc := ccc(r)
337 if cc > 0 {
338
339 for ; n > 0; n-- {
340 if ccc(b[n-1]) <= cc {
341 break
342 }
343 b[n] = b[n-1]
344 }
345 }
346 b[n] = r
347 return b
348 }
349
350
351 func decomposeRecursive(form int, r rune, d Decomposition) Decomposition {
352 dcomp := chars[r].forms[form].decomp
353 if len(dcomp) == 0 {
354 return insertOrdered(d, r)
355 }
356 for _, c := range dcomp {
357 d = decomposeRecursive(form, c, d)
358 }
359 return d
360 }
361
362 func completeCharFields(form int) {
363
364 for i := range chars {
365 f := &chars[i].forms[form]
366 if len(f.decomp) == 0 {
367 continue
368 }
369 exp := make(Decomposition, 0)
370 for _, c := range f.decomp {
371 exp = decomposeRecursive(form, c, exp)
372 }
373 f.expandedDecomp = exp
374 }
375
376
377 for i := range chars {
378 c := &chars[i]
379 f := &c.forms[form]
380
381
382 f.isOneWay = c.excludeInComp
383
384
385 f.isOneWay = f.isOneWay || len(f.decomp) == 1
386
387
388 if len(f.decomp) > 1 {
389 chk := c.ccc != 0 || chars[f.decomp[0]].ccc != 0
390 f.isOneWay = f.isOneWay || chk
391 }
392
393
394 f.isOneWay = f.isOneWay || len(f.decomp) > 2
395
396 if form == FCompatibility {
397 f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint)
398 }
399
400 for _, r := range f.decomp {
401 chars[r].forms[form].inDecomp = true
402 }
403 }
404
405
406 for i := range chars {
407 c := &chars[i]
408 f := &c.forms[form]
409
410 if !f.isOneWay && len(f.decomp) == 2 {
411 f0 := &chars[f.decomp[0]].forms[form]
412 f1 := &chars[f.decomp[1]].forms[form]
413 if !f0.isOneWay {
414 f0.combinesForward = true
415 }
416 if !f1.isOneWay {
417 f1.combinesBackward = true
418 }
419 }
420 if isHangulWithoutJamoT(rune(i)) {
421 f.combinesForward = true
422 }
423 }
424
425
426 for i := range chars {
427 c := &chars[i]
428 f := &c.forms[form]
429
430 switch {
431 case len(f.decomp) > 0:
432 f.quickCheck[MDecomposed] = QCNo
433 case isHangul(rune(i)):
434 f.quickCheck[MDecomposed] = QCNo
435 default:
436 f.quickCheck[MDecomposed] = QCYes
437 }
438 switch {
439 case f.isOneWay:
440 f.quickCheck[MComposed] = QCNo
441 case (i & 0xffff00) == JamoLBase:
442 f.quickCheck[MComposed] = QCYes
443 if JamoLBase <= i && i < JamoLEnd {
444 f.combinesForward = true
445 }
446 if JamoVBase <= i && i < JamoVEnd {
447 f.quickCheck[MComposed] = QCMaybe
448 f.combinesBackward = true
449 f.combinesForward = true
450 }
451 if JamoTBase <= i && i < JamoTEnd {
452 f.quickCheck[MComposed] = QCMaybe
453 f.combinesBackward = true
454 }
455 case !f.combinesBackward:
456 f.quickCheck[MComposed] = QCYes
457 default:
458 f.quickCheck[MComposed] = QCMaybe
459 }
460 }
461 }
462
463 func computeNonStarterCounts() {
464
465 for i := range chars {
466 c := &chars[i]
467
468 runes := []rune{rune(i)}
469
470
471 if exp := c.forms[FCompatibility].expandedDecomp; len(exp) > 0 {
472 runes = exp
473 }
474
475
476 for _, r := range runes {
477 if cr := &chars[r]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward {
478 break
479 }
480 c.nLeadingNonStarters++
481 }
482 for i := len(runes) - 1; i >= 0; i-- {
483 if cr := &chars[runes[i]]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward {
484 break
485 }
486 c.nTrailingNonStarters++
487 }
488 if c.nTrailingNonStarters > 3 {
489 log.Fatalf("%U: Decomposition with more than 3 (%d) trailing modifiers (%U)", i, c.nTrailingNonStarters, runes)
490 }
491
492 if isHangul(rune(i)) {
493 c.nTrailingNonStarters = 2
494 if isHangulWithoutJamoT(rune(i)) {
495 c.nTrailingNonStarters = 1
496 }
497 }
498
499 if l, t := c.nLeadingNonStarters, c.nTrailingNonStarters; l > 0 && l != t {
500 log.Fatalf("%U: number of leading and trailing non-starters should be equal (%d vs %d)", i, l, t)
501 }
502 if t := c.nTrailingNonStarters; t > 3 {
503 log.Fatalf("%U: number of trailing non-starters is %d > 3", t)
504 }
505 }
506 }
507
// printBytes writes b to w as the Go declaration of a byte array called name.
// A size comment precedes the declaration, a range comment precedes every
// group of 64 bytes, and a line break follows every 8 bytes.
func printBytes(w io.Writer, b []byte, name string) {
	fmt.Fprintf(w, "// %s: %d bytes\n", name, len(b))
	fmt.Fprintf(w, "var %s = [...]byte {", name)
	for off, v := range b {
		if off%64 == 0 {
			fmt.Fprintf(w, "\n// Bytes %x - %x\n", off, off+63)
		} else if off%8 == 0 {
			fmt.Fprintf(w, "\n")
		}
		fmt.Fprintf(w, "0x%.2X, ", v)
	}
	fmt.Fprint(w, "\n}\n\n")
}
522
523
524 func makeEntry(f *FormInfo, c *Char) uint16 {
525 e := uint16(0)
526 if r := c.codePoint; HangulBase <= r && r < HangulEnd {
527 e |= 0x40
528 }
529 if f.combinesForward {
530 e |= 0x20
531 }
532 if f.quickCheck[MDecomposed] == QCNo {
533 e |= 0x4
534 }
535 switch f.quickCheck[MComposed] {
536 case QCYes:
537 case QCNo:
538 e |= 0x10
539 case QCMaybe:
540 e |= 0x18
541 default:
542 log.Fatalf("Illegal quickcheck value %v.", f.quickCheck[MComposed])
543 }
544 e |= uint16(c.nTrailingNonStarters)
545 return e
546 }
547
548
549
// decompSet holds one set of encoded decomposition strings per category.
type decompSet [lastDecomp]map[string]bool

// Decomposition categories, used as indices into a decompSet. lastDecomp is
// the number of categories.
const (
	normalDecomp = iota
	firstMulti
	firstCCC
	endMulti
	firstLeadingCCC
	firstCCCZeroExcept
	firstStarterWithNLead
	lastDecomp
)

// cname[i] is the name of the constant that printCharInfoTables emits after
// writing the strings of category i; its value is the offset at which the
// next category starts.
var cname = []string{"firstMulti", "firstCCC", "endMulti", "firstLeadingCCC", "firstCCCZeroExcept", "firstStarterWithNLead", "lastDecomp"}

// makeDecompSet returns a decompSet in which every category is an empty,
// ready-to-use set.
func makeDecompSet() decompSet {
	var set decompSet
	for i := 0; i < len(set); i++ {
		set[i] = map[string]bool{}
	}
	return set
}

// insert adds s to the set of category key.
func (m *decompSet) insert(key int, s string) {
	(*m)[key][s] = true
}
575
576 func printCharInfoTables(w io.Writer) int {
577 mkstr := func(r rune, f *FormInfo) (int, string) {
578 d := f.expandedDecomp
579 s := string([]rune(d))
580 if max := 1 << 6; len(s) >= max {
581 const msg = "%U: too many bytes in decomposition: %d >= %d"
582 log.Fatalf(msg, r, len(s), max)
583 }
584 head := uint8(len(s))
585 if f.quickCheck[MComposed] != QCYes {
586 head |= 0x40
587 }
588 if f.combinesForward {
589 head |= 0x80
590 }
591 s = string([]byte{head}) + s
592
593 lccc := ccc(d[0])
594 tccc := ccc(d[len(d)-1])
595 cc := ccc(r)
596 if cc != 0 && lccc == 0 && tccc == 0 {
597 log.Fatalf("%U: trailing and leading ccc are 0 for non-zero ccc %d", r, cc)
598 }
599 if tccc < lccc && lccc != 0 {
600 const msg = "%U: lccc (%d) must be <= tcc (%d)"
601 log.Fatalf(msg, r, lccc, tccc)
602 }
603 index := normalDecomp
604 nTrail := chars[r].nTrailingNonStarters
605 nLead := chars[r].nLeadingNonStarters
606 if tccc > 0 || lccc > 0 || nTrail > 0 {
607 tccc <<= 2
608 tccc |= nTrail
609 s += string([]byte{tccc})
610 index = endMulti
611 for _, r := range d[1:] {
612 if ccc(r) == 0 {
613 index = firstCCC
614 }
615 }
616 if lccc > 0 || nLead > 0 {
617 s += string([]byte{lccc})
618 if index == firstCCC {
619 log.Fatalf("%U: multi-segment decomposition not supported for decompositions with leading CCC != 0", r)
620 }
621 index = firstLeadingCCC
622 }
623 if cc != lccc {
624 if cc != 0 {
625 log.Fatalf("%U: for lccc != ccc, expected ccc to be 0; was %d", r, cc)
626 }
627 index = firstCCCZeroExcept
628 }
629 } else if len(d) > 1 {
630 index = firstMulti
631 }
632 return index, s
633 }
634
635 decompSet := makeDecompSet()
636 const nLeadStr = "\x00\x01"
637 decompSet.insert(firstStarterWithNLead, nLeadStr)
638
639
640
641 for _, c := range chars {
642 for _, f := range c.forms {
643 if len(f.expandedDecomp) == 0 {
644 continue
645 }
646 if f.combinesBackward {
647 log.Fatalf("%U: combinesBackward and decompose", c.codePoint)
648 }
649 index, s := mkstr(c.codePoint, &f)
650 decompSet.insert(index, s)
651 }
652 }
653
654 decompositions := bytes.NewBuffer(make([]byte, 0, 10000))
655 size := 0
656 positionMap := make(map[string]uint16)
657 decompositions.WriteString("\000")
658 fmt.Fprintln(w, "const (")
659 for i, m := range decompSet {
660 sa := []string{}
661 for s := range m {
662 sa = append(sa, s)
663 }
664 sort.Strings(sa)
665 for _, s := range sa {
666 p := decompositions.Len()
667 decompositions.WriteString(s)
668 positionMap[s] = uint16(p)
669 }
670 if cname[i] != "" {
671 fmt.Fprintf(w, "%s = 0x%X\n", cname[i], decompositions.Len())
672 }
673 }
674 fmt.Fprintln(w, "maxDecomp = 0x8000")
675 fmt.Fprintln(w, ")")
676 b := decompositions.Bytes()
677 printBytes(w, b, "decomps")
678 size += len(b)
679
680 varnames := []string{"nfc", "nfkc"}
681 for i := 0; i < FNumberOfFormTypes; i++ {
682 trie := triegen.NewTrie(varnames[i])
683
684 for r, c := range chars {
685 f := c.forms[i]
686 d := f.expandedDecomp
687 if len(d) != 0 {
688 _, key := mkstr(c.codePoint, &f)
689 trie.Insert(rune(r), uint64(positionMap[key]))
690 if c.ccc != ccc(d[0]) {
691
692 if ccc(d[0]) == 0 {
693 log.Fatalf("Expected leading CCC to be non-zero; ccc is %d", c.ccc)
694 }
695 }
696 } else if c.nLeadingNonStarters > 0 && len(f.expandedDecomp) == 0 && c.ccc == 0 && !f.combinesBackward {
697
698
699 trie.Insert(c.codePoint, uint64(positionMap[nLeadStr]))
700 } else if v := makeEntry(&f, &c)<<8 | uint16(c.ccc); v != 0 {
701 trie.Insert(c.codePoint, uint64(0x8000|v))
702 }
703 }
704 sz, err := trie.Gen(w, triegen.Compact(&normCompacter{name: varnames[i]}))
705 if err != nil {
706 log.Fatal(err)
707 }
708 size += sz
709 }
710 return size
711 }
712
// contains reports whether s is an element of sa.
func contains(sa []string, s string) bool {
	for i := 0; i < len(sa); i++ {
		if sa[i] == s {
			return true
		}
	}
	return false
}
721
722 func makeTables() {
723 w := &bytes.Buffer{}
724
725 size := 0
726 if *tablelist == "" {
727 return
728 }
729 list := strings.Split(*tablelist, ",")
730 if *tablelist == "all" {
731 list = []string{"recomp", "info"}
732 }
733
734
735 max := 0
736 for _, c := range chars {
737 if n := len(string(c.forms[FCompatibility].expandedDecomp)); n > max {
738 max = n
739 }
740 }
741 fmt.Fprintln(w, `import "sync"`)
742 fmt.Fprintln(w)
743
744 fmt.Fprintln(w, "const (")
745 fmt.Fprintln(w, "\t// Version is the Unicode edition from which the tables are derived.")
746 fmt.Fprintf(w, "\tVersion = %q\n", gen.UnicodeVersion())
747 fmt.Fprintln(w)
748 fmt.Fprintln(w, "\t// MaxTransformChunkSize indicates the maximum number of bytes that Transform")
749 fmt.Fprintln(w, "\t// may need to write atomically for any Form. Making a destination buffer at")
750 fmt.Fprintln(w, "\t// least this size ensures that Transform can always make progress and that")
751 fmt.Fprintln(w, "\t// the user does not need to grow the buffer on an ErrShortDst.")
752 fmt.Fprintf(w, "\tMaxTransformChunkSize = %d+maxNonStarters*4\n", len(string(0x034F))+max)
753 fmt.Fprintln(w, ")\n")
754
755
756 size += len(cccMap)
757 fmt.Fprintf(w, "var ccc = [%d]uint8{", len(cccMap))
758 for i := 0; i < len(cccMap); i++ {
759 if i%8 == 0 {
760 fmt.Fprintln(w)
761 }
762 fmt.Fprintf(w, "%3d, ", cccMap[uint8(i)])
763 }
764 fmt.Fprintln(w, "\n}\n")
765
766 if contains(list, "info") {
767 size += printCharInfoTables(w)
768 }
769
770 if contains(list, "recomp") {
771
772
773
774
775
776
777
778
779
780 nrentries := 0
781 for _, c := range chars {
782 f := c.forms[FCanonical]
783 if !f.isOneWay && len(f.decomp) > 0 {
784 nrentries++
785 }
786 }
787 sz := nrentries * 8
788 size += sz
789 fmt.Fprintf(w, "// recompMap: %d bytes (entries only)\n", sz)
790 fmt.Fprintln(w, "var recompMap map[uint32]rune")
791 fmt.Fprintln(w, "var recompMapOnce sync.Once\n")
792 fmt.Fprintln(w, `const recompMapPacked = "" +`)
793 var buf [8]byte
794 for i, c := range chars {
795 f := c.forms[FCanonical]
796 d := f.decomp
797 if !f.isOneWay && len(d) > 0 {
798 key := uint32(uint16(d[0]))<<16 + uint32(uint16(d[1]))
799 binary.BigEndian.PutUint32(buf[:4], key)
800 binary.BigEndian.PutUint32(buf[4:], uint32(i))
801 fmt.Fprintf(w, "\t\t%q + // 0x%.8X: 0x%.8X\n", string(buf[:]), key, uint32(i))
802 }
803 }
804
805 fmt.Fprintf(w, ` ""`)
806 fmt.Fprintln(w)
807 }
808
809 fmt.Fprintf(w, "// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size)
810 gen.WriteVersionedGoFile("tables.go", "norm", w.Bytes())
811 }
812
813 func printChars() {
814 if *verbose {
815 for _, c := range chars {
816 if !c.isValid() || c.state == SMissing {
817 continue
818 }
819 fmt.Println(c)
820 }
821 }
822 }
823
824
825 func verifyComputed() {
826 for i, c := range chars {
827 for _, f := range c.forms {
828 isNo := (f.quickCheck[MDecomposed] == QCNo)
829 if (len(f.decomp) > 0) != isNo && !isHangul(rune(i)) {
830 log.Fatalf("%U: NF*D QC must be No if rune decomposes", i)
831 }
832
833 isMaybe := f.quickCheck[MComposed] == QCMaybe
834 if f.combinesBackward != isMaybe {
835 log.Fatalf("%U: NF*C QC must be Maybe if combinesBackward", i)
836 }
837 if len(f.decomp) > 0 && f.combinesForward && isMaybe {
838 log.Fatalf("%U: NF*C QC must be Yes or No if combinesForward and decomposes", i)
839 }
840
841 if len(f.expandedDecomp) != 0 {
842 continue
843 }
844 if a, b := c.nLeadingNonStarters > 0, (c.ccc > 0 || f.combinesBackward); a != b {
845
846
847
848
849
850
851
852
853
854 if i != 0xFF9E && i != 0xFF9F && !(0x3133 <= i && i <= 0x318E) && !(0xFFA3 <= i && i <= 0xFFDC) {
855 log.Fatalf("%U: nLead was %v; want %v", i, a, b)
856 }
857 }
858 }
859 nfc := c.forms[FCanonical]
860 nfkc := c.forms[FCompatibility]
861 if nfc.combinesBackward != nfkc.combinesBackward {
862 log.Fatalf("%U: Cannot combine combinesBackward\n", c.codePoint)
863 }
864 }
865 }
866
867
868
869
870
871
872
873 func testDerived() {
874 f := gen.OpenUCDFile("DerivedNormalizationProps.txt")
875 defer f.Close()
876 p := ucd.New(f)
877 for p.Next() {
878 r := p.Rune(0)
879 c := &chars[r]
880
881 var ftype, mode int
882 qt := p.String(1)
883 switch qt {
884 case "NFC_QC":
885 ftype, mode = FCanonical, MComposed
886 case "NFD_QC":
887 ftype, mode = FCanonical, MDecomposed
888 case "NFKC_QC":
889 ftype, mode = FCompatibility, MComposed
890 case "NFKD_QC":
891 ftype, mode = FCompatibility, MDecomposed
892 default:
893 continue
894 }
895 var qr QCResult
896 switch p.String(2) {
897 case "Y":
898 qr = QCYes
899 case "N":
900 qr = QCNo
901 case "M":
902 qr = QCMaybe
903 default:
904 log.Fatalf(`Unexpected quick check value "%s"`, p.String(2))
905 }
906 if got := c.forms[ftype].quickCheck[mode]; got != qr {
907 log.Printf("%U: FAILED %s (was %v need %v)\n", r, qt, got, qr)
908 }
909 c.forms[ftype].verified[mode] = true
910 }
911 if err := p.Err(); err != nil {
912 log.Fatal(err)
913 }
914
915 for i, c := range chars {
916 for j, fd := range c.forms {
917 for k, qr := range fd.quickCheck {
918 if !fd.verified[k] && qr != QCYes {
919 m := "%U: FAIL F:%d M:%d (was %v need Yes) %s\n"
920 log.Printf(m, i, j, k, qr, c.name)
921 }
922 }
923 }
924 }
925 }
926
// testHeader is the fixed preamble that printTestdata writes at the top of the
// generated data_test.go: the quick check constants, the formData and runeData
// types, the f and g constructor helpers, and the opening of the testData
// slice literal, which printTestdata fills in and closes.
var testHeader = `const (
Yes = iota
No
Maybe
)

type formData struct {
qc uint8
combinesForward bool
decomposition string
}

type runeData struct {
r rune
ccc uint8
nLead uint8
nTrail uint8
f [2]formData // 0: canonical; 1: compatibility
}

func f(qc uint8, cf bool, dec string) [2]formData {
return [2]formData{{qc, cf, dec}, {qc, cf, dec}}
}

func g(qc, qck uint8, cf, cfk bool, d, dk string) [2]formData {
return [2]formData{{qc, cf, d}, {qck, cfk, dk}}
}

var testData = []runeData{
`
957
958 func printTestdata() {
959 type lastInfo struct {
960 ccc uint8
961 nLead uint8
962 nTrail uint8
963 f string
964 }
965
966 last := lastInfo{}
967 w := &bytes.Buffer{}
968 fmt.Fprintf(w, testHeader)
969 for r, c := range chars {
970 f := c.forms[FCanonical]
971 qc, cf, d := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp)
972 f = c.forms[FCompatibility]
973 qck, cfk, dk := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp)
974 s := ""
975 if d == dk && qc == qck && cf == cfk {
976 s = fmt.Sprintf("f(%s, %v, %q)", qc, cf, d)
977 } else {
978 s = fmt.Sprintf("g(%s, %s, %v, %v, %q, %q)", qc, qck, cf, cfk, d, dk)
979 }
980 current := lastInfo{c.ccc, c.nLeadingNonStarters, c.nTrailingNonStarters, s}
981 if last != current {
982 fmt.Fprintf(w, "\t{0x%x, %d, %d, %d, %s},\n", r, c.origCCC, c.nLeadingNonStarters, c.nTrailingNonStarters, s)
983 last = current
984 }
985 }
986 fmt.Fprintln(w, "}")
987 gen.WriteVersionedGoFile("data_test.go", "norm", w.Bytes())
988 }
989
View as plain text