1
2
3
4
5 package collate
6
7 import (
8 "archive/zip"
9 "bufio"
10 "bytes"
11 "flag"
12 "io"
13 "log"
14 "path"
15 "regexp"
16 "strconv"
17 "strings"
18 "testing"
19 "unicode/utf8"
20
21 "golang.org/x/text/collate/build"
22 "golang.org/x/text/internal/gen"
23 "golang.org/x/text/language"
24 )
25
// long is the -long command-line flag. It defaults to false; TestCollation
// consults it to decide whether data may be fetched rather than read locally.
var long = flag.Bool("long", false, "run time-consuming tests, such as tests that fetch data online")
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
// Test holds the ordered strings extracted from one collation test file.
// str and comment are parallel slices: comment[i] belongs to str[i].
type Test struct {
	// name is the base name of the file the data was read from.
	name string
	// str holds the UTF-8 encoded test strings in file order.
	str [][]byte
	// comment holds the text that followed "# " on each accepted line.
	comment []string
}
49
var (
	// versionRe captures the version string from a "# UCA Version: x.y.z"
	// header line.
	versionRe = regexp.MustCompile(`# UCA Version: (.*)\n?$`)

	// testRe matches a data line: a space-separated list of upper-case hex
	// code points (group 1), a ';', and a trailing "# " comment (group 2).
	testRe = regexp.MustCompile(`^([\dA-F ]+);.*# (.*)\n?$`)
)
52
53 func TestCollation(t *testing.T) {
54 if !gen.IsLocal() && !*long {
55 t.Skip("skipping test to prevent downloading; to run use -long or use -local to specify a local source")
56 }
57 t.Skip("must first update to new file format to support test")
58 for _, test := range loadTestData() {
59 doTest(t, test)
60 }
61 }
62
// Error terminates the program via log.Fatal when e is non-nil and does
// nothing otherwise.
func Error(e error) {
	if e == nil {
		return
	}
	log.Fatal(e)
}
68
69
70
71
72 func parseUCA(builder *build.Builder) {
73 r := gen.OpenUnicodeFile("UCA", "", "allkeys.txt")
74 defer r.Close()
75 input := bufio.NewReader(r)
76 colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`)
77 for i := 1; true; i++ {
78 l, prefix, err := input.ReadLine()
79 if err == io.EOF {
80 break
81 }
82 Error(err)
83 line := string(l)
84 if prefix {
85 log.Fatalf("%d: buffer overflow", i)
86 }
87 if len(line) == 0 || line[0] == '#' {
88 continue
89 }
90 if line[0] == '@' {
91 if strings.HasPrefix(line[1:], "version ") {
92 if v := strings.Split(line[1:], " ")[1]; v != gen.UnicodeVersion() {
93 log.Fatalf("incompatible version %s; want %s", v, gen.UnicodeVersion())
94 }
95 }
96 } else {
97
98 part := strings.Split(line, " ; ")
99 if len(part) != 2 {
100 log.Fatalf("%d: production rule without ';': %v", i, line)
101 }
102 lhs := []rune{}
103 for _, v := range strings.Split(part[0], " ") {
104 if v != "" {
105 lhs = append(lhs, rune(convHex(i, v)))
106 }
107 }
108 vars := []int{}
109 rhs := [][]int{}
110 for i, m := range colelem.FindAllStringSubmatch(part[1], -1) {
111 if m[1] == "*" {
112 vars = append(vars, i)
113 }
114 elem := []int{}
115 for _, h := range strings.Split(m[2], ".") {
116 elem = append(elem, convHex(i, h))
117 }
118 rhs = append(rhs, elem)
119 }
120 builder.Add(lhs, rhs, vars)
121 }
122 }
123 }
124
// convHex parses s as a hexadecimal number that fits in 32 bits and returns
// it as an int. If s cannot be parsed the program exits via log.Fatalf, with
// line used as the prefix of the message.
func convHex(line int, s string) int {
	value, err := strconv.ParseInt(s, 16, 32)
	if err == nil {
		return int(value)
	}
	log.Fatalf("%d: %v", line, err)
	return int(value)
}
132
133 func loadTestData() []Test {
134 f := gen.OpenUnicodeFile("UCA", "", "CollationTest.zip")
135 buffer, err := io.ReadAll(f)
136 f.Close()
137 Error(err)
138 archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
139 Error(err)
140 tests := []Test{}
141 for _, f := range archive.File {
142
143 if strings.Contains(f.Name, "SHORT") || f.FileInfo().IsDir() {
144 continue
145 }
146 ff, err := f.Open()
147 Error(err)
148 defer ff.Close()
149 scanner := bufio.NewScanner(ff)
150 test := Test{name: path.Base(f.Name)}
151 for scanner.Scan() {
152 line := scanner.Text()
153 if len(line) <= 1 || line[0] == '#' {
154 if m := versionRe.FindStringSubmatch(line); m != nil {
155 if m[1] != gen.UnicodeVersion() {
156 log.Printf("warning:%s: version is %s; want %s", f.Name, m[1], gen.UnicodeVersion())
157 }
158 }
159 continue
160 }
161 m := testRe.FindStringSubmatch(line)
162 if m == nil || len(m) < 3 {
163 log.Fatalf(`Failed to parse: "%s" result: %#v`, line, m)
164 }
165 str := []byte{}
166
167
168
169
170
171 valid := true
172 for _, split := range strings.Split(m[1], " ") {
173 r, err := strconv.ParseUint(split, 16, 64)
174 Error(err)
175 valid = valid && utf8.ValidRune(rune(r))
176 str = append(str, string(rune(r))...)
177 }
178 if valid {
179 test.str = append(test.str, str)
180 test.comment = append(test.comment, m[2])
181 }
182 }
183 if scanner.Err() != nil {
184 log.Fatal(scanner.Err())
185 }
186 tests = append(tests, test)
187 }
188 return tests
189 }
190
// errorCount is a package-level counter that starts at zero.
// NOTE(review): nothing in the visible part of this file reads or writes it;
// confirm whether it is still needed.
var errorCount int
192
// runes decodes b as UTF-8 and returns the resulting code points. It is used
// to print byte strings as rune sequences in failure messages.
func runes(b []byte) []rune {
	text := string(b)
	return []rune(text)
}
196
197 var shifted = language.MustParse("und-u-ka-shifted-ks-level4")
198
199 func doTest(t *testing.T, tc Test) {
200 bld := build.NewBuilder()
201 parseUCA(bld)
202 w, err := bld.Build()
203 Error(err)
204 var tag language.Tag
205 if !strings.Contains(tc.name, "NON_IGNOR") {
206 tag = shifted
207 }
208 c := NewFromTable(w, OptionsFromTag(tag))
209 b := &Buffer{}
210 prev := tc.str[0]
211 for i := 1; i < len(tc.str); i++ {
212 b.Reset()
213 s := tc.str[i]
214 ka := c.Key(b, prev)
215 kb := c.Key(b, s)
216 if r := bytes.Compare(ka, kb); r == 1 {
217 t.Errorf("%s:%d: Key(%.4X) < Key(%.4X) (%X < %X) == %d; want -1 or 0", tc.name, i, []rune(string(prev)), []rune(string(s)), ka, kb, r)
218 prev = s
219 continue
220 }
221 if r := c.Compare(prev, s); r == 1 {
222 t.Errorf("%s:%d: Compare(%.4X, %.4X) == %d; want -1 or 0", tc.name, i, runes(prev), runes(s), r)
223 }
224 if r := c.Compare(s, prev); r == -1 {
225 t.Errorf("%s:%d: Compare(%.4X, %.4X) == %d; want 1 or 0", tc.name, i, runes(s), runes(prev), r)
226 }
227 prev = s
228 }
229 }
230
View as plain text