1
2
3
4
5 package search
6
7 import (
8 "reflect"
9 "strings"
10 "testing"
11
12 "golang.org/x/text/language"
13 )
14
15 func TestCompile(t *testing.T) {
16 for i, tc := range []struct {
17 desc string
18 pattern string
19 options []Option
20 n int
21 }{{
22 desc: "empty",
23 pattern: "",
24 n: 0,
25 }, {
26 desc: "single",
27 pattern: "a",
28 n: 1,
29 }, {
30 desc: "keep modifier",
31 pattern: "a\u0300",
32 n: 2,
33 }, {
34 desc: "remove modifier",
35 pattern: "a\u0300",
36 options: []Option{IgnoreDiacritics},
37 n: 1,
38 }, {
39 desc: "single with double collation element",
40 pattern: "ä",
41 n: 2,
42 }, {
43 desc: "leading variable",
44 pattern: " a",
45 n: 2,
46 }, {
47 desc: "trailing variable",
48 pattern: "aa ",
49 n: 3,
50 }, {
51 desc: "leading and trailing variable",
52 pattern: " äb ",
53 n: 5,
54 }, {
55 desc: "keep interior variable",
56 pattern: " ä b ",
57 n: 6,
58 }, {
59 desc: "keep interior variables",
60 pattern: " b ä ",
61 n: 7,
62 }, {
63 desc: "remove ignoreables (zero-weights across the board)",
64 pattern: "\u009Db\u009Dä\u009D",
65 n: 3,
66 }} {
67 m := New(language.Und, tc.options...)
68 p := m.CompileString(tc.pattern)
69 if len(p.ce) != tc.n {
70 t.Errorf("%d:%s: Compile(%+q): got %d; want %d", i, tc.desc, tc.pattern, len(p.ce), tc.n)
71 }
72 }
73 }
74
75 func TestNorm(t *testing.T) {
76
77
78 for _, tc := range []struct {
79 desc string
80 a string
81 b string
82 want bool
83 }{{
84 "simple",
85 "eee\u0300\u031b",
86 "eee\u031b\u0300",
87 true,
88 }, {
89 "large number of modifiers in pattern",
90 strings.Repeat("\u0300", 29) + "\u0318",
91 "\u0318" + strings.Repeat("\u0300", 29),
92 true,
93 }, {
94 "modifier overflow in pattern",
95 strings.Repeat("\u0300", 30) + "\u0318",
96 "\u0318" + strings.Repeat("\u0300", 30),
97 false,
98 }} {
99 m := New(language.Und)
100 a := m.CompileString(tc.a)
101 b := m.CompileString(tc.b)
102 if got := reflect.DeepEqual(a, b); got != tc.want {
103 t.Errorf("Compile(a) == Compile(b) == %v; want %v", got, tc.want)
104 }
105 }
106 }
107
108 func TestForwardSearch(t *testing.T) {
109 for i, tc := range []struct {
110 desc string
111 tag string
112 options []Option
113 pattern string
114 text string
115 want []int
116 }{{
117
118
119
120
121 desc: "empty pattern and text",
122 tag: "und",
123 pattern: "",
124 text: "",
125 want: nil,
126 }, {
127 desc: "non-empty pattern and empty text",
128 tag: "und",
129 pattern: " ",
130 text: "",
131 want: nil,
132 }, {
133 desc: "empty pattern and non-empty text",
134 tag: "und",
135 pattern: "",
136 text: "abc",
137 want: nil,
138 }, {
139
140
141
142 desc: "exact match of variable",
143 tag: "und",
144 pattern: " ",
145 text: " ",
146 want: []int{0, 1},
147 }, {
148 desc: "variables not handled by default",
149 tag: "und",
150 pattern: "- ",
151 text: " -",
152 want: nil,
153 }, {
154 desc: "multiple subsequent identical variables",
155 tag: "und",
156 pattern: " ",
157 text: " ",
158 want: []int{0, 1, 1, 2, 2, 3, 3, 4},
159 }, {
160 desc: "text with variables",
161 tag: "und",
162 options: []Option{IgnoreDiacritics},
163 pattern: "abc",
164 text: "3 abc 3",
165 want: []int{2, 5},
166 }, {
167 desc: "pattern with interior variables",
168 tag: "und",
169 options: []Option{IgnoreDiacritics},
170 pattern: "a b c",
171 text: "3 a b c abc a b c 3",
172 want: []int{2, 7},
173
174
175 }, {
176
177
178 desc: "match all levels",
179 tag: "und",
180 pattern: "Abc",
181 text: "abcAbcABCÁbcábc",
182 want: []int{3, 6},
183 }, {
184 desc: "ignore diacritics in text",
185 tag: "und",
186 options: []Option{IgnoreDiacritics},
187 pattern: "Abc",
188 text: "Ábc",
189 want: []int{0, 4},
190 }, {
191 desc: "ignore diacritics in pattern",
192 tag: "und",
193 options: []Option{IgnoreDiacritics},
194 pattern: "Ábc",
195 text: "Abc",
196 want: []int{0, 3},
197 }, {
198 desc: "ignore diacritics",
199 tag: "und",
200 options: []Option{IgnoreDiacritics},
201 pattern: "Abc",
202 text: "abcAbcABCÁbcábc",
203 want: []int{3, 6, 9, 13},
204 }, {
205 desc: "ignore case",
206 tag: "und",
207 options: []Option{IgnoreCase},
208 pattern: "Abc",
209 text: "abcAbcABCÁbcábc",
210 want: []int{0, 3, 3, 6, 6, 9},
211 }, {
212 desc: "ignore case and diacritics",
213 tag: "und",
214 options: []Option{IgnoreCase, IgnoreDiacritics},
215 pattern: "Abc",
216 text: "abcAbcABCÁbcábc",
217 want: []int{0, 3, 3, 6, 6, 9, 9, 13, 13, 17},
218 }, {
219 desc: "ignore width to fullwidth",
220 tag: "und",
221 options: []Option{IgnoreWidth},
222 pattern: "abc",
223 text: "123 \uFF41\uFF42\uFF43 123",
224 want: []int{4, 13},
225 }, {
226
227 desc: "don't ignore width to fullwidth, ignoring only case",
228 tag: "und",
229 options: []Option{IgnoreCase},
230 pattern: "abc",
231 text: "123 \uFF41\uFF42\uFF43 123",
232 want: []int{4, 13},
233 }, {
234 desc: "ignore width to fullwidth and diacritics",
235 tag: "und",
236 options: []Option{IgnoreWidth, IgnoreDiacritics},
237 pattern: "abc",
238 text: "123 \uFF41\uFF42\uFF43 123",
239 want: []int{4, 13},
240 }, {
241 desc: "whole grapheme, single rune",
242 tag: "und",
243 pattern: "eee",
244 text: "123 eeé 123",
245 want: nil,
246 }, {
247
248
249
250
251 desc: "whole grapheme, contractions",
252 tag: "da",
253 pattern: "aba",
254
255 text: "123 abaa 123",
256 want: []int{},
257 }, {
258 desc: "whole grapheme, trailing modifier",
259 tag: "und",
260 pattern: "eee",
261 text: "123 eee\u0300 123",
262 want: nil,
263 }, {
264
265
266 desc: "",
267 tag: "da",
268 options: []Option{IgnoreCase},
269 pattern: "Århus",
270 text: "AarhusÅrhus Århus ",
271 want: []int{0, 6, 6, 12, 14, 20},
272 }, {
273 desc: "",
274 tag: "da",
275 options: []Option{IgnoreCase},
276 pattern: "Aarhus",
277 text: "Århus Aarhus",
278 want: []int{0, 6, 7, 13},
279 }, {
280 desc: "",
281 tag: "en",
282 options: []Option{IgnoreCase},
283 pattern: "Aarhus",
284 text: "Århus",
285 want: nil,
286 }, {
287 desc: "ignore modifier in text",
288 options: []Option{IgnoreDiacritics},
289 tag: "und",
290 pattern: "eee",
291 text: "123 eee\u0300 123",
292 want: []int{4, 9},
293 }, {
294 desc: "ignore multiple modifiers in text",
295 options: []Option{IgnoreDiacritics},
296 tag: "und",
297 pattern: "eee",
298 text: "123 eee\u0300\u0300 123",
299 want: []int{4, 11},
300 }, {
301 desc: "ignore modifier in pattern",
302 options: []Option{IgnoreDiacritics},
303 tag: "und",
304 pattern: "eee\u0300",
305 text: "123 eee 123",
306 want: []int{4, 7},
307 }, {
308 desc: "ignore multiple modifiers in pattern",
309 options: []Option{IgnoreDiacritics},
310 tag: "und",
311 pattern: "eee\u0300\u0300",
312 text: "123 eee 123",
313 want: []int{4, 7},
314 }, {
315 desc: "match non-normalized pattern",
316 tag: "und",
317
318
319 pattern: "eee\u0300\u031b",
320 text: "123 eee\u031b\u0300 123",
321 want: []int{4, 11},
322 }, {
323 desc: "match non-normalized text",
324 tag: "und",
325
326
327 pattern: "eee\u031b\u0300",
328 text: "123 eee\u0300\u031b 123",
329 want: []int{4, 11},
330 }} {
331 m := New(language.MustParse(tc.tag), tc.options...)
332 p := m.CompileString(tc.pattern)
333 for j := 0; j < len(tc.text); {
334 start, end := p.IndexString(tc.text[j:])
335 if start == -1 && end == -1 {
336 j++
337 continue
338 }
339 start += j
340 end += j
341 j = end
342 if len(tc.want) == 0 {
343 t.Errorf("%d:%s: found unexpected result [%d %d]", i, tc.desc, start, end)
344 break
345 }
346 if tc.want[0] != start || tc.want[1] != end {
347 t.Errorf("%d:%s: got [%d %d]; want %v", i, tc.desc, start, end, tc.want[:2])
348 tc.want = tc.want[2:]
349 break
350 }
351 tc.want = tc.want[2:]
352 }
353 if len(tc.want) != 0 {
354 t.Errorf("%d:%s: %d extra results", i, tc.desc, len(tc.want)/2)
355 }
356 }
357 }
358
View as plain text