1
2
3
4
5 package cldr
6
7 import (
8 "bufio"
9 "encoding/xml"
10 "errors"
11 "fmt"
12 "strconv"
13 "strings"
14 "unicode"
15 "unicode/utf8"
16 )
17
18
19
// RuleProcessor receives the collation tailoring rules found by
// Collation.Process. Its methods are called in the order in which the
// corresponding rules appear in the input.
type RuleProcessor interface {
	// Reset is called for each reset rule: '&' in the textual rule syntax
	// or a <reset> element in the XML form. anchor is either the scanned
	// anchor string or a special anchor formatted as "<key/>". before is 0
	// when no "before" level was given, otherwise the level that was
	// parsed from the rule.
	Reset(anchor string, before int) error
	// Insert is called for each relation. In the textual syntax level is
	// the number of consecutive '<' characters, or 5 for '='; in the XML
	// form it is derived from the first letter of the tag name. context
	// and extend are empty unless the rule supplies them.
	Insert(level int, str, context, extend string) error
	// Index is called for a value that starts with the U+FDD0 index
	// marker; id is the value with that marker removed.
	Index(id string)
}
25
const (
	// cldrIndex is the marker that identifies an index entry. A rule value
	// that starts with it is not inserted; the remainder of the value is
	// passed to RuleProcessor.Index instead.
	cldrIndex = "\uFDD0"

	// specialAnchor is the format string used to turn a bracketed key
	// (textual syntax) or a child element name (XML form) into the anchor
	// string handed to RuleProcessor.Reset.
	specialAnchor = "<%s/>"
)
37
38
39
40 func (c Collation) Process(p RuleProcessor) (err error) {
41 if len(c.Cr) > 0 {
42 if len(c.Cr) > 1 {
43 return fmt.Errorf("multiple cr elements, want 0 or 1")
44 }
45 return processRules(p, c.Cr[0].Data())
46 }
47 if c.Rules.Any != nil {
48 return c.processXML(p)
49 }
50 return errors.New("no tailoring data")
51 }
52
53
54
55 func processRules(p RuleProcessor, s string) (err error) {
56 chk := func(s string, e error) string {
57 if err == nil {
58 err = e
59 }
60 return s
61 }
62 i := 0
63 scanner := bufio.NewScanner(strings.NewReader(s))
64 for ; scanner.Scan() && err == nil; i++ {
65 for s := skipSpace(scanner.Text()); s != "" && s[0] != '#'; s = skipSpace(s) {
66 level := 5
67 var ch byte
68 switch ch, s = s[0], s[1:]; ch {
69 case '&':
70 if s = skipSpace(s); consume(&s, '[') {
71 s = chk(parseSpecialAnchor(p, s))
72 } else {
73 s = chk(parseAnchor(p, 0, s))
74 }
75 case '<':
76 for level = 1; consume(&s, '<'); level++ {
77 }
78 if level > 4 {
79 err = fmt.Errorf("level %d > 4", level)
80 }
81 fallthrough
82 case '=':
83 if consume(&s, '*') {
84 s = chk(parseSequence(p, level, s))
85 } else {
86 s = chk(parseOrder(p, level, s))
87 }
88 default:
89 chk("", fmt.Errorf("illegal operator %q", ch))
90 break
91 }
92 }
93 }
94 if chk("", scanner.Err()); err != nil {
95 return fmt.Errorf("%d: %v", i, err)
96 }
97 return nil
98 }
99
100
101
102
103
104
105
106
107
108
109 func parseSpecialAnchor(p RuleProcessor, s string) (tail string, err error) {
110 i := strings.IndexByte(s, ']')
111 if i == -1 {
112 return "", errors.New("unmatched bracket")
113 }
114 a := strings.TrimSpace(s[:i])
115 s = s[i+1:]
116 if strings.HasPrefix(a, "before ") {
117 l, err := strconv.ParseUint(skipSpace(a[len("before "):]), 10, 3)
118 if err != nil {
119 return s, err
120 }
121 return parseAnchor(p, int(l), s)
122 }
123 return s, p.Reset(fmt.Sprintf(specialAnchor, a), 0)
124 }
125
126 func parseAnchor(p RuleProcessor, level int, s string) (tail string, err error) {
127 anchor, s, err := scanString(s)
128 if err != nil {
129 return s, err
130 }
131 return s, p.Reset(anchor, level)
132 }
133
134 func parseOrder(p RuleProcessor, level int, s string) (tail string, err error) {
135 var value, context, extend string
136 if value, s, err = scanString(s); err != nil {
137 return s, err
138 }
139 if strings.HasPrefix(value, cldrIndex) {
140 p.Index(value[len(cldrIndex):])
141 return
142 }
143 if consume(&s, '|') {
144 if context, s, err = scanString(s); err != nil {
145 return s, errors.New("missing string after context")
146 }
147 }
148 if consume(&s, '/') {
149 if extend, s, err = scanString(s); err != nil {
150 return s, errors.New("missing string after extension")
151 }
152 }
153 return s, p.Insert(level, value, context, extend)
154 }
155
156
157 func scanString(s string) (str, tail string, err error) {
158 if s = skipSpace(s); s == "" {
159 return s, s, errors.New("missing string")
160 }
161 buf := [16]byte{}
162 value := buf[:0]
163 for s != "" {
164 if consume(&s, '\'') {
165 i := strings.IndexByte(s, '\'')
166 if i == -1 {
167 return "", "", errors.New(`unmatched single quote`)
168 }
169 if i == 0 {
170 value = append(value, '\'')
171 } else {
172 value = append(value, s[:i]...)
173 }
174 s = s[i+1:]
175 continue
176 }
177 r, sz := utf8.DecodeRuneInString(s)
178 if unicode.IsSpace(r) || strings.ContainsRune("&<=#", r) {
179 break
180 }
181 value = append(value, s[:sz]...)
182 s = s[sz:]
183 }
184 return string(value), skipSpace(s), nil
185 }
186
187 func parseSequence(p RuleProcessor, level int, s string) (tail string, err error) {
188 if s = skipSpace(s); s == "" {
189 return s, errors.New("empty sequence")
190 }
191 last := rune(0)
192 for s != "" {
193 r, sz := utf8.DecodeRuneInString(s)
194 s = s[sz:]
195
196 if r == '-' {
197
198 if last == 0 {
199 return s, errors.New("range without starter value")
200 }
201 r, sz = utf8.DecodeRuneInString(s)
202 s = s[sz:]
203 if r == utf8.RuneError || r < last {
204 return s, fmt.Errorf("invalid range %q-%q", last, r)
205 }
206 for i := last + 1; i <= r; i++ {
207 if err := p.Insert(level, string(i), "", ""); err != nil {
208 return s, err
209 }
210 }
211 last = 0
212 continue
213 }
214
215 if unicode.IsSpace(r) || unicode.IsPunct(r) {
216 break
217 }
218
219
220 if err := p.Insert(level, string(r), "", ""); err != nil {
221 return s, err
222 }
223 last = r
224 }
225 return s, nil
226 }
227
// skipSpace returns s without its leading white space, as defined by
// unicode.IsSpace.
func skipSpace(s string) string {
	start := strings.IndexFunc(s, func(r rune) bool { return !unicode.IsSpace(r) })
	if start < 0 {
		// s is empty or consists of white space only.
		return ""
	}
	return s[start:]
}
231
232
233
// consume reports whether *s starts with the byte ch. If it does, the
// byte is removed from *s; otherwise *s is left unchanged.
func consume(s *string, ch byte) (ok bool) {
	if str := *s; len(str) > 0 && str[0] == ch {
		*s = str[1:]
		return true
	}
	return false
}
241
242
243
// lmap maps the first letter of an XML rule tag name ("p", "s", "t", "i"
// and their "pc", "sc", "tc", "ic" variants) to the level that is passed
// to RuleProcessor.Insert.
var lmap = map[byte]int{
	'p': 1,
	's': 2,
	't': 3,
	'i': 5,
}
250
// rulesElem describes a <rules> element of the XML rule form. Every child
// element, whatever its name, is decoded into Any together with its XML
// name so that the rules can be processed in document order.
type rulesElem struct {
	Rules struct {
		Common
		Any []*struct {
			XMLName xml.Name
			rule
		} `xml:",any"`
	} `xml:"rules"`
}
260
// rule is a single element of the XML rule form.
type rule struct {
	// Value is the character data of the element.
	Value string `xml:",chardata"`
	// Before is the value of the "before" attribute; it is consulted for
	// reset elements only.
	Before string `xml:"before,attr"`
	// Any holds the child elements, each with its XML name.
	Any []*struct {
		XMLName xml.Name
		rule
	} `xml:",any"`
}
269
// emptyValueError is returned by (*rule).value for a rule that has no
// character data and does not have exactly one child element.
var emptyValueError = errors.New("cldr: empty rule value")
271
272 func (r *rule) value() (string, error) {
273
274 s := charRe.ReplaceAllStringFunc(r.Value, replaceUnicode)
275 r.Value = s
276 if s == "" {
277 if len(r.Any) != 1 {
278 return "", emptyValueError
279 }
280 r.Value = fmt.Sprintf(specialAnchor, r.Any[0].XMLName.Local)
281 r.Any = nil
282 } else if len(r.Any) != 0 {
283 return "", fmt.Errorf("cldr: XML elements found in collation rule: %v", r.Any)
284 }
285 return r.Value, nil
286 }
287
288 func (r rule) process(p RuleProcessor, name, context, extend string) error {
289 v, err := r.value()
290 if err != nil {
291 return err
292 }
293 switch name {
294 case "p", "s", "t", "i":
295 if strings.HasPrefix(v, cldrIndex) {
296 p.Index(v[len(cldrIndex):])
297 return nil
298 }
299 if err := p.Insert(lmap[name[0]], v, context, extend); err != nil {
300 return err
301 }
302 case "pc", "sc", "tc", "ic":
303 level := lmap[name[0]]
304 for _, s := range v {
305 if err := p.Insert(level, string(s), context, extend); err != nil {
306 return err
307 }
308 }
309 default:
310 return fmt.Errorf("cldr: unsupported tag: %q", name)
311 }
312 return nil
313 }
314
315
316 func (c Collation) processXML(p RuleProcessor) (err error) {
317
318 var v string
319 for _, r := range c.Rules.Any {
320 switch r.XMLName.Local {
321 case "reset":
322 level := 0
323 switch r.Before {
324 case "primary", "1":
325 level = 1
326 case "secondary", "2":
327 level = 2
328 case "tertiary", "3":
329 level = 3
330 case "":
331 default:
332 return fmt.Errorf("cldr: unknown level %q", r.Before)
333 }
334 v, err = r.value()
335 if err == nil {
336 err = p.Reset(v, level)
337 }
338 case "x":
339 var context, extend string
340 for _, r1 := range r.Any {
341 v, err = r1.value()
342 switch r1.XMLName.Local {
343 case "context":
344 context = v
345 case "extend":
346 extend = v
347 }
348 }
349 for _, r1 := range r.Any {
350 if t := r1.XMLName.Local; t == "context" || t == "extend" {
351 continue
352 }
353 r1.rule.process(p, r1.XMLName.Local, context, extend)
354 }
355 default:
356 err = r.rule.process(p, r.XMLName.Local, "", "")
357 }
358 if err != nil {
359 return err
360 }
361 }
362 return nil
363 }
364
View as plain text