1
2
3
4
5
6
7
8
9
10 package ucd
11
12 import (
13 "bufio"
14 "errors"
15 "fmt"
16 "io"
17 "log"
18 "regexp"
19 "strconv"
20 "strings"
21 )
22
23
// Untyped integer constants numbered consecutively with iota, from 0
// (CodePoint) through 14 (SimpleTitlecaseMapping).
//
// NOTE(review): the names suggest these are the column indices of a
// UnicodeData.txt record, meant to be passed as the field index to the Parser
// accessor methods (String, Rune, Int, ...) — confirm against callers.
const (
	CodePoint = iota
	Name
	GeneralCategory
	CanonicalCombiningClass
	BidiClass
	DecompMapping
	DecimalValue
	DigitValue
	NumericValue
	BidiMirrored
	Unicode1Name
	ISOComment
	SimpleUppercaseMapping
	SimpleLowercaseMapping
	SimpleTitlecaseMapping
)
41
42
43
44
45
46 func Parse(r io.ReadCloser, f func(p *Parser)) {
47 defer r.Close()
48
49 p := New(r)
50 for p.Next() {
51 f(p)
52 }
53 if err := p.Err(); err != nil {
54 r.Close()
55 log.Fatal(err)
56 }
57 }
58
59
// Option is a function that configures a Parser. New calls every Option it is
// given on the freshly created Parser, in argument order.
type Option func(p *Parser)
61
// keepRanges sets the keepRanges flag on p. It is the function stored in the
// exported KeepRanges option.
func keepRanges(p *Parser) {
	p.keepRanges = true
}
65
var (
	// KeepRanges is an Option that sets the parser's keepRanges flag. With the
	// flag set, Next yields a range record once instead of once per code point
	// in the range, Range parses the requested field as a range, and Rune
	// parses field 0 as a single code point.
	KeepRanges Option = keepRanges
)
71
72
73
74 func Part(f func(p *Parser)) Option {
75 return func(p *Parser) {
76 p.partHandler = f
77 }
78 }
79
80
81
82 func CommentHandler(f func(s string)) Option {
83 return func(p *Parser) {
84 p.commentHandler = f
85 }
86 }
87
88
// A Parser reads ';'-separated records line by line from a bufio.Scanner and
// exposes the fields of the current record through typed accessor methods.
// The first error encountered is retained and reported by Err.
type Parser struct {
	// scanner supplies the input lines; New creates it from the reader.
	scanner *bufio.Scanner

	// keepRanges disables per-code-point expansion of ranges in Next. It is
	// set by the KeepRanges option and by getRange when the range field does
	// not parse as a code point.
	keepRanges bool

	err     error    // first error recorded by setError
	comment string   // trimmed text after '#' on the current record line
	field   []string // trimmed ';'-separated fields of the current record

	// line counts the lines consumed so far; it is included in error messages.
	line int
	// parsedRange is set once a legacy "First>"/"Last" pair has been read for
	// the current record; Next clears it when it moves to a new record.
	parsedRange bool
	// rangeStart is the code point currently reported for field 0 and
	// rangeEnd is the last code point of the range being expanded.
	rangeStart, rangeEnd rune

	partHandler    func(p *Parser) // called by Next for lines starting with '@'
	commentHandler func(s string)  // called by Next for lines starting with '#'
}
106
107 func (p *Parser) setError(err error, msg string) {
108 if p.err == nil && err != nil {
109 if msg == "" {
110 p.err = fmt.Errorf("ucd:line:%d: %v", p.line, err)
111 } else {
112 p.err = fmt.Errorf("ucd:line:%d:%s: %v", p.line, msg, err)
113 }
114 }
115 }
116
117 func (p *Parser) getField(i int) string {
118 if i >= len(p.field) {
119 return ""
120 }
121 return p.field[i]
122 }
123
124
// Err returns the first error recorded while parsing, or nil if none occurred.
func (p *Parser) Err() error {
	return p.err
}
128
129
130 func New(r io.Reader, o ...Option) *Parser {
131 p := &Parser{
132 scanner: bufio.NewScanner(r),
133 }
134 for _, f := range o {
135 f(p)
136 }
137 return p
138 }
139
140
141
142 func (p *Parser) Next() bool {
143 if !p.keepRanges && p.rangeStart < p.rangeEnd {
144 p.rangeStart++
145 return true
146 }
147 p.comment = ""
148 p.field = p.field[:0]
149 p.parsedRange = false
150
151 for p.scanner.Scan() && p.err == nil {
152 p.line++
153 s := p.scanner.Text()
154 if s == "" {
155 continue
156 }
157 if s[0] == '#' {
158 if p.commentHandler != nil {
159 p.commentHandler(strings.TrimSpace(s[1:]))
160 }
161 continue
162 }
163
164
165 if i := strings.IndexByte(s, '#'); i != -1 {
166 p.comment = strings.TrimSpace(s[i+1:])
167 s = s[:i]
168 }
169 if s[0] == '@' {
170 if p.partHandler != nil {
171 p.field = append(p.field, strings.TrimSpace(s[1:]))
172 p.partHandler(p)
173 p.field = p.field[:0]
174 }
175 p.comment = ""
176 continue
177 }
178 for {
179 i := strings.IndexByte(s, ';')
180 if i == -1 {
181 p.field = append(p.field, strings.TrimSpace(s))
182 break
183 }
184 p.field = append(p.field, strings.TrimSpace(s[:i]))
185 s = s[i+1:]
186 }
187 if !p.keepRanges {
188 p.rangeStart, p.rangeEnd = p.getRange(0)
189 }
190 return true
191 }
192 p.setError(p.scanner.Err(), "scanner failed")
193 return false
194 }
195
// parseRune parses b as a hexadecimal code point. A leading "U+" is dropped
// when at least one more character follows it. The value is parsed as an
// unsigned 32-bit number and converted to rune; the error from
// strconv.ParseUint is returned unchanged.
func parseRune(b string) (rune, error) {
	digits := b
	if len(b) > 2 && strings.HasPrefix(b, "U+") {
		digits = b[2:]
	}
	v, err := strconv.ParseUint(digits, 16, 32)
	return rune(v), err
}
203
204 func (p *Parser) parseRune(s string) rune {
205 x, err := parseRune(s)
206 p.setError(err, "failed to parse rune")
207 return x
208 }
209
210
211 func (p *Parser) Rune(i int) rune {
212 if i > 0 || p.keepRanges {
213 return p.parseRune(p.getField(i))
214 }
215 return p.rangeStart
216 }
217
218
219 func (p *Parser) Runes(i int) (runes []rune) {
220 add := func(s string) {
221 if s = strings.TrimSpace(s); len(s) > 0 {
222 runes = append(runes, p.parseRune(s))
223 }
224 }
225 for b := p.getField(i); ; {
226 i := strings.IndexByte(b, ' ')
227 if i == -1 {
228 add(b)
229 break
230 }
231 add(b[:i])
232 b = b[i+1:]
233 }
234 return
235 }
236
var (
	// errIncorrectLegacyRange is recorded by getRange when a record whose
	// second field ends in "First>" is not followed by a matching "Last" line.
	errIncorrectLegacyRange = errors.New("ucd: unmatched <* First>")

	// reRange matches a raw legacy range line. Its groups are: (1) the
	// upper-case hexadecimal digits before the first ';', (2) the text between
	// '<' and the comma, (3) the text between ", " and '>', and (4) the
	// remainder of the line.
	reRange = regexp.MustCompile("^([0-9A-F]*);<([^,]*), ([^>]*)>(.*)$")
)
243
244
245
246
247 func (p *Parser) Range(i int) (first, last rune) {
248 if !p.keepRanges {
249 return p.rangeStart, p.rangeStart
250 }
251 return p.getRange(i)
252 }
253
254 func (p *Parser) getRange(i int) (first, last rune) {
255 b := p.getField(i)
256 if k := strings.Index(b, ".."); k != -1 {
257 return p.parseRune(b[:k]), p.parseRune(b[k+2:])
258 }
259
260
261 x, err := parseRune(b)
262 if err != nil {
263
264
265
266 p.keepRanges = true
267 }
268
269 if i == 0 && len(p.field) > 1 && strings.HasSuffix(p.field[1], "First>") {
270 if p.parsedRange {
271 return p.rangeStart, p.rangeEnd
272 }
273 mf := reRange.FindStringSubmatch(p.scanner.Text())
274 p.line++
275 if mf == nil || !p.scanner.Scan() {
276 p.setError(errIncorrectLegacyRange, "")
277 return x, x
278 }
279
280
281 ml := reRange.FindStringSubmatch(p.scanner.Text())
282 if ml == nil || mf[2] != ml[2] || ml[3] != "Last" || mf[4] != ml[4] {
283 p.setError(errIncorrectLegacyRange, "")
284 return x, x
285 }
286 p.rangeStart, p.rangeEnd = x, p.parseRune(p.scanner.Text()[:len(ml[1])])
287 p.parsedRange = true
288 return p.rangeStart, p.rangeEnd
289 }
290 return x, x
291 }
292
293
// bools maps the field spellings accepted by Parser.Bool to their boolean
// values. The empty string counts as false.
var bools = map[string]bool{
	"":      false,
	"N":     false,
	"No":    false,
	"F":     false,
	"False": false,
	"Y":     true,
	"Yes":   true,
	"T":     true,
	"True":  true,
}
305
306
307 func (p *Parser) Bool(i int) bool {
308 f := p.getField(i)
309 for s, v := range bools {
310 if f == s {
311 return v
312 }
313 }
314 p.setError(strconv.ErrSyntax, "error parsing bool")
315 return false
316 }
317
318
319 func (p *Parser) Int(i int) int {
320 x, err := strconv.ParseInt(p.getField(i), 10, 64)
321 p.setError(err, "error parsing int")
322 return int(x)
323 }
324
325
326 func (p *Parser) Uint(i int) uint {
327 x, err := strconv.ParseUint(p.getField(i), 10, 64)
328 p.setError(err, "error parsing uint")
329 return uint(x)
330 }
331
332
333 func (p *Parser) Float(i int) float64 {
334 x, err := strconv.ParseFloat(p.getField(i), 64)
335 p.setError(err, "error parsing float")
336 return x
337 }
338
339
// String returns field i of the current record as stored (already trimmed of
// surrounding white space), or the empty string if the record has no such
// field.
func (p *Parser) String(i int) string {
	return p.getField(i)
}
343
344
345 func (p *Parser) Strings(i int) []string {
346 ss := strings.Split(string(p.getField(i)), " ")
347 for i, s := range ss {
348 ss[i] = strings.TrimSpace(s)
349 }
350 return ss
351 }
352
353
354 func (p *Parser) Comment() string {
355 return string(p.comment)
356 }
357
// errUndefinedEnum is recorded by Parser.Enum when a field matches none of the
// permitted values.
var errUndefinedEnum = errors.New("ucd: undefined enum value")
359
360
361
362 func (p *Parser) Enum(i int, enum ...string) string {
363 f := p.getField(i)
364 for _, s := range enum {
365 if f == s {
366 return s
367 }
368 }
369 p.setError(errUndefinedEnum, "error parsing enum")
370 return ""
371 }
372
View as plain text