1
2
3
4
5 package precis
6
7 import "errors"
8
9
10
// catBitmap is a set of single-bit flags. Each flag is one of the b*
// constants declared below; the masks in categoryTransitions and the
// argument passed to its rule functions are values of this type.
type catBitmap uint16

const (
	// These three flags are part of the permanent mask, which init ORs into
	// the keep mask of every categoryTransitions entry.
	bJapanese catBitmap = 1 << iota
	bArabicIndicDigit
	bExtendedArabicIndicDigit

	// These flags are not part of permanent; an entry's keep mask only
	// includes them where the table lists them explicitly.
	bJoinStart
	bJoinMid
	bJoinEnd
	bVirama
	bLatinSmallL
	bGreek
	bHebrew

	// bMustHaveJapn is part of permanent as well. Shifting it right by
	// finalShift yields bJapanese.
	bMustHaveJapn

	// permanent is the mask of flags that init adds to every keep mask.
	permanent = bJapanese | bArabicIndicDigit | bExtendedArabicIndicDigit | bMustHaveJapn
)

// finalShift is the distance, in bits, between bMustHaveJapn and bJapanese.
// NOTE(review): the code that uses finalShift is outside this block;
// presumably it shifts the "must have" flags down onto the flags they
// require once all input has been consumed. Confirm against the caller.
const finalShift = 10

// Compile-time check that finalShift still matches the flag layout. If a flag
// is added or removed ahead of bMustHaveJapn without adjusting finalShift, one
// of these constant expressions becomes negative, overflows catBitmap, and
// the build fails.
const (
	_ = bMustHaveJapn>>finalShift - bJapanese
	_ = bJapanese - bMustHaveJapn>>finalShift
)
36
var (
	// errContext is the sentinel error returned by the rule functions in
	// categoryTransitions when the flags seen before a code point do not
	// satisfy that code point's contextual rule.
	errContext = errors.New("precis: contextual rule violated")
)
38
39 func init() {
40
41
42 for i, ct := range categoryTransitions {
43 categoryTransitions[i].keep |= permanent
44 categoryTransitions[i].accept |= ct.term
45 }
46 }
47
48 var categoryTransitions = []struct {
49 keep catBitmap
50 set catBitmap
51
52
53
54 term catBitmap
55 accept catBitmap
56
57
58
59 rule func(beforeBits catBitmap) (doLookahead bool, err error)
60 }{
61 joiningL: {set: bJoinStart},
62 joiningD: {set: bJoinStart | bJoinEnd},
63 joiningT: {keep: bJoinStart, set: bJoinMid},
64 joiningR: {set: bJoinEnd},
65 viramaModifier: {set: bVirama},
66 viramaJoinT: {set: bVirama | bJoinMid},
67 latinSmallL: {set: bLatinSmallL},
68 greek: {set: bGreek},
69 greekJoinT: {set: bGreek | bJoinMid},
70 hebrew: {set: bHebrew},
71 hebrewJoinT: {set: bHebrew | bJoinMid},
72 japanese: {set: bJapanese},
73 katakanaMiddleDot: {set: bMustHaveJapn},
74
75 zeroWidthNonJoiner: {
76 term: bJoinEnd,
77 accept: bJoinMid,
78 rule: func(before catBitmap) (doLookAhead bool, err error) {
79 if before&bVirama != 0 {
80 return false, nil
81 }
82 if before&bJoinStart == 0 {
83 return false, errContext
84 }
85 return true, nil
86 },
87 },
88 zeroWidthJoiner: {
89 rule: func(before catBitmap) (doLookAhead bool, err error) {
90 if before&bVirama == 0 {
91 err = errContext
92 }
93 return false, err
94 },
95 },
96 middleDot: {
97 term: bLatinSmallL,
98 rule: func(before catBitmap) (doLookAhead bool, err error) {
99 if before&bLatinSmallL == 0 {
100 return false, errContext
101 }
102 return true, nil
103 },
104 },
105 greekLowerNumeralSign: {
106 set: bGreek,
107 term: bGreek,
108 rule: func(before catBitmap) (doLookAhead bool, err error) {
109 return true, nil
110 },
111 },
112 hebrewPreceding: {
113 set: bHebrew,
114 rule: func(before catBitmap) (doLookAhead bool, err error) {
115 if before&bHebrew == 0 {
116 err = errContext
117 }
118 return false, err
119 },
120 },
121 arabicIndicDigit: {
122 set: bArabicIndicDigit,
123 rule: func(before catBitmap) (doLookAhead bool, err error) {
124 if before&bExtendedArabicIndicDigit != 0 {
125 err = errContext
126 }
127 return false, err
128 },
129 },
130 extendedArabicIndicDigit: {
131 set: bExtendedArabicIndicDigit,
132 rule: func(before catBitmap) (doLookAhead bool, err error) {
133 if before&bArabicIndicDigit != 0 {
134 err = errContext
135 }
136 return false, err
137 },
138 },
139 }
140
View as plain text