...
1
2
3
4
5 package colltab
6
7 import (
8 "fmt"
9 "unicode"
10 )
11
12
13
14
15
16
17
// Level selects one of the weights stored in an Elem; see Elem.Weight.
type Level int

// The defined levels, numbered consecutively from zero.
const (
	Primary Level = iota
	Secondary
	Tertiary
	Quaternary
	Identity

	// NumLevels is the number of Level values declared above it.
	NumLevels
)
29
// Default and limit values for the individual weights of an Elem.
const (
	// defaultSecondary is the secondary weight reported for encodings that
	// do not store a secondary weight of their own.
	defaultSecondary = 0x20

	// defaultTertiary is the tertiary weight reported for encodings that
	// do not store a tertiary weight of their own.
	defaultTertiary = 0x2

	// maxTertiary is the largest tertiary weight that fits in five bits.
	maxTertiary = 0x1F

	// MaxQuaternary is the largest quaternary weight (21 bits, all set).
	MaxQuaternary = 0x1FFFFF
)
36
37
38
39
40
// Elem is a collation element packed into 32 bits. The numeric range a value
// falls in determines how it is interpreted; see ctype.
type Elem uint32

const (
	// maxCE is the largest value that is an ordinary collation element.
	maxCE Elem = 0xAFFFFFFF

	// Values in [minContract, maxContract] are contraction indexes.
	minContract = 0xC0000000
	maxContract = 0xDFFFFFFF

	// Values in [minExpand, maxExpand] are expansion indexes.
	minExpand = 0xE0000000
	maxExpand = 0xEFFFFFFF

	// Values from minDecomp upwards denote decompositions.
	minDecomp = 0xF0000000

	// PrivateUse is the first value that is not an ordinary collation
	// element; it equals minContract.
	PrivateUse = minContract
)
52
// ceType is the coarse classification of an Elem as reported by Elem.ctype.
type ceType int

// The possible classifications, numbered consecutively from zero.
const (
	// ceNormal marks an ordinary collation element (value <= maxCE).
	ceNormal ceType = iota
	// ceContractionIndex marks a value in the contraction index range.
	ceContractionIndex
	// ceExpansionIndex marks a value in the expansion index range.
	ceExpansionIndex
	// ceDecompose marks a value above the expansion index range.
	ceDecompose
)
61
62 func (ce Elem) ctype() ceType {
63 if ce <= maxCE {
64 return ceNormal
65 }
66 if ce <= maxContract {
67 return ceContractionIndex
68 } else {
69 if ce <= maxExpand {
70 return ceExpansionIndex
71 }
72 return ceDecompose
73 }
74 panic("should not reach here")
75 return ceType(-1)
76 }
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102 const (
103 ceTypeMask = 0xC0000000
104 ceTypeMaskExt = 0xE0000000
105 ceIgnoreMask = 0xF00FFFFF
106 ceType1 = 0x40000000
107 ceType2 = 0x00000000
108 ceType3or4 = 0x80000000
109 ceType4 = 0xA0000000
110 ceTypeQ = 0xC0000000
111 Ignore = ceType4
112 firstNonPrimary = 0x80000000
113 lastSpecialPrimary = 0xA0000000
114 secondaryMask = 0x80000000
115 hasTertiaryMask = 0x40000000
116 primaryValueMask = 0x3FFFFE00
117 maxPrimaryBits = 21
118 compactPrimaryBits = 16
119 maxSecondaryBits = 12
120 maxTertiaryBits = 8
121 maxCCCBits = 8
122 maxSecondaryCompactBits = 8
123 maxSecondaryDiffBits = 4
124 maxTertiaryCompactBits = 5
125 primaryShift = 9
126 compactSecondaryShift = 5
127 minCompactSecondary = defaultSecondary - 4
128 )
129
130 func makeImplicitCE(primary int) Elem {
131 return ceType1 | Elem(primary<<primaryShift) | defaultSecondary
132 }
133
134
135
136 func MakeElem(primary, secondary, tertiary int, ccc uint8) (Elem, error) {
137 if w := primary; w >= 1<<maxPrimaryBits || w < 0 {
138 return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits)
139 }
140 if w := secondary; w >= 1<<maxSecondaryBits || w < 0 {
141 return 0, fmt.Errorf("makeCE: secondary weight out of bounds: %x >= %x", w, 1<<maxSecondaryBits)
142 }
143 if w := tertiary; w >= 1<<maxTertiaryBits || w < 0 {
144 return 0, fmt.Errorf("makeCE: tertiary weight out of bounds: %x >= %x", w, 1<<maxTertiaryBits)
145 }
146 ce := Elem(0)
147 if primary != 0 {
148 if ccc != 0 {
149 if primary >= 1<<compactPrimaryBits {
150 return 0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", primary, 1<<compactPrimaryBits)
151 }
152 if secondary != defaultSecondary {
153 return 0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", secondary, ccc)
154 }
155 ce = Elem(tertiary << (compactPrimaryBits + maxCCCBits))
156 ce |= Elem(ccc) << compactPrimaryBits
157 ce |= Elem(primary)
158 ce |= ceType3or4
159 } else if tertiary == defaultTertiary {
160 if secondary >= 1<<maxSecondaryCompactBits {
161 return 0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", secondary, 1<<maxSecondaryCompactBits)
162 }
163 ce = Elem(primary<<(maxSecondaryCompactBits+1) + secondary)
164 ce |= ceType1
165 } else {
166 d := secondary - defaultSecondary + maxSecondaryDiffBits
167 if d >= 1<<maxSecondaryDiffBits || d < 0 {
168 return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits)
169 }
170 if tertiary >= 1<<maxTertiaryCompactBits {
171 return 0, fmt.Errorf("makeCE: tertiary weight with non-zero primary out of bounds: %x > %x", tertiary, 1<<maxTertiaryCompactBits)
172 }
173 ce = Elem(primary<<maxSecondaryDiffBits + d)
174 ce = ce<<maxTertiaryCompactBits + Elem(tertiary)
175 }
176 } else {
177 ce = Elem(secondary<<maxTertiaryBits + tertiary)
178 ce += Elem(ccc) << (maxSecondaryBits + maxTertiaryBits)
179 ce |= ceType4
180 }
181 return ce, nil
182 }
183
184
185 func MakeQuaternary(v int) Elem {
186 return ceTypeQ | Elem(v<<primaryShift)
187 }
188
189
190
191
192 func (ce Elem) Mask(l Level) uint32 {
193 return 0
194 }
195
196
197
198 func (ce Elem) CCC() uint8 {
199 if ce&ceType3or4 != 0 {
200 if ce&ceType4 == ceType3or4 {
201 return uint8(ce >> 16)
202 }
203 return uint8(ce >> 20)
204 }
205 return 0
206 }
207
208
209 func (ce Elem) Primary() int {
210 if ce >= firstNonPrimary {
211 if ce > lastSpecialPrimary {
212 return 0
213 }
214 return int(uint16(ce))
215 }
216 return int(ce&primaryValueMask) >> primaryShift
217 }
218
219
220 func (ce Elem) Secondary() int {
221 switch ce & ceTypeMask {
222 case ceType1:
223 return int(uint8(ce))
224 case ceType2:
225 return minCompactSecondary + int((ce>>compactSecondaryShift)&0xF)
226 case ceType3or4:
227 if ce < ceType4 {
228 return defaultSecondary
229 }
230 return int(ce>>8) & 0xFFF
231 case ceTypeQ:
232 return 0
233 }
234 panic("should not reach here")
235 }
236
237
238 func (ce Elem) Tertiary() uint8 {
239 if ce&hasTertiaryMask == 0 {
240 if ce&ceType3or4 == 0 {
241 return uint8(ce & 0x1F)
242 }
243 if ce&ceType4 == ceType4 {
244 return uint8(ce)
245 }
246 return uint8(ce>>24) & 0x1F
247 } else if ce&ceTypeMask == ceType1 {
248 return defaultTertiary
249 }
250
251 return 0
252 }
253
254 func (ce Elem) updateTertiary(t uint8) Elem {
255 if ce&ceTypeMask == ceType1 {
256
257 nce := ce & primaryValueMask
258 nce |= Elem(uint8(ce)-minCompactSecondary) << compactSecondaryShift
259 ce = nce
260 } else if ce&ceTypeMaskExt == ceType3or4 {
261 ce &= ^Elem(maxTertiary << 24)
262 return ce | (Elem(t) << 24)
263 } else {
264
265 ce &= ^Elem(maxTertiary)
266 }
267 return ce | Elem(t)
268 }
269
270
271
272
273 func (ce Elem) Quaternary() int {
274 if ce&ceTypeMask == ceTypeQ {
275 return int(ce&primaryValueMask) >> primaryShift
276 } else if ce&ceIgnoreMask == Ignore {
277 return 0
278 }
279 return MaxQuaternary
280 }
281
282
283 func (ce Elem) Weight(l Level) int {
284 switch l {
285 case Primary:
286 return ce.Primary()
287 case Secondary:
288 return ce.Secondary()
289 case Tertiary:
290 return int(ce.Tertiary())
291 case Quaternary:
292 return ce.Quaternary()
293 }
294 return 0
295 }
296
297
298
299
300
301
302
303
// Widths, in bits, of the fields of a contraction index, listed from the
// least significant field upwards; see splitContractIndex.
const (
	// maxNBits is the width of the n field.
	maxNBits = 4

	// maxTrieIndexBits is the width of the index field.
	maxTrieIndexBits = 12

	// maxContractOffsetBits is the width of the offset field.
	maxContractOffsetBits = 13
)
309
310 func splitContractIndex(ce Elem) (index, n, offset int) {
311 n = int(ce & (1<<maxNBits - 1))
312 ce >>= maxNBits
313 index = int(ce & (1<<maxTrieIndexBits - 1))
314 ce >>= maxTrieIndexBits
315 offset = int(ce & (1<<maxContractOffsetBits - 1))
316 return
317 }
318
319
320
// maxExpandIndexBits is the width, in bits, of the index stored in the low
// end of an expansion index; see splitExpandIndex.
const maxExpandIndexBits = 16
322
323 func splitExpandIndex(ce Elem) (index int) {
324 return int(uint16(ce))
325 }
326
327
328
329
330
331
332
333
334
335
336 func splitDecompose(ce Elem) (t1, t2 uint8) {
337 return uint8(ce), uint8(ce >> 8)
338 }
339
// Inclusive code point ranges consulted when deriving implicit primary
// weights; see implicitPrimary.
const (
	// The range U+4E00..U+9FFF.
	minUnified rune = 0x4E00
	maxUnified      = 0x9FFF

	// The range U+F900..U+FAFF.
	minCompatibility = 0xF900
	maxCompatibility = 0xFAFF

	// The range U+3400..U+4DBF.
	minRare = 0x3400
	maxRare = 0x4DBF
)
// Offsets added to a code point by implicitPrimary, and the limits derived
// from them.
const (
	// commonUnifiedOffset is used for ideographs in the unified and
	// compatibility ranges.
	commonUnifiedOffset = 0x10000

	// rareUnifiedOffset is used for all other ideographs.
	rareUnifiedOffset = 0x20000

	// otherOffset is used for runes that are not ideographs.
	otherOffset = 0x50000

	// illegalOffset is otherOffset plus the largest valid rune.
	illegalOffset = otherOffset + int(unicode.MaxRune)

	// maxPrimary is one more than illegalOffset.
	maxPrimary = illegalOffset + 1
)
356
357
358
359
360
361
362 func implicitPrimary(r rune) int {
363 if unicode.Is(unicode.Ideographic, r) {
364 if r >= minUnified && r <= maxUnified {
365
366 return int(r) + commonUnifiedOffset
367 }
368 if r >= minCompatibility && r <= maxCompatibility {
369
370
371 return int(r) + commonUnifiedOffset
372 }
373 return int(r) + rareUnifiedOffset
374 }
375 return int(r) + otherOffset
376 }
377
View as plain text