...
1
2
3
4
5
6 package utf16
7
8
9
10
11
// Code points shared by the encoding and decoding routines in this package.
const (
	// replacementChar is U+FFFD, the value substituted for any input that
	// cannot be encoded or decoded.
	replacementChar = '\uFFFD'

	// maxRune is U+10FFFF; runes above it are rejected as invalid.
	maxRune = '\U0010FFFF'
)
16
// Boundaries used to classify UTF-16 code units and code points.
//
// A surrogate pair is a unit in [surr1, surr2) followed by a unit in
// [surr2, surr3). The first carries the upper 10 bits and the second the
// lower 10 bits of the encoded value, which is offset by surrSelf.
const (
	surr1 = 0xD800 // first high (leading) surrogate
	surr2 = 0xDC00 // first low (trailing) surrogate; end of the high range
	surr3 = 0xE000 // first value past the surrogate range

	surrSelf = 0x10000 // first code point that needs a surrogate pair
)
27
28
29
30 func IsSurrogate(r rune) bool {
31 return surr1 <= r && r < surr3
32 }
33
34
35
36
37 func DecodeRune(r1, r2 rune) rune {
38 if surr1 <= r1 && r1 < surr2 && surr2 <= r2 && r2 < surr3 {
39 return (r1-surr1)<<10 | (r2 - surr2) + surrSelf
40 }
41 return replacementChar
42 }
43
44
45
46
47 func EncodeRune(r rune) (r1, r2 rune) {
48 if r < surrSelf || r > maxRune {
49 return replacementChar, replacementChar
50 }
51 r -= surrSelf
52 return surr1 + (r>>10)&0x3ff, surr2 + r&0x3ff
53 }
54
55
56 func Encode(s []rune) []uint16 {
57 n := len(s)
58 for _, v := range s {
59 if v >= surrSelf {
60 n++
61 }
62 }
63
64 a := make([]uint16, n)
65 n = 0
66 for _, v := range s {
67 switch {
68 case 0 <= v && v < surr1, surr3 <= v && v < surrSelf:
69
70 a[n] = uint16(v)
71 n++
72 case surrSelf <= v && v <= maxRune:
73
74 r1, r2 := EncodeRune(v)
75 a[n] = uint16(r1)
76 a[n+1] = uint16(r2)
77 n += 2
78 default:
79 a[n] = uint16(replacementChar)
80 n++
81 }
82 }
83 return a[:n]
84 }
85
86
87
88
89 func AppendRune(a []uint16, r rune) []uint16 {
90
91 switch {
92 case 0 <= r && r < surr1, surr3 <= r && r < surrSelf:
93
94 return append(a, uint16(r))
95 case surrSelf <= r && r <= maxRune:
96
97 r1, r2 := EncodeRune(r)
98 return append(a, uint16(r1), uint16(r2))
99 }
100 return append(a, replacementChar)
101 }
102
103
104
105 func Decode(s []uint16) []rune {
106
107
108 buf := make([]rune, 0, 64)
109 return decode(s, buf)
110 }
111
112
113
114 func decode(s []uint16, buf []rune) []rune {
115 for i := 0; i < len(s); i++ {
116 var ar rune
117 switch r := s[i]; {
118 case r < surr1, surr3 <= r:
119
120 ar = rune(r)
121 case surr1 <= r && r < surr2 && i+1 < len(s) &&
122 surr2 <= s[i+1] && s[i+1] < surr3:
123
124 ar = DecodeRune(rune(r), rune(s[i+1]))
125 i++
126 default:
127
128 ar = replacementChar
129 }
130 buf = append(buf, ar)
131 }
132 return buf
133 }
134
View as plain text