...
1
2
3
4
5 package simplifiedchinese
6
7 import (
8 "unicode/utf8"
9
10 "golang.org/x/text/encoding"
11 "golang.org/x/text/encoding/internal"
12 "golang.org/x/text/encoding/internal/identifier"
13 "golang.org/x/text/transform"
14 )
15
16
17 var HZGB2312 encoding.Encoding = &hzGB2312
18
19 var hzGB2312 = internal.Encoding{
20 internal.FuncEncoding{hzGB2312NewDecoder, hzGB2312NewEncoder},
21 "HZ-GB2312",
22 identifier.HZGB2312,
23 }
24
25 func hzGB2312NewDecoder() transform.Transformer {
26 return new(hzGB2312Decoder)
27 }
28
29 func hzGB2312NewEncoder() transform.Transformer {
30 return new(hzGB2312Encoder)
31 }
32
// Modes shared by the HZ-GB2312 decoder and encoder.
const (
	// asciiState is the initial mode; it is 0 so that a zero-valued
	// decoder or encoder starts in it.
	asciiState = 0
	// gbState is the mode entered by "~{" and left by "~}".
	gbState = 1
)
37
38 type hzGB2312Decoder int
39
40 func (d *hzGB2312Decoder) Reset() {
41 *d = asciiState
42 }
43
44 func (d *hzGB2312Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
45 r, size := rune(0), 0
46 loop:
47 for ; nSrc < len(src); nSrc += size {
48 c0 := src[nSrc]
49 if c0 >= utf8.RuneSelf {
50 r, size = utf8.RuneError, 1
51 goto write
52 }
53
54 if c0 == '~' {
55 if nSrc+1 >= len(src) {
56 if !atEOF {
57 err = transform.ErrShortSrc
58 break loop
59 }
60 r, size = utf8.RuneError, 1
61 goto write
62 }
63 size = 2
64 switch src[nSrc+1] {
65 case '{':
66 *d = gbState
67 continue
68 case '}':
69 *d = asciiState
70 continue
71 case '~':
72 if nDst >= len(dst) {
73 err = transform.ErrShortDst
74 break loop
75 }
76 dst[nDst] = '~'
77 nDst++
78 continue
79 case '\n':
80 continue
81 default:
82 r = utf8.RuneError
83 goto write
84 }
85 }
86
87 if *d == asciiState {
88 r, size = rune(c0), 1
89 } else {
90 if nSrc+1 >= len(src) {
91 if !atEOF {
92 err = transform.ErrShortSrc
93 break loop
94 }
95 r, size = utf8.RuneError, 1
96 goto write
97 }
98 size = 2
99 c1 := src[nSrc+1]
100 if c0 < 0x21 || 0x7e <= c0 || c1 < 0x21 || 0x7f <= c1 {
101
102 } else if i := int(c0-0x01)*190 + int(c1+0x3f); i < len(decode) {
103 r = rune(decode[i])
104 if r != 0 {
105 goto write
106 }
107 }
108 if c1 > utf8.RuneSelf {
109
110 size = 1
111 }
112 r = utf8.RuneError
113 }
114
115 write:
116 if nDst+utf8.RuneLen(r) > len(dst) {
117 err = transform.ErrShortDst
118 break loop
119 }
120 nDst += utf8.EncodeRune(dst[nDst:], r)
121 }
122 return nDst, nSrc, err
123 }
124
125 type hzGB2312Encoder int
126
127 func (d *hzGB2312Encoder) Reset() {
128 *d = asciiState
129 }
130
131 func (e *hzGB2312Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
132 r, size := rune(0), 0
133 for ; nSrc < len(src); nSrc += size {
134 r = rune(src[nSrc])
135
136
137 if r < utf8.RuneSelf {
138 size = 1
139 if r == '~' {
140 if nDst+2 > len(dst) {
141 err = transform.ErrShortDst
142 break
143 }
144 dst[nDst+0] = '~'
145 dst[nDst+1] = '~'
146 nDst += 2
147 continue
148 } else if *e != asciiState {
149 if nDst+3 > len(dst) {
150 err = transform.ErrShortDst
151 break
152 }
153 *e = asciiState
154 dst[nDst+0] = '~'
155 dst[nDst+1] = '}'
156 nDst += 2
157 } else if nDst >= len(dst) {
158 err = transform.ErrShortDst
159 break
160 }
161 dst[nDst] = uint8(r)
162 nDst += 1
163 continue
164
165 }
166
167
168 r, size = utf8.DecodeRune(src[nSrc:])
169 if size == 1 {
170
171
172
173 if !atEOF && !utf8.FullRune(src[nSrc:]) {
174 err = transform.ErrShortSrc
175 break
176 }
177 }
178
179
180 switch {
181 case encode0Low <= r && r < encode0High:
182 if r = rune(encode0[r-encode0Low]); r != 0 {
183 goto writeGB
184 }
185 case encode1Low <= r && r < encode1High:
186 if r = rune(encode1[r-encode1Low]); r != 0 {
187 goto writeGB
188 }
189 case encode2Low <= r && r < encode2High:
190 if r = rune(encode2[r-encode2Low]); r != 0 {
191 goto writeGB
192 }
193 case encode3Low <= r && r < encode3High:
194 if r = rune(encode3[r-encode3Low]); r != 0 {
195 goto writeGB
196 }
197 case encode4Low <= r && r < encode4High:
198 if r = rune(encode4[r-encode4Low]); r != 0 {
199 goto writeGB
200 }
201 }
202
203 terminateInASCIIState:
204
205
206 if *e != asciiState {
207 if nDst+2 > len(dst) {
208 err = transform.ErrShortDst
209 break
210 }
211 dst[nDst+0] = '~'
212 dst[nDst+1] = '}'
213 nDst += 2
214 }
215 err = internal.ErrASCIIReplacement
216 break
217
218 writeGB:
219 c0 := uint8(r>>8) - 0x80
220 c1 := uint8(r) - 0x80
221 if c0 < 0x21 || 0x7e <= c0 || c1 < 0x21 || 0x7f <= c1 {
222 goto terminateInASCIIState
223 }
224 if *e == asciiState {
225 if nDst+4 > len(dst) {
226 err = transform.ErrShortDst
227 break
228 }
229 *e = gbState
230 dst[nDst+0] = '~'
231 dst[nDst+1] = '{'
232 nDst += 2
233 } else if nDst+2 > len(dst) {
234 err = transform.ErrShortDst
235 break
236 }
237 dst[nDst+0] = c0
238 dst[nDst+1] = c1
239 nDst += 2
240 continue
241 }
242
243
244 return nDst, nSrc, err
245 }
246
View as plain text