1
2
3
4
5 package japanese
6
7 import (
8 "unicode/utf8"
9
10 "golang.org/x/text/encoding"
11 "golang.org/x/text/encoding/internal"
12 "golang.org/x/text/encoding/internal/identifier"
13 "golang.org/x/text/transform"
14 )
15
16
// ISO2022JP is the ISO-2022-JP encoding.
var ISO2022JP encoding.Encoding = &iso2022JP
18
// iso2022JP is the value behind the exported ISO2022JP variable. It bundles
// the decoder and encoder constructors with the encoding's name and its
// identifier from the identifier package.
var iso2022JP = internal.Encoding{
	internal.FuncEncoding{iso2022JPNewDecoder, iso2022JPNewEncoder},
	"ISO-2022-JP",
	identifier.ISO2022JP,
}
24
25 func iso2022JPNewDecoder() transform.Transformer {
26 return new(iso2022JPDecoder)
27 }
28
29 func iso2022JPNewEncoder() transform.Transformer {
30 return new(iso2022JPEncoder)
31 }
32
// Shift states shared by the decoder and the encoder. Each one names the
// character set most recently selected by an escape sequence.
const (
	asciiState    = iota // selected by ESC ( B or ESC ( J; also the zero value
	katakanaState        // selected by ESC ( I; one byte per character
	jis0208State         // selected by ESC $ @ or ESC $ B; two bytes per character
	jis0212State         // selected by ESC $ ( D; two bytes per character
)
39
// asciiEsc is the ESC byte that introduces every escape sequence.
const asciiEsc = 0x1b
41
// iso2022JPDecoder is a transformer from ISO-2022-JP to UTF-8. Its value is
// the current shift state (asciiState, katakanaState, jis0208State or
// jis0212State).
type iso2022JPDecoder int
43
44 func (d *iso2022JPDecoder) Reset() {
45 *d = asciiState
46 }
47
48 func (d *iso2022JPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
49 r, size := rune(0), 0
50 for ; nSrc < len(src); nSrc += size {
51 c0 := src[nSrc]
52 if c0 >= utf8.RuneSelf {
53 r, size = '\ufffd', 1
54 goto write
55 }
56
57 if c0 == asciiEsc {
58 if nSrc+2 >= len(src) {
59 if !atEOF {
60 return nDst, nSrc, transform.ErrShortSrc
61 }
62
63 r, size = '\ufffd', 1
64 goto write
65 }
66 size = 3
67 c1 := src[nSrc+1]
68 c2 := src[nSrc+2]
69 switch {
70 case c1 == '$' && (c2 == '@' || c2 == 'B'):
71 *d = jis0208State
72 continue
73 case c1 == '$' && c2 == '(':
74 if nSrc+3 >= len(src) {
75 if !atEOF {
76 return nDst, nSrc, transform.ErrShortSrc
77 }
78 r, size = '\ufffd', 1
79 goto write
80 }
81 size = 4
82 if src[nSrc+3] == 'D' {
83 *d = jis0212State
84 continue
85 }
86 case c1 == '(' && (c2 == 'B' || c2 == 'J'):
87 *d = asciiState
88 continue
89 case c1 == '(' && c2 == 'I':
90 *d = katakanaState
91 continue
92 }
93 r, size = '\ufffd', 1
94 goto write
95 }
96
97 switch *d {
98 case asciiState:
99 r, size = rune(c0), 1
100
101 case katakanaState:
102 if c0 < 0x21 || 0x60 <= c0 {
103 r, size = '\ufffd', 1
104 goto write
105 }
106 r, size = rune(c0)+(0xff61-0x21), 1
107
108 default:
109 if c0 == 0x0a {
110 *d = asciiState
111 r, size = rune(c0), 1
112 goto write
113 }
114 if nSrc+1 >= len(src) {
115 if !atEOF {
116 return nDst, nSrc, transform.ErrShortSrc
117 }
118 r, size = '\ufffd', 1
119 goto write
120 }
121 size = 2
122 c1 := src[nSrc+1]
123 i := int(c0-0x21)*94 + int(c1-0x21)
124 if *d == jis0208State && i < len(jis0208Decode) {
125 r = rune(jis0208Decode[i])
126 } else if *d == jis0212State && i < len(jis0212Decode) {
127 r = rune(jis0212Decode[i])
128 } else {
129 r = '\ufffd'
130 goto write
131 }
132 if r == 0 {
133 r = '\ufffd'
134 }
135 }
136
137 write:
138 if nDst+utf8.RuneLen(r) > len(dst) {
139 return nDst, nSrc, transform.ErrShortDst
140 }
141 nDst += utf8.EncodeRune(dst[nDst:], r)
142 }
143 return nDst, nSrc, err
144 }
145
// iso2022JPEncoder is a transformer from UTF-8 to ISO-2022-JP. Its value is
// the shift state most recently written to the output (asciiState,
// katakanaState or jis0208State).
type iso2022JPEncoder int
147
148 func (e *iso2022JPEncoder) Reset() {
149 *e = asciiState
150 }
151
152 func (e *iso2022JPEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
153 r, size := rune(0), 0
154 for ; nSrc < len(src); nSrc += size {
155 r = rune(src[nSrc])
156
157
158 if r < utf8.RuneSelf {
159 size = 1
160
161 } else {
162
163 r, size = utf8.DecodeRune(src[nSrc:])
164 if size == 1 {
165
166
167
168 if !atEOF && !utf8.FullRune(src[nSrc:]) {
169 err = transform.ErrShortSrc
170 break
171 }
172 }
173
174
175
176
177
178
179
180
181
182
183 switch {
184 case encode0Low <= r && r < encode0High:
185 if r = rune(encode0[r-encode0Low]); r>>tableShift == jis0208 {
186 goto writeJIS
187 }
188 case encode1Low <= r && r < encode1High:
189 if r = rune(encode1[r-encode1Low]); r>>tableShift == jis0208 {
190 goto writeJIS
191 }
192 case encode2Low <= r && r < encode2High:
193 if r = rune(encode2[r-encode2Low]); r>>tableShift == jis0208 {
194 goto writeJIS
195 }
196 case encode3Low <= r && r < encode3High:
197 if r = rune(encode3[r-encode3Low]); r>>tableShift == jis0208 {
198 goto writeJIS
199 }
200 case encode4Low <= r && r < encode4High:
201 if r = rune(encode4[r-encode4Low]); r>>tableShift == jis0208 {
202 goto writeJIS
203 }
204 case encode5Low <= r && r < encode5High:
205 if 0xff61 <= r && r < 0xffa0 {
206 goto writeKatakana
207 }
208 if r = rune(encode5[r-encode5Low]); r>>tableShift == jis0208 {
209 goto writeJIS
210 }
211 }
212
213
214
215 if *e != asciiState {
216 if nDst+3 > len(dst) {
217 err = transform.ErrShortDst
218 break
219 }
220 *e = asciiState
221 dst[nDst+0] = asciiEsc
222 dst[nDst+1] = '('
223 dst[nDst+2] = 'B'
224 nDst += 3
225 }
226 err = internal.ErrASCIIReplacement
227 break
228 }
229
230 if *e != asciiState {
231 if nDst+4 > len(dst) {
232 err = transform.ErrShortDst
233 break
234 }
235 *e = asciiState
236 dst[nDst+0] = asciiEsc
237 dst[nDst+1] = '('
238 dst[nDst+2] = 'B'
239 nDst += 3
240 } else if nDst >= len(dst) {
241 err = transform.ErrShortDst
242 break
243 }
244 dst[nDst] = uint8(r)
245 nDst++
246 continue
247
248 writeJIS:
249 if *e != jis0208State {
250 if nDst+5 > len(dst) {
251 err = transform.ErrShortDst
252 break
253 }
254 *e = jis0208State
255 dst[nDst+0] = asciiEsc
256 dst[nDst+1] = '$'
257 dst[nDst+2] = 'B'
258 nDst += 3
259 } else if nDst+2 > len(dst) {
260 err = transform.ErrShortDst
261 break
262 }
263 dst[nDst+0] = 0x21 + uint8(r>>codeShift)&codeMask
264 dst[nDst+1] = 0x21 + uint8(r)&codeMask
265 nDst += 2
266 continue
267
268 writeKatakana:
269 if *e != katakanaState {
270 if nDst+4 > len(dst) {
271 err = transform.ErrShortDst
272 break
273 }
274 *e = katakanaState
275 dst[nDst+0] = asciiEsc
276 dst[nDst+1] = '('
277 dst[nDst+2] = 'I'
278 nDst += 3
279 } else if nDst >= len(dst) {
280 err = transform.ErrShortDst
281 break
282 }
283 dst[nDst] = uint8(r - (0xff61 - 0x21))
284 nDst++
285 continue
286 }
287 if atEOF && err == nil && *e != asciiState {
288 if nDst+3 > len(dst) {
289 err = transform.ErrShortDst
290 } else {
291 *e = asciiState
292 dst[nDst+0] = asciiEsc
293 dst[nDst+1] = '('
294 dst[nDst+2] = 'B'
295 nDst += 3
296 }
297 }
298 return nDst, nSrc, err
299 }
300
View as plain text