1 package decoder
2
3 import (
4 "bytes"
5 "fmt"
6 "reflect"
7 "unicode"
8 "unicode/utf16"
9 "unicode/utf8"
10 "unsafe"
11
12 "github.com/goccy/go-json/internal/errors"
13 )
14
// stringDecoder decodes a JSON string (or null) into a Go string.
// The two names are only used to fill the Struct and Field members of the
// UnmarshalTypeError reported when the input holds another kind of value.
type stringDecoder struct {
	structName, fieldName string
}
19
20 func newStringDecoder(structName, fieldName string) *stringDecoder {
21 return &stringDecoder{
22 structName: structName,
23 fieldName: fieldName,
24 }
25 }
26
27 func (d *stringDecoder) errUnmarshalType(typeName string, offset int64) *errors.UnmarshalTypeError {
28 return &errors.UnmarshalTypeError{
29 Value: typeName,
30 Type: reflect.TypeOf(""),
31 Offset: offset,
32 Struct: d.structName,
33 Field: d.fieldName,
34 }
35 }
36
37 func (d *stringDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error {
38 bytes, err := d.decodeStreamByte(s)
39 if err != nil {
40 return err
41 }
42 if bytes == nil {
43 return nil
44 }
45 **(**string)(unsafe.Pointer(&p)) = *(*string)(unsafe.Pointer(&bytes))
46 s.reset()
47 return nil
48 }
49
50 func (d *stringDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) {
51 bytes, c, err := d.decodeByte(ctx.Buf, cursor)
52 if err != nil {
53 return 0, err
54 }
55 if bytes == nil {
56 return c, nil
57 }
58 cursor = c
59 **(**string)(unsafe.Pointer(&p)) = *(*string)(unsafe.Pointer(&bytes))
60 return cursor, nil
61 }
62
63 func (d *stringDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) {
64 bytes, c, err := d.decodeByte(ctx.Buf, cursor)
65 if err != nil {
66 return nil, 0, err
67 }
68 if bytes == nil {
69 return [][]byte{nullbytes}, c, nil
70 }
71 return [][]byte{bytes}, c, nil
72 }
73
// hexToInt maps an ASCII hexadecimal digit (either case) to its value.
// Every other byte maps to 0, so callers that need to reject non-hex input
// must validate it themselves.
var hexToInt = [256]int{
	'0': 0, '1': 1, '2': 2, '3': 3, '4': 4,
	'5': 5, '6': 6, '7': 7, '8': 8, '9': 9,
	'A': 10, 'B': 11, 'C': 12, 'D': 13, 'E': 14, 'F': 15,
	'a': 10, 'b': 11, 'c': 12, 'd': 13, 'e': 14, 'f': 15,
}
100
101 func unicodeToRune(code []byte) rune {
102 var r rune
103 for i := 0; i < len(code); i++ {
104 r = r*16 + rune(hexToInt[code[i]])
105 }
106 return r
107 }
108
109 func readAtLeast(s *Stream, n int64, p *unsafe.Pointer) bool {
110 for s.cursor+n >= s.length {
111 if !s.read() {
112 return false
113 }
114 *p = s.bufptr()
115 }
116 return true
117 }
118
119 func decodeUnicodeRune(s *Stream, p unsafe.Pointer) (rune, int64, unsafe.Pointer, error) {
120 const defaultOffset = 5
121 const surrogateOffset = 11
122
123 if !readAtLeast(s, defaultOffset, &p) {
124 return rune(0), 0, nil, errors.ErrInvalidCharacter(s.char(), "escaped string", s.totalOffset())
125 }
126
127 r := unicodeToRune(s.buf[s.cursor+1 : s.cursor+defaultOffset])
128 if utf16.IsSurrogate(r) {
129 if !readAtLeast(s, surrogateOffset, &p) {
130 return unicode.ReplacementChar, defaultOffset, p, nil
131 }
132 if s.buf[s.cursor+defaultOffset] != '\\' || s.buf[s.cursor+defaultOffset+1] != 'u' {
133 return unicode.ReplacementChar, defaultOffset, p, nil
134 }
135 r2 := unicodeToRune(s.buf[s.cursor+defaultOffset+2 : s.cursor+surrogateOffset])
136 if r := utf16.DecodeRune(r, r2); r != unicode.ReplacementChar {
137 return r, surrogateOffset, p, nil
138 }
139 }
140 return r, defaultOffset, p, nil
141 }
142
143 func decodeUnicode(s *Stream, p unsafe.Pointer) (unsafe.Pointer, error) {
144 const backSlashAndULen = 2
145
146 r, offset, pp, err := decodeUnicodeRune(s, p)
147 if err != nil {
148 return nil, err
149 }
150 unicode := []byte(string(r))
151 unicodeLen := int64(len(unicode))
152 s.buf = append(append(s.buf[:s.cursor-1], unicode...), s.buf[s.cursor+offset:]...)
153 unicodeOrgLen := offset - 1
154 s.length = s.length - (backSlashAndULen + (unicodeOrgLen - unicodeLen))
155 s.cursor = s.cursor - backSlashAndULen + unicodeLen
156 return pp, nil
157 }
158
159 func decodeEscapeString(s *Stream, p unsafe.Pointer) (unsafe.Pointer, error) {
160 s.cursor++
161 RETRY:
162 switch s.buf[s.cursor] {
163 case '"':
164 s.buf[s.cursor] = '"'
165 case '\\':
166 s.buf[s.cursor] = '\\'
167 case '/':
168 s.buf[s.cursor] = '/'
169 case 'b':
170 s.buf[s.cursor] = '\b'
171 case 'f':
172 s.buf[s.cursor] = '\f'
173 case 'n':
174 s.buf[s.cursor] = '\n'
175 case 'r':
176 s.buf[s.cursor] = '\r'
177 case 't':
178 s.buf[s.cursor] = '\t'
179 case 'u':
180 return decodeUnicode(s, p)
181 case nul:
182 if !s.read() {
183 return nil, errors.ErrInvalidCharacter(s.char(), "escaped string", s.totalOffset())
184 }
185 p = s.bufptr()
186 goto RETRY
187 default:
188 return nil, errors.ErrUnexpectedEndOfJSON("string", s.totalOffset())
189 }
190 s.buf = append(s.buf[:s.cursor-1], s.buf[s.cursor:]...)
191 s.length--
192 s.cursor--
193 p = s.bufptr()
194 return p, nil
195 }
196
// runeErrBytes is the UTF-8 encoding of utf8.RuneError, written in place of
// bytes that are not valid UTF-8.
var runeErrBytes = []byte(string(utf8.RuneError))

// runeErrBytesLen is len(runeErrBytes) as an int64, ready for cursor math.
var runeErrBytesLen = int64(len(runeErrBytes))
201
// stringBytes scans the JSON string literal that starts at the stream cursor
// and returns its contents (without the surrounding quotes) as a sub-slice of
// s.buf. Escape sequences are decoded in place and bytes that are not valid
// UTF-8 are replaced in the buffer by runeErrBytes. On success s.cursor is
// left just past the closing quote. When the input ends before the closing
// quote, ErrUnexpectedEndOfJSON is returned.
func stringBytes(s *Stream) ([]byte, error) {
	_, cursor, p := s.stat()
	cursor++ // step past the opening double quote
	start := cursor
	for {
		switch char(p, cursor) {
		case '\\':
			// Hand the escape to decodeEscapeString, which works on s.cursor
			// and rewrites s.buf; pick the updated position and pointer back up.
			s.cursor = cursor
			pp, err := decodeEscapeString(s, p)
			if err != nil {
				return nil, err
			}
			p = pp
			cursor = s.cursor
		case '"':
			// Closing quote: return everything scanned so far.
			literal := s.buf[start:cursor]
			cursor++
			s.cursor = cursor
			return literal, nil
		case
			// Single-byte characters that need no rewriting. 0x00 (nul),
			// 0x22 ('"') and 0x5C ('\\') are absent because they have their
			// own cases.
			0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
			0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
			0x20, 0x21, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
			0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
			0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
			0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5D, 0x5E, 0x5F,
			0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
			0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F:
			// Nothing to do; fall out of the switch to the cursor++ below.
		case
			// Bytes that can never start a UTF-8 sequence: continuation bytes
			// (0x80-0xBF), 0xC0, 0xC1 and 0xF5-0xFF.
			0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
			0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
			0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
			0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
			0xC0, 0xC1,
			0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF:
			// Rebuild s.buf in fresh storage with the single bad byte swapped
			// for runeErrBytes, then skip over the inserted bytes.
			s.buf = append(append(append([]byte{}, s.buf[:cursor]...), runeErrBytes...), s.buf[cursor+1:]...)
			_, _, p = s.stat() // the buffer was reallocated; refresh the pointer
			cursor += runeErrBytesLen
			// NOTE(review): one byte was replaced by runeErrBytesLen bytes, so
			// the buffer grew by runeErrBytesLen-1, yet length is bumped by
			// runeErrBytesLen - confirm this is intended.
			s.length += runeErrBytesLen
			continue
		case nul:
			// Ran out of buffered input: try to read more and resume from the
			// stream's state, otherwise the string is unterminated.
			s.cursor = cursor
			if s.read() {
				_, cursor, p = s.stat()
				continue
			}
			goto ERROR
		case 0xEF:
			// 0xEF 0xBF 0xBD is the encoding of utf8.RuneError itself. It must
			// be recognised here, because the default branch would treat the
			// decoded RuneError as a decoding failure.
			// NOTE(review): cursor+1 and cursor+2 are indexed without a length
			// check - presumably the buffer is padded; confirm.
			if s.buf[cursor+1] == 0xBF && s.buf[cursor+2] == 0xBD {
				// Skip two bytes here; the cursor++ after the switch skips the third.
				cursor += 2
				break
			}
			fallthrough
		default:
			// Lead byte of a multi-byte sequence. If the sequence is not yet
			// completely buffered, read more input before decoding.
			// NOTE(review): the last byte of s.buf is excluded from the check -
			// presumably a terminator; confirm.
			if !utf8.FullRune(s.buf[cursor : len(s.buf)-1]) {
				s.cursor = cursor
				if s.read() {
					_, cursor, p = s.stat()
					continue
				}
				goto ERROR
			}
			r, size := utf8.DecodeRune(s.buf[cursor:])
			if r == utf8.RuneError {
				// Invalid sequence: replace its first byte with runeErrBytes,
				// exactly as in the invalid-lead-byte case above.
				s.buf = append(append(append([]byte{}, s.buf[:cursor]...), runeErrBytes...), s.buf[cursor+1:]...)
				cursor += runeErrBytesLen
				s.length += runeErrBytesLen
				_, _, p = s.stat()
			} else {
				cursor += int64(size)
			}
			continue
		}
		cursor++
	}
ERROR:
	return nil, errors.ErrUnexpectedEndOfJSON("string", s.totalOffset())
}
286
287 func (d *stringDecoder) decodeStreamByte(s *Stream) ([]byte, error) {
288 for {
289 switch s.char() {
290 case ' ', '\n', '\t', '\r':
291 s.cursor++
292 continue
293 case '[':
294 return nil, d.errUnmarshalType("array", s.totalOffset())
295 case '{':
296 return nil, d.errUnmarshalType("object", s.totalOffset())
297 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
298 return nil, d.errUnmarshalType("number", s.totalOffset())
299 case '"':
300 return stringBytes(s)
301 case 'n':
302 if err := nullBytes(s); err != nil {
303 return nil, err
304 }
305 return nil, nil
306 case nul:
307 if s.read() {
308 continue
309 }
310 }
311 break
312 }
313 return nil, errors.ErrInvalidBeginningOfValue(s.char(), s.totalOffset())
314 }
315
// decodeByte skips leading whitespace in buf from cursor and decodes the value
// that follows.
//
// For a string it returns the contents without the quotes, as a sub-slice of
// buf, and the cursor just past the closing quote. When the string contains
// escapes they are decoded in place by unescapeString, so buf is modified.
// For null it returns a nil slice and the cursor just past the literal.
// An array, object or number yields an UnmarshalTypeError; malformed input
// yields a syntax / unexpected-end error. On error the returned cursor is 0.
func (d *stringDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) {
	for {
		switch buf[cursor] {
		case ' ', '\n', '\t', '\r':
			cursor++
		case '[':
			return nil, 0, d.errUnmarshalType("array", cursor)
		case '{':
			return nil, 0, d.errUnmarshalType("object", cursor)
		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			return nil, 0, d.errUnmarshalType("number", cursor)
		case '"':
			cursor++ // step past the opening quote
			start := cursor
			// Raw data pointer of buf, read through char() below.
			b := (*sliceHeader)(unsafe.Pointer(&buf)).data
			escaped := 0 // number of escape sequences seen
			for {
				switch char(b, cursor) {
				case '\\':
					// Only validate and skip the escape here; decoding is
					// deferred until the closing quote is found.
					escaped++
					cursor++
					switch char(b, cursor) {
					case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
						cursor++
					case 'u':
						buflen := int64(len(buf))
						// The 'u', its four hex digits and at least one more
						// byte must lie inside buf.
						if cursor+5 >= buflen {
							return nil, 0, errors.ErrUnexpectedEndOfJSON("escaped string", cursor)
						}
						for i := int64(1); i <= 4; i++ {
							c := char(b, cursor+i)
							if !(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')) {
								return nil, 0, errors.ErrSyntax(fmt.Sprintf("json: invalid character %c in \\u hexadecimal character escape", c), cursor+i)
							}
						}
						cursor += 5
					default:
						return nil, 0, errors.ErrUnexpectedEndOfJSON("escaped string", cursor)
					}
					// cursor already sits on the byte after the escape, so
					// skip the cursor++ at the bottom of the loop.
					continue
				case '"':
					literal := buf[start:cursor]
					if escaped > 0 {
						// unescapeString rewrites literal in place and returns
						// the shortened length.
						literal = literal[:unescapeString(literal)]
					}
					cursor++
					return literal, cursor, nil
				case nul:
					// Hit the terminator before a closing quote.
					return nil, 0, errors.ErrUnexpectedEndOfJSON("string", cursor)
				}
				cursor++
			}
		case 'n':
			if err := validateNull(buf, cursor); err != nil {
				return nil, 0, err
			}
			cursor += 4 // len("null")
			return nil, cursor, nil
		default:
			return nil, 0, errors.ErrInvalidBeginningOfValue(buf[cursor], cursor)
		}
	}
}
379
// unescapeMap maps the character that follows a backslash in a JSON string to
// the byte it stands for. Characters that are not single-character escapes
// (including 'u') map to 0.
var unescapeMap = [256]byte{
	// Escapes that denote themselves.
	'"': '"', '\\': '\\', '/': '/',
	// Control-character escapes.
	'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
}
390
// unsafeAdd returns ptr advanced by offset bytes.
//
// The conversion to uintptr, the addition and the conversion back are kept in
// one expression on purpose: the unsafe.Pointer rules only allow pointer
// arithmetic in that form, so do not split it into a temporary.
func unsafeAdd(ptr unsafe.Pointer, offset int) unsafe.Pointer {
	return unsafe.Pointer(uintptr(ptr) + uintptr(offset))
}
394
395 func unescapeString(buf []byte) int {
396 p := (*sliceHeader)(unsafe.Pointer(&buf)).data
397 end := unsafeAdd(p, len(buf))
398 src := unsafeAdd(p, bytes.IndexByte(buf, '\\'))
399 dst := src
400 for src != end {
401 c := char(src, 0)
402 if c == '\\' {
403 escapeChar := char(src, 1)
404 if escapeChar != 'u' {
405 *(*byte)(dst) = unescapeMap[escapeChar]
406 src = unsafeAdd(src, 2)
407 dst = unsafeAdd(dst, 1)
408 } else {
409 v1 := hexToInt[char(src, 2)]
410 v2 := hexToInt[char(src, 3)]
411 v3 := hexToInt[char(src, 4)]
412 v4 := hexToInt[char(src, 5)]
413 code := rune((v1 << 12) | (v2 << 8) | (v3 << 4) | v4)
414 if code >= 0xd800 && code < 0xdc00 && uintptr(unsafeAdd(src, 11)) < uintptr(end) {
415 if char(src, 6) == '\\' && char(src, 7) == 'u' {
416 v1 := hexToInt[char(src, 8)]
417 v2 := hexToInt[char(src, 9)]
418 v3 := hexToInt[char(src, 10)]
419 v4 := hexToInt[char(src, 11)]
420 lo := rune((v1 << 12) | (v2 << 8) | (v3 << 4) | v4)
421 if lo >= 0xdc00 && lo < 0xe000 {
422 code = (code-0xd800)<<10 | (lo - 0xdc00) + 0x10000
423 src = unsafeAdd(src, 6)
424 }
425 }
426 }
427 var b [utf8.UTFMax]byte
428 n := utf8.EncodeRune(b[:], code)
429 switch n {
430 case 4:
431 *(*byte)(unsafeAdd(dst, 3)) = b[3]
432 fallthrough
433 case 3:
434 *(*byte)(unsafeAdd(dst, 2)) = b[2]
435 fallthrough
436 case 2:
437 *(*byte)(unsafeAdd(dst, 1)) = b[1]
438 fallthrough
439 case 1:
440 *(*byte)(unsafeAdd(dst, 0)) = b[0]
441 }
442 src = unsafeAdd(src, 6)
443 dst = unsafeAdd(dst, n)
444 }
445 } else {
446 *(*byte)(dst) = c
447 src = unsafeAdd(src, 1)
448 dst = unsafeAdd(dst, 1)
449 }
450 }
451 return int(uintptr(dst) - uintptr(p))
452 }
453
View as plain text