1 package decoder
2
3 import (
4 "bytes"
5 "encoding"
6 "fmt"
7 "unicode"
8 "unicode/utf16"
9 "unicode/utf8"
10 "unsafe"
11
12 "github.com/goccy/go-json/internal/errors"
13 "github.com/goccy/go-json/internal/runtime"
14 )
15
16 type unmarshalTextDecoder struct {
17 typ *runtime.Type
18 structName string
19 fieldName string
20 }
21
22 func newUnmarshalTextDecoder(typ *runtime.Type, structName, fieldName string) *unmarshalTextDecoder {
23 return &unmarshalTextDecoder{
24 typ: typ,
25 structName: structName,
26 fieldName: fieldName,
27 }
28 }
29
30 func (d *unmarshalTextDecoder) annotateError(cursor int64, err error) {
31 switch e := err.(type) {
32 case *errors.UnmarshalTypeError:
33 e.Struct = d.structName
34 e.Field = d.fieldName
35 case *errors.SyntaxError:
36 e.Offset = cursor
37 }
38 }
39
// nullbytes is the JSON null literal; the decoders in this file compare raw
// values against it to detect an exact `null`.
var nullbytes = []byte("null")
43
44 func (d *unmarshalTextDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error {
45 s.skipWhiteSpace()
46 start := s.cursor
47 if err := s.skipValue(depth); err != nil {
48 return err
49 }
50 src := s.buf[start:s.cursor]
51 if len(src) > 0 {
52 switch src[0] {
53 case '[':
54 return &errors.UnmarshalTypeError{
55 Value: "array",
56 Type: runtime.RType2Type(d.typ),
57 Offset: s.totalOffset(),
58 }
59 case '{':
60 return &errors.UnmarshalTypeError{
61 Value: "object",
62 Type: runtime.RType2Type(d.typ),
63 Offset: s.totalOffset(),
64 }
65 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
66 return &errors.UnmarshalTypeError{
67 Value: "number",
68 Type: runtime.RType2Type(d.typ),
69 Offset: s.totalOffset(),
70 }
71 case 'n':
72 if bytes.Equal(src, nullbytes) {
73 *(*unsafe.Pointer)(p) = nil
74 return nil
75 }
76 }
77 }
78 dst := make([]byte, len(src))
79 copy(dst, src)
80
81 if b, ok := unquoteBytes(dst); ok {
82 dst = b
83 }
84 v := *(*interface{})(unsafe.Pointer(&emptyInterface{
85 typ: d.typ,
86 ptr: p,
87 }))
88 if err := v.(encoding.TextUnmarshaler).UnmarshalText(dst); err != nil {
89 d.annotateError(s.cursor, err)
90 return err
91 }
92 return nil
93 }
94
95 func (d *unmarshalTextDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) {
96 buf := ctx.Buf
97 cursor = skipWhiteSpace(buf, cursor)
98 start := cursor
99 end, err := skipValue(buf, cursor, depth)
100 if err != nil {
101 return 0, err
102 }
103 src := buf[start:end]
104 if len(src) > 0 {
105 switch src[0] {
106 case '[':
107 return 0, &errors.UnmarshalTypeError{
108 Value: "array",
109 Type: runtime.RType2Type(d.typ),
110 Offset: start,
111 }
112 case '{':
113 return 0, &errors.UnmarshalTypeError{
114 Value: "object",
115 Type: runtime.RType2Type(d.typ),
116 Offset: start,
117 }
118 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
119 return 0, &errors.UnmarshalTypeError{
120 Value: "number",
121 Type: runtime.RType2Type(d.typ),
122 Offset: start,
123 }
124 case 'n':
125 if bytes.Equal(src, nullbytes) {
126 *(*unsafe.Pointer)(p) = nil
127 return end, nil
128 }
129 }
130 }
131
132 if s, ok := unquoteBytes(src); ok {
133 src = s
134 }
135 v := *(*interface{})(unsafe.Pointer(&emptyInterface{
136 typ: d.typ,
137 ptr: *(*unsafe.Pointer)(unsafe.Pointer(&p)),
138 }))
139 if err := v.(encoding.TextUnmarshaler).UnmarshalText(src); err != nil {
140 d.annotateError(cursor, err)
141 return 0, err
142 }
143 return end, nil
144 }
145
146 func (d *unmarshalTextDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) {
147 return nil, 0, fmt.Errorf("json: unmarshal text decoder does not support decode path")
148 }
149
150 func unquoteBytes(s []byte) (t []byte, ok bool) {
151 length := len(s)
152 if length < 2 || s[0] != '"' || s[length-1] != '"' {
153 return
154 }
155 s = s[1 : length-1]
156 length -= 2
157
158
159
160
161 r := 0
162 for r < length {
163 c := s[r]
164 if c == '\\' || c == '"' || c < ' ' {
165 break
166 }
167 if c < utf8.RuneSelf {
168 r++
169 continue
170 }
171 rr, size := utf8.DecodeRune(s[r:])
172 if rr == utf8.RuneError && size == 1 {
173 break
174 }
175 r += size
176 }
177 if r == length {
178 return s, true
179 }
180
181 b := make([]byte, length+2*utf8.UTFMax)
182 w := copy(b, s[0:r])
183 for r < length {
184
185
186
187 if w >= len(b)-2*utf8.UTFMax {
188 nb := make([]byte, (len(b)+utf8.UTFMax)*2)
189 copy(nb, b[0:w])
190 b = nb
191 }
192 switch c := s[r]; {
193 case c == '\\':
194 r++
195 if r >= length {
196 return
197 }
198 switch s[r] {
199 default:
200 return
201 case '"', '\\', '/', '\'':
202 b[w] = s[r]
203 r++
204 w++
205 case 'b':
206 b[w] = '\b'
207 r++
208 w++
209 case 'f':
210 b[w] = '\f'
211 r++
212 w++
213 case 'n':
214 b[w] = '\n'
215 r++
216 w++
217 case 'r':
218 b[w] = '\r'
219 r++
220 w++
221 case 't':
222 b[w] = '\t'
223 r++
224 w++
225 case 'u':
226 r--
227 rr := getu4(s[r:])
228 if rr < 0 {
229 return
230 }
231 r += 6
232 if utf16.IsSurrogate(rr) {
233 rr1 := getu4(s[r:])
234 if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
235
236 r += 6
237 w += utf8.EncodeRune(b[w:], dec)
238 break
239 }
240
241 rr = unicode.ReplacementChar
242 }
243 w += utf8.EncodeRune(b[w:], rr)
244 }
245
246
247 case c == '"', c < ' ':
248 return
249
250
251 case c < utf8.RuneSelf:
252 b[w] = c
253 r++
254 w++
255
256
257 default:
258 rr, size := utf8.DecodeRune(s[r:])
259 r += size
260 w += utf8.EncodeRune(b[w:], rr)
261 }
262 }
263 return b[0:w], true
264 }
265
// getu4 decodes the \uXXXX escape at the beginning of s and returns the value
// of its four hexadecimal digits.
//
// It returns -1 when s is shorter than six bytes, does not start with `\u`,
// or contains a non-hexadecimal character among the four digits. Both upper
// and lower case hex digits are accepted.
func getu4(s []byte) rune {
	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
		return -1
	}
	var code rune
	for i := 2; i < 6; i++ {
		var nibble rune
		switch ch := s[i]; {
		case ch >= '0' && ch <= '9':
			nibble = rune(ch - '0')
		case ch >= 'a' && ch <= 'f':
			nibble = rune(ch-'a') + 10
		case ch >= 'A' && ch <= 'F':
			nibble = rune(ch-'A') + 10
		default:
			return -1
		}
		// Each digit contributes four bits.
		code = code<<4 | nibble
	}
	return code
}
286
View as plain text