1 package encoder
2
3 import (
4 "math/bits"
5 "reflect"
6 "unsafe"
7 )
8
const (
	// lsb has the least-significant bit of every byte set. Multiplying it by a
	// byte value replicates that value into all eight bytes of a 64-bit word.
	lsb = 0x0101010101010101
	// msb has the most-significant bit of every byte set. It is used to pick
	// the per-byte flag bits out of the masks built by the append*String
	// functions in this file.
	msb = 0x8080808080808080
)

// hex holds the lowercase hexadecimal digits; it is indexed by nibble value
// when a byte is written as a \u00XX escape.
var hex = "0123456789abcdef"
15
16
// stringToUint64Slice reinterprets the bytes of s as a []uint64 without
// copying. The result has len(s)/8 elements, so up to seven trailing bytes of
// s are not covered by it. The slice aliases the string's memory and must be
// treated as read-only.
//
// The slice header of a real slice variable is filled in place. The unsafe
// package documents that a reflect.SliceHeader declared on its own (as a
// composite literal, for instance) does not hold Data as a reference, so
// casting such a header to a slice is not a valid conversion.
//
// NOTE: reflect.SliceHeader and reflect.StringHeader are deprecated since
// Go 1.20; unsafe.Slice with unsafe.StringData is the replacement once the
// module's minimum Go version allows it.
func stringToUint64Slice(s string) []uint64 {
	var chunks []uint64
	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&chunks))
	hdr.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data
	hdr.Len = len(s) / 8
	hdr.Cap = len(s) / 8
	return chunks
}
24
25 func AppendString(ctx *RuntimeContext, buf []byte, s string) []byte {
26 if ctx.Option.Flag&HTMLEscapeOption != 0 {
27 if ctx.Option.Flag&NormalizeUTF8Option != 0 {
28 return appendNormalizedHTMLString(buf, s)
29 }
30 return appendHTMLString(buf, s)
31 }
32 if ctx.Option.Flag&NormalizeUTF8Option != 0 {
33 return appendNormalizedString(buf, s)
34 }
35 return appendString(buf, s)
36 }
37
38 func appendNormalizedHTMLString(buf []byte, s string) []byte {
39 valLen := len(s)
40 if valLen == 0 {
41 return append(buf, `""`...)
42 }
43 buf = append(buf, '"')
44 var (
45 i, j int
46 )
47 if valLen >= 8 {
48 chunks := stringToUint64Slice(s)
49 for _, n := range chunks {
50
51
52
53 mask := n | (n - (lsb * 0x20)) |
54 ((n ^ (lsb * '"')) - lsb) |
55 ((n ^ (lsb * '\\')) - lsb) |
56 ((n ^ (lsb * '<')) - lsb) |
57 ((n ^ (lsb * '>')) - lsb) |
58 ((n ^ (lsb * '&')) - lsb)
59 if (mask & msb) != 0 {
60 j = bits.TrailingZeros64(mask&msb) / 8
61 goto ESCAPE_END
62 }
63 }
64 for i := len(chunks) * 8; i < valLen; i++ {
65 if needEscapeHTMLNormalizeUTF8[s[i]] {
66 j = i
67 goto ESCAPE_END
68 }
69 }
70
71 return append(append(buf, s...), '"')
72 }
73 ESCAPE_END:
74 for j < valLen {
75 c := s[j]
76
77 if !needEscapeHTMLNormalizeUTF8[c] {
78
79 j++
80 continue
81 }
82
83 switch c {
84 case '\\', '"':
85 buf = append(buf, s[i:j]...)
86 buf = append(buf, '\\', c)
87 i = j + 1
88 j = j + 1
89 continue
90
91 case '\n':
92 buf = append(buf, s[i:j]...)
93 buf = append(buf, '\\', 'n')
94 i = j + 1
95 j = j + 1
96 continue
97
98 case '\r':
99 buf = append(buf, s[i:j]...)
100 buf = append(buf, '\\', 'r')
101 i = j + 1
102 j = j + 1
103 continue
104
105 case '\t':
106 buf = append(buf, s[i:j]...)
107 buf = append(buf, '\\', 't')
108 i = j + 1
109 j = j + 1
110 continue
111
112 case '<', '>', '&':
113 buf = append(buf, s[i:j]...)
114 buf = append(buf, `\u00`...)
115 buf = append(buf, hex[c>>4], hex[c&0xF])
116 i = j + 1
117 j = j + 1
118 continue
119
120 case 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, 0x0F,
121 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F:
122 buf = append(buf, s[i:j]...)
123 buf = append(buf, `\u00`...)
124 buf = append(buf, hex[c>>4], hex[c&0xF])
125 i = j + 1
126 j = j + 1
127 continue
128 }
129 state, size := decodeRuneInString(s[j:])
130 switch state {
131 case runeErrorState:
132 buf = append(buf, s[i:j]...)
133 buf = append(buf, `\ufffd`...)
134 i = j + 1
135 j = j + 1
136 continue
137
138
139
140
141
142
143
144 case lineSepState:
145 buf = append(buf, s[i:j]...)
146 buf = append(buf, `\u2028`...)
147 i = j + 3
148 j = j + 3
149 continue
150 case paragraphSepState:
151 buf = append(buf, s[i:j]...)
152 buf = append(buf, `\u2029`...)
153 i = j + 3
154 j = j + 3
155 continue
156 }
157 j += size
158 }
159
160 return append(append(buf, s[i:]...), '"')
161 }
162
163 func appendHTMLString(buf []byte, s string) []byte {
164 valLen := len(s)
165 if valLen == 0 {
166 return append(buf, `""`...)
167 }
168 buf = append(buf, '"')
169 var (
170 i, j int
171 )
172 if valLen >= 8 {
173 chunks := stringToUint64Slice(s)
174 for _, n := range chunks {
175
176
177
178 mask := n | (n - (lsb * 0x20)) |
179 ((n ^ (lsb * '"')) - lsb) |
180 ((n ^ (lsb * '\\')) - lsb) |
181 ((n ^ (lsb * '<')) - lsb) |
182 ((n ^ (lsb * '>')) - lsb) |
183 ((n ^ (lsb * '&')) - lsb)
184 if (mask & msb) != 0 {
185 j = bits.TrailingZeros64(mask&msb) / 8
186 goto ESCAPE_END
187 }
188 }
189 for i := len(chunks) * 8; i < valLen; i++ {
190 if needEscapeHTML[s[i]] {
191 j = i
192 goto ESCAPE_END
193 }
194 }
195
196 return append(append(buf, s...), '"')
197 }
198 ESCAPE_END:
199 for j < valLen {
200 c := s[j]
201
202 if !needEscapeHTML[c] {
203
204 j++
205 continue
206 }
207
208 switch c {
209 case '\\', '"':
210 buf = append(buf, s[i:j]...)
211 buf = append(buf, '\\', c)
212 i = j + 1
213 j = j + 1
214 continue
215
216 case '\n':
217 buf = append(buf, s[i:j]...)
218 buf = append(buf, '\\', 'n')
219 i = j + 1
220 j = j + 1
221 continue
222
223 case '\r':
224 buf = append(buf, s[i:j]...)
225 buf = append(buf, '\\', 'r')
226 i = j + 1
227 j = j + 1
228 continue
229
230 case '\t':
231 buf = append(buf, s[i:j]...)
232 buf = append(buf, '\\', 't')
233 i = j + 1
234 j = j + 1
235 continue
236
237 case '<', '>', '&':
238 buf = append(buf, s[i:j]...)
239 buf = append(buf, `\u00`...)
240 buf = append(buf, hex[c>>4], hex[c&0xF])
241 i = j + 1
242 j = j + 1
243 continue
244
245 case 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, 0x0F,
246 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F:
247 buf = append(buf, s[i:j]...)
248 buf = append(buf, `\u00`...)
249 buf = append(buf, hex[c>>4], hex[c&0xF])
250 i = j + 1
251 j = j + 1
252 continue
253 }
254 j++
255 }
256
257 return append(append(buf, s[i:]...), '"')
258 }
259
260 func appendNormalizedString(buf []byte, s string) []byte {
261 valLen := len(s)
262 if valLen == 0 {
263 return append(buf, `""`...)
264 }
265 buf = append(buf, '"')
266 var (
267 i, j int
268 )
269 if valLen >= 8 {
270 chunks := stringToUint64Slice(s)
271 for _, n := range chunks {
272
273
274
275 mask := n | (n - (lsb * 0x20)) |
276 ((n ^ (lsb * '"')) - lsb) |
277 ((n ^ (lsb * '\\')) - lsb)
278 if (mask & msb) != 0 {
279 j = bits.TrailingZeros64(mask&msb) / 8
280 goto ESCAPE_END
281 }
282 }
283 valLen := len(s)
284 for i := len(chunks) * 8; i < valLen; i++ {
285 if needEscapeNormalizeUTF8[s[i]] {
286 j = i
287 goto ESCAPE_END
288 }
289 }
290 return append(append(buf, s...), '"')
291 }
292 ESCAPE_END:
293 for j < valLen {
294 c := s[j]
295
296 if !needEscapeNormalizeUTF8[c] {
297
298 j++
299 continue
300 }
301
302 switch c {
303 case '\\', '"':
304 buf = append(buf, s[i:j]...)
305 buf = append(buf, '\\', c)
306 i = j + 1
307 j = j + 1
308 continue
309
310 case '\n':
311 buf = append(buf, s[i:j]...)
312 buf = append(buf, '\\', 'n')
313 i = j + 1
314 j = j + 1
315 continue
316
317 case '\r':
318 buf = append(buf, s[i:j]...)
319 buf = append(buf, '\\', 'r')
320 i = j + 1
321 j = j + 1
322 continue
323
324 case '\t':
325 buf = append(buf, s[i:j]...)
326 buf = append(buf, '\\', 't')
327 i = j + 1
328 j = j + 1
329 continue
330
331 case 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, 0x0F,
332 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F:
333 buf = append(buf, s[i:j]...)
334 buf = append(buf, `\u00`...)
335 buf = append(buf, hex[c>>4], hex[c&0xF])
336 i = j + 1
337 j = j + 1
338 continue
339 }
340
341 state, size := decodeRuneInString(s[j:])
342 switch state {
343 case runeErrorState:
344 buf = append(buf, s[i:j]...)
345 buf = append(buf, `\ufffd`...)
346 i = j + 1
347 j = j + 1
348 continue
349
350
351
352
353
354
355
356 case lineSepState:
357 buf = append(buf, s[i:j]...)
358 buf = append(buf, `\u2028`...)
359 i = j + 3
360 j = j + 3
361 continue
362 case paragraphSepState:
363 buf = append(buf, s[i:j]...)
364 buf = append(buf, `\u2029`...)
365 i = j + 3
366 j = j + 3
367 continue
368 }
369 j += size
370 }
371
372 return append(append(buf, s[i:]...), '"')
373 }
374
375 func appendString(buf []byte, s string) []byte {
376 valLen := len(s)
377 if valLen == 0 {
378 return append(buf, `""`...)
379 }
380 buf = append(buf, '"')
381 var (
382 i, j int
383 )
384 if valLen >= 8 {
385 chunks := stringToUint64Slice(s)
386 for _, n := range chunks {
387
388
389
390 mask := n | (n - (lsb * 0x20)) |
391 ((n ^ (lsb * '"')) - lsb) |
392 ((n ^ (lsb * '\\')) - lsb)
393 if (mask & msb) != 0 {
394 j = bits.TrailingZeros64(mask&msb) / 8
395 goto ESCAPE_END
396 }
397 }
398 valLen := len(s)
399 for i := len(chunks) * 8; i < valLen; i++ {
400 if needEscape[s[i]] {
401 j = i
402 goto ESCAPE_END
403 }
404 }
405 return append(append(buf, s...), '"')
406 }
407 ESCAPE_END:
408 for j < valLen {
409 c := s[j]
410
411 if !needEscape[c] {
412
413 j++
414 continue
415 }
416
417 switch c {
418 case '\\', '"':
419 buf = append(buf, s[i:j]...)
420 buf = append(buf, '\\', c)
421 i = j + 1
422 j = j + 1
423 continue
424
425 case '\n':
426 buf = append(buf, s[i:j]...)
427 buf = append(buf, '\\', 'n')
428 i = j + 1
429 j = j + 1
430 continue
431
432 case '\r':
433 buf = append(buf, s[i:j]...)
434 buf = append(buf, '\\', 'r')
435 i = j + 1
436 j = j + 1
437 continue
438
439 case '\t':
440 buf = append(buf, s[i:j]...)
441 buf = append(buf, '\\', 't')
442 i = j + 1
443 j = j + 1
444 continue
445
446 case 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, 0x0F,
447 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F:
448 buf = append(buf, s[i:j]...)
449 buf = append(buf, `\u00`...)
450 buf = append(buf, hex[c>>4], hex[c&0xF])
451 i = j + 1
452 j = j + 1
453 continue
454 }
455 j++
456 }
457
458 return append(append(buf, s[i:]...), '"')
459 }
460
View as plain text