1
2
3
4
5 package template
6
7 import (
8 "bytes"
9 "strings"
10 )
11
12
13
14
15
// transitionFunc maps each state to the transition function that handles
// input in that state. Every function takes the current context and a chunk
// of input and returns the resulting context together with an offset into
// that input.
// NOTE(review): the returned offset looks like the number of leading bytes
// consumed; the caller is outside this file, so confirm there.
var transitionFunc = [...]func(context, []byte) (context, int){
	// HTML text, tags and attributes.
	stateText:        tText,
	stateTag:         tTag,
	stateAttrName:    tAttrName,
	stateAfterName:   tAfterName,
	stateBeforeValue: tBeforeValue,
	stateHTMLCmt:     tHTMLCmt,
	stateRCDATA:      tSpecialTagEnd,
	stateAttr:        tAttr,
	// URL-valued content; srcset shares the URL handler.
	stateURL:    tURL,
	stateSrcset: tURL,
	// JavaScript. Both string flavors and regexps share one handler, as do
	// the three kinds of line comment.
	stateJS:             tJS,
	stateJSDqStr:        tJSDelimited,
	stateJSSqStr:        tJSDelimited,
	stateJSRegexp:       tJSDelimited,
	stateJSTmplLit:      tJSTmpl,
	stateJSBlockCmt:     tBlockCmt,
	stateJSLineCmt:      tLineCmt,
	stateJSHTMLOpenCmt:  tLineCmt,
	stateJSHTMLCloseCmt: tLineCmt,
	// CSS. Strings and all url(...) forms share one handler; comments reuse
	// the handlers that also serve JS.
	stateCSS:         tCSS,
	stateCSSDqStr:    tCSSStr,
	stateCSSSqStr:    tCSSStr,
	stateCSSDqURL:    tCSSStr,
	stateCSSSqURL:    tCSSStr,
	stateCSSURL:      tCSSStr,
	stateCSSBlockCmt: tBlockCmt,
	stateCSSLineCmt:  tLineCmt,
	// Error state.
	stateError: tError,
}
46
// Delimiters of an HTML comment. tText and tJS search for commentStart;
// tHTMLCmt and tJS search for commentEnd.
var (
	commentStart = []byte("<!--")
	commentEnd   = []byte("-->")
)
49
50
51 func tText(c context, s []byte) (context, int) {
52 k := 0
53 for {
54 i := k + bytes.IndexByte(s[k:], '<')
55 if i < k || i+1 == len(s) {
56 return c, len(s)
57 } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
58 return context{state: stateHTMLCmt}, i + 4
59 }
60 i++
61 end := false
62 if s[i] == '/' {
63 if i+1 == len(s) {
64 return c, len(s)
65 }
66 end, i = true, i+1
67 }
68 j, e := eatTagName(s, i)
69 if j != i {
70 if end {
71 e = elementNone
72 }
73
74 return context{state: stateTag, element: e}, j
75 }
76 k = j
77 }
78 }
79
// elementContentType maps an element to the state that tTag switches to when
// it reaches the '>' closing a tag for that element.
var elementContentType = [...]state{
	elementNone:     stateText,
	elementScript:   stateJS,
	elementStyle:    stateCSS,
	elementTextarea: stateRCDATA,
	elementTitle:    stateRCDATA,
}
87
88
89 func tTag(c context, s []byte) (context, int) {
90
91 i := eatWhiteSpace(s, 0)
92 if i == len(s) {
93 return c, len(s)
94 }
95 if s[i] == '>' {
96 return context{
97 state: elementContentType[c.element],
98 element: c.element,
99 }, i + 1
100 }
101 j, err := eatAttrName(s, i)
102 if err != nil {
103 return context{state: stateError, err: err}, len(s)
104 }
105 state, attr := stateTag, attrNone
106 if i == j {
107 return context{
108 state: stateError,
109 err: errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
110 }, len(s)
111 }
112
113 attrName := strings.ToLower(string(s[i:j]))
114 if c.element == elementScript && attrName == "type" {
115 attr = attrScriptType
116 } else {
117 switch attrType(attrName) {
118 case contentTypeURL:
119 attr = attrURL
120 case contentTypeCSS:
121 attr = attrStyle
122 case contentTypeJS:
123 attr = attrScript
124 case contentTypeSrcset:
125 attr = attrSrcset
126 }
127 }
128
129 if j == len(s) {
130 state = stateAttrName
131 } else {
132 state = stateAfterName
133 }
134 return context{state: state, element: c.element, attr: attr}, j
135 }
136
137
138 func tAttrName(c context, s []byte) (context, int) {
139 i, err := eatAttrName(s, 0)
140 if err != nil {
141 return context{state: stateError, err: err}, len(s)
142 } else if i != len(s) {
143 c.state = stateAfterName
144 }
145 return c, i
146 }
147
148
149 func tAfterName(c context, s []byte) (context, int) {
150
151 i := eatWhiteSpace(s, 0)
152 if i == len(s) {
153 return c, len(s)
154 } else if s[i] != '=' {
155
156 c.state = stateTag
157 return c, i
158 }
159 c.state = stateBeforeValue
160
161 return c, i + 1
162 }
163
// attrStartStates maps an attribute kind to the state that tBeforeValue
// enters at the start of that attribute's value.
var attrStartStates = [...]state{
	attrNone:       stateAttr,
	attrScript:     stateJS,
	attrScriptType: stateAttr,
	attrStyle:      stateCSS,
	attrURL:        stateURL,
	attrSrcset:     stateSrcset,
}
172
173
174 func tBeforeValue(c context, s []byte) (context, int) {
175 i := eatWhiteSpace(s, 0)
176 if i == len(s) {
177 return c, len(s)
178 }
179
180 delim := delimSpaceOrTagEnd
181 switch s[i] {
182 case '\'':
183 delim, i = delimSingleQuote, i+1
184 case '"':
185 delim, i = delimDoubleQuote, i+1
186 }
187 c.state, c.delim = attrStartStates[c.attr], delim
188 return c, i
189 }
190
191
192 func tHTMLCmt(c context, s []byte) (context, int) {
193 if i := bytes.Index(s, commentEnd); i != -1 {
194 return context{}, i + 3
195 }
196 return c, len(s)
197 }
198
199
200
// specialTagEndMarkers maps an element to the lower-case tag name that
// tSpecialTagEnd passes to indexTagEnd when looking for that element's end
// tag.
var specialTagEndMarkers = [...][]byte{
	elementScript:   []byte("script"),
	elementStyle:    []byte("style"),
	elementTextarea: []byte("textarea"),
	elementTitle:    []byte("title"),
}
207
var (
	// specialTagEndPrefix is the byte sequence that opens an end tag.
	specialTagEndPrefix = []byte("</")
	// tagEndSeparators lists the bytes indexTagEnd accepts directly after a
	// tag name for the end tag to count as a match.
	tagEndSeparators = []byte("> \t\n\f/")
)
212
213
214
215 func tSpecialTagEnd(c context, s []byte) (context, int) {
216 if c.element != elementNone {
217
218
219 if c.element == elementScript && (isInScriptLiteral(c.state) || isComment(c.state)) {
220 return c, len(s)
221 }
222 if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
223 return context{}, i
224 }
225 }
226 return c, len(s)
227 }
228
229
230 func indexTagEnd(s []byte, tag []byte) int {
231 res := 0
232 plen := len(specialTagEndPrefix)
233 for len(s) > 0 {
234
235 i := bytes.Index(s, specialTagEndPrefix)
236 if i == -1 {
237 return i
238 }
239 s = s[i+plen:]
240
241 if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
242 s = s[len(tag):]
243
244 if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
245 return res + i
246 }
247 res += len(tag)
248 }
249 res += i + plen
250 }
251 return -1
252 }
253
254
// tAttr is the context transition function for stateAttr. It consumes all of
// s and leaves the context unchanged.
func tAttr(c context, s []byte) (context, int) {
	return c, len(s)
}
258
259
260 func tURL(c context, s []byte) (context, int) {
261 if bytes.ContainsAny(s, "#?") {
262 c.urlPart = urlPartQueryOrFrag
263 } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
264
265
266 c.urlPart = urlPartPreQuery
267 }
268 return c, len(s)
269 }
270
271
// tJS is the context transition function for the JS state.
//
// It finds the first byte in s that may start a string, template literal,
// comment or regexp, or that is a brace, updates c for it, and returns the
// offset just past the bytes it consumed. If there is no such byte, only
// c.jsCtx is updated and all of s is consumed. A '/' that can be classified
// neither as division nor as the start of a regexp yields a stateError
// context.
func tJS(c context, s []byte) (context, int) {
	i := bytes.IndexAny(s, "\"`'/{}<-#")
	if i == -1 {
		// Nothing in s starts a string, comment, regexp or brace.
		c.jsCtx = nextJSCtx(s, c.jsCtx)
		return c, len(s)
	}
	// Account for the ordinary code before the interesting byte.
	// NOTE(review): nextJSCtx is defined elsewhere; presumably it decides
	// whether a following '/' is a division or a regexp — confirm.
	c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
	switch s[i] {
	case '"':
		c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
	case '\'':
		c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
	case '`':
		c.state, c.jsCtx = stateJSTmplLit, jsCtxRegexp
	case '/':
		switch {
		case i+1 < len(s) && s[i+1] == '/':
			// "//": also consume the second slash.
			c.state, i = stateJSLineCmt, i+1
		case i+1 < len(s) && s[i+1] == '*':
			// "/*": also consume the asterisk.
			c.state, i = stateJSBlockCmt, i+1
		case c.jsCtx == jsCtxRegexp:
			c.state = stateJSRegexp
		case c.jsCtx == jsCtxDivOp:
			// Treated as a division operator: stay in stateJS; a '/' that
			// follows would start a regexp.
			c.jsCtx = jsCtxRegexp
		default:
			return context{
				state: stateError,
				err:   errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
			}, len(s)
		}
	// "<!--" and "-->" are handled as line comments inside JS (tLineCmt
	// serves both states). A lone '<' or '-' is just consumed.
	case '<':
		if i+3 < len(s) && bytes.Equal(commentStart, s[i:i+4]) {
			c.state, i = stateJSHTMLOpenCmt, i+3
		}
	case '-':
		if i+2 < len(s) && bytes.Equal(commentEnd, s[i:i+3]) {
			c.state, i = stateJSHTMLCloseCmt, i+2
		}
	// "#!" is handled as the start of a line comment wherever it appears in
	// the scanned input; a lone '#' is just consumed.
	case '#':
		if i+1 < len(s) && s[i+1] == '!' {
			c.state, i = stateJSLineCmt, i+1
		}
	case '{':
		// Brace depth is only tracked while c.jsBraceDepth is non-empty;
		// tJSTmpl pushes an entry when it sees "${" in a template literal.
		if len(c.jsBraceDepth) == 0 {
			return c, i + 1
		}
		c.jsBraceDepth[len(c.jsBraceDepth)-1]++
	case '}':
		if len(c.jsBraceDepth) == 0 {
			return c, i + 1
		}
		// Close one brace at the innermost tracked level. If the counter stays
		// non-negative this '}' matched a '{' seen since the "${"; otherwise it
		// closes the "${" itself, so pop the level and resume the template
		// literal.
		c.jsBraceDepth[len(c.jsBraceDepth)-1]--
		if c.jsBraceDepth[len(c.jsBraceDepth)-1] >= 0 {
			return c, i + 1
		}
		c.jsBraceDepth = c.jsBraceDepth[:len(c.jsBraceDepth)-1]
		c.state = stateJSTmplLit
	default:
		// IndexAny only returns positions of the bytes handled above.
		panic("unreachable")
	}
	return c, i + 1
}
349
350 func tJSTmpl(c context, s []byte) (context, int) {
351 var k int
352 for {
353 i := k + bytes.IndexAny(s[k:], "`\\$")
354 if i < k {
355 break
356 }
357 switch s[i] {
358 case '\\':
359 i++
360 if i == len(s) {
361 return context{
362 state: stateError,
363 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
364 }, len(s)
365 }
366 case '$':
367 if len(s) >= i+2 && s[i+1] == '{' {
368 c.jsBraceDepth = append(c.jsBraceDepth, 0)
369 c.state = stateJS
370 return c, i + 2
371 }
372 case '`':
373
374 c.state = stateJS
375 return c, i + 1
376 }
377 k = i + 1
378 }
379
380 return c, len(s)
381 }
382
383
384
385 func tJSDelimited(c context, s []byte) (context, int) {
386 specials := `\"`
387 switch c.state {
388 case stateJSSqStr:
389 specials = `\'`
390 case stateJSRegexp:
391 specials = `\/[]`
392 }
393
394 k, inCharset := 0, false
395 for {
396 i := k + bytes.IndexAny(s[k:], specials)
397 if i < k {
398 break
399 }
400 switch s[i] {
401 case '\\':
402 i++
403 if i == len(s) {
404 return context{
405 state: stateError,
406 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
407 }, len(s)
408 }
409 case '[':
410 inCharset = true
411 case ']':
412 inCharset = false
413 case '/':
414
415
416
417 if i > 0 && i+7 <= len(s) && bytes.Compare(bytes.ToLower(s[i-1:i+7]), []byte("</script")) == 0 {
418 i++
419 } else if !inCharset {
420 c.state, c.jsCtx = stateJS, jsCtxDivOp
421 return c, i + 1
422 }
423 default:
424
425 if !inCharset {
426 c.state, c.jsCtx = stateJS, jsCtxDivOp
427 return c, i + 1
428 }
429 }
430 k = i + 1
431 }
432
433 if inCharset {
434
435
436 return context{
437 state: stateError,
438 err: errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
439 }, len(s)
440 }
441
442 return c, len(s)
443 }
444
// blockCommentEnd is the terminator tBlockCmt searches for in both JS and CSS
// block comments.
var blockCommentEnd = []byte("*/")
446
447
448 func tBlockCmt(c context, s []byte) (context, int) {
449 i := bytes.Index(s, blockCommentEnd)
450 if i == -1 {
451 return c, len(s)
452 }
453 switch c.state {
454 case stateJSBlockCmt:
455 c.state = stateJS
456 case stateCSSBlockCmt:
457 c.state = stateCSS
458 default:
459 panic(c.state.String())
460 }
461 return c, i + 2
462 }
463
464
465 func tLineCmt(c context, s []byte) (context, int) {
466 var lineTerminators string
467 var endState state
468 switch c.state {
469 case stateJSLineCmt, stateJSHTMLOpenCmt, stateJSHTMLCloseCmt:
470 lineTerminators, endState = "\n\r\u2028\u2029", stateJS
471 case stateCSSLineCmt:
472 lineTerminators, endState = "\n\f\r", stateCSS
473
474
475
476
477
478
479
480 default:
481 panic(c.state.String())
482 }
483
484 i := bytes.IndexAny(s, lineTerminators)
485 if i == -1 {
486 return c, len(s)
487 }
488 c.state = endState
489
490
491
492
493
494 return c, i
495 }
496
497
// tCSS is the context transition function for the CSS state.
//
// It scans s for the first construct that leaves plain CSS: a quoted string,
// a "//" or "/*" comment, or a '(' preceded by the keyword "url" (optionally
// separated by whitespace). It returns the matching state together with the
// offset just past the opening delimiter. If none is found, all of s is
// consumed and c is unchanged.
func tCSS(c context, s []byte) (context, int) {
	k := 0
	for {
		i := k + bytes.IndexAny(s[k:], `("'/`)
		if i < k {
			// IndexAny returned -1: nothing of interest is left in s.
			return c, len(s)
		}
		switch s[i] {
		case '(':
			// Look to the left of the paren, ignoring whitespace, for "url".
			p := bytes.TrimRight(s[:i], "\t\n\f\r ")
			if endsWithCSSKeyword(p, "url") {
				// j is the index of the first non-whitespace byte after '(',
				// or len(s) if there is none.
				j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
				switch {
				case j != len(s) && s[j] == '"':
					c.state, j = stateCSSDqURL, j+1
				case j != len(s) && s[j] == '\'':
					c.state, j = stateCSSSqURL, j+1
				default:
					// Unquoted url(...) body.
					c.state = stateCSSURL
				}
				return c, j
			}
		case '/':
			// Only "//" and "/*" start a comment; any other '/' is skipped.
			if i+1 < len(s) {
				switch s[i+1] {
				case '/':
					c.state = stateCSSLineCmt
					return c, i + 2
				case '*':
					c.state = stateCSSBlockCmt
					return c, i + 2
				}
			}
		case '"':
			c.state = stateCSSDqStr
			return c, i + 1
		case '\'':
			c.state = stateCSSSqStr
			return c, i + 1
		}
		// Not a state change; continue after this byte.
		k = i + 1
	}
}
569
570
// tCSSStr is the context transition function for the CSS string and url
// states: stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL and
// stateCSSURL.
//
// It scans s for the byte that ends the current token and, on finding it,
// returns to stateCSS with the offset just past that byte. Backslash escapes
// are skipped; a backslash as the last byte of s yields a stateError context.
// While scanning, the decoded content is passed to tURL, which updates
// c.urlPart. It panics if c.state is not one of the states listed above.
func tCSSStr(c context, s []byte) (context, int) {
	// endAndEsc holds the escape character plus every byte that ends the
	// token in the current state.
	var endAndEsc string
	switch c.state {
	case stateCSSDqStr, stateCSSDqURL:
		endAndEsc = `\"`
	case stateCSSSqStr, stateCSSSqURL:
		endAndEsc = `\'`
	case stateCSSURL:
		// An unquoted url(...) body ends at whitespace or at the closing
		// paren.
		endAndEsc = "\\\t\n\f\r )"
	default:
		panic(c.state.String())
	}

	k := 0
	for {
		i := k + bytes.IndexAny(s[k:], endAndEsc)
		if i < k {
			// No terminator or escape in the rest of s: hand the decoded
			// remainder to tURL. Note that c is shadowed inside this block.
			// NOTE(review): nread is the length of the decoded slice (tURL
			// returns the length of its argument), which may differ from
			// len(s[k:]) if decodeCSS changes the length — confirm intended.
			c, nread := tURL(c, decodeCSS(s[k:]))
			return c, k + nread
		}
		if s[i] == '\\' {
			// Skip the byte after the backslash.
			i++
			if i == len(s) {
				return context{
					state: stateError,
					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
				}, len(s)
			}
		} else {
			// Terminator found: back to plain CSS.
			c.state = stateCSS
			return c, i + 1
		}
		// Update the URL part from everything seen so far (s[:i+1], not just
		// the bytes since the previous escape).
		c, _ = tURL(c, decodeCSS(s[:i+1]))
		k = i + 1
	}
}
609
610
// tError is the context transition function for stateError. It consumes all
// of s and leaves the context unchanged, so the error state is never left
// through this function.
func tError(c context, s []byte) (context, int) {
	return c, len(s)
}
614
615
616
617
618
619 func eatAttrName(s []byte, i int) (int, *Error) {
620 for j := i; j < len(s); j++ {
621 switch s[j] {
622 case ' ', '\t', '\n', '\f', '\r', '=', '>':
623 return j, nil
624 case '\'', '"', '<':
625
626
627
628 return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
629 default:
630
631 }
632 }
633 return len(s), nil
634 }
635
// elementNameMap maps the lower-case names of the specially handled elements
// to their element values. eatTagName looks tag names up here; names that are
// absent yield the zero element value.
var elementNameMap = map[string]element{
	"script":   elementScript,
	"style":    elementStyle,
	"textarea": elementTextarea,
	"title":    elementTitle,
}
642
643
// asciiAlpha reports whether c is an ASCII letter (A-Z or a-z).
func asciiAlpha(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
647
648
649 func asciiAlphaNum(c byte) bool {
650 return asciiAlpha(c) || '0' <= c && c <= '9'
651 }
652
653
654 func eatTagName(s []byte, i int) (int, element) {
655 if i == len(s) || !asciiAlpha(s[i]) {
656 return i, elementNone
657 }
658 j := i + 1
659 for j < len(s) {
660 x := s[j]
661 if asciiAlphaNum(x) {
662 j++
663 continue
664 }
665
666 if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
667 j += 2
668 continue
669 }
670 break
671 }
672 return j, elementNameMap[strings.ToLower(string(s[i:j]))]
673 }
674
675
// eatWhiteSpace returns the index of the first byte at or after i in s that
// is not a space, tab, line feed, form feed or carriage return. It returns
// len(s) when there is no such byte.
func eatWhiteSpace(s []byte, i int) int {
	for pos := i; pos < len(s); pos++ {
		if b := s[pos]; b != ' ' && b != '\t' && b != '\n' && b != '\f' && b != '\r' {
			return pos
		}
	}
	return len(s)
}
687
View as plain text