1
2
3
4
5 package html
6
7 import (
8 "errors"
9 "fmt"
10 "io"
11 "strings"
12
13 a "golang.org/x/net/html/atom"
14 )
15
16
17
18 type parser struct {
19
20 tokenizer *Tokenizer
21
22 tok Token
23
24
25 hasSelfClosingToken bool
26
27 doc *Node
28
29
30 oe, afe nodeStack
31
32 head, form *Node
33
34 scripting, framesetOK bool
35
36 templateStack insertionModeStack
37
38 im insertionMode
39
40
41 originalIM insertionMode
42
43
44 fosterParenting bool
45
46 quirks bool
47
48 fragment bool
49
50
51 context *Node
52 }
53
54 func (p *parser) top() *Node {
55 if n := p.oe.top(); n != nil {
56 return n
57 }
58 return p.doc
59 }
60
61
62 var (
63 defaultScopeStopTags = map[string][]a.Atom{
64 "": {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
65 "math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
66 "svg": {a.Desc, a.ForeignObject, a.Title},
67 }
68 )
69
// scope identifies which set of boundary elements limits a search or a
// truncation of the stack of open elements.
type scope int

// The scopes understood by indexOfElementInScope and clearStackToContext.
const (
	defaultScope = scope(iota)
	listItemScope
	buttonScope
	tableScope
	tableRowScope
	tableBodyScope
	selectScope
)
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99 func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
100 if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
101 p.oe = p.oe[:i]
102 return true
103 }
104 return false
105 }
106
107
108
109
110 func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
111 for i := len(p.oe) - 1; i >= 0; i-- {
112 tagAtom := p.oe[i].DataAtom
113 if p.oe[i].Namespace == "" {
114 for _, t := range matchTags {
115 if t == tagAtom {
116 return i
117 }
118 }
119 switch s {
120 case defaultScope:
121
122 case listItemScope:
123 if tagAtom == a.Ol || tagAtom == a.Ul {
124 return -1
125 }
126 case buttonScope:
127 if tagAtom == a.Button {
128 return -1
129 }
130 case tableScope:
131 if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
132 return -1
133 }
134 case selectScope:
135 if tagAtom != a.Optgroup && tagAtom != a.Option {
136 return -1
137 }
138 default:
139 panic("unreachable")
140 }
141 }
142 switch s {
143 case defaultScope, listItemScope, buttonScope:
144 for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
145 if t == tagAtom {
146 return -1
147 }
148 }
149 }
150 }
151 return -1
152 }
153
154
155
156 func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
157 return p.indexOfElementInScope(s, matchTags...) != -1
158 }
159
160
161
162 func (p *parser) clearStackToContext(s scope) {
163 for i := len(p.oe) - 1; i >= 0; i-- {
164 tagAtom := p.oe[i].DataAtom
165 switch s {
166 case tableScope:
167 if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
168 p.oe = p.oe[:i+1]
169 return
170 }
171 case tableRowScope:
172 if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {
173 p.oe = p.oe[:i+1]
174 return
175 }
176 case tableBodyScope:
177 if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {
178 p.oe = p.oe[:i+1]
179 return
180 }
181 default:
182 panic("unreachable")
183 }
184 }
185 }
186
187
188
189
190
191
192 func (p *parser) parseGenericRawTextElement() {
193 p.addElement()
194 p.originalIM = p.im
195 p.im = textIM
196 }
197
198
199
200
201 func (p *parser) generateImpliedEndTags(exceptions ...string) {
202 var i int
203 loop:
204 for i = len(p.oe) - 1; i >= 0; i-- {
205 n := p.oe[i]
206 if n.Type != ElementNode {
207 break
208 }
209 switch n.DataAtom {
210 case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
211 for _, except := range exceptions {
212 if n.Data == except {
213 break loop
214 }
215 }
216 continue
217 }
218 break
219 }
220
221 p.oe = p.oe[:i+1]
222 }
223
224
225
226 func (p *parser) addChild(n *Node) {
227 if p.shouldFosterParent() {
228 p.fosterParent(n)
229 } else {
230 p.top().AppendChild(n)
231 }
232
233 if n.Type == ElementNode {
234 p.oe = append(p.oe, n)
235 }
236 }
237
238
239
240 func (p *parser) shouldFosterParent() bool {
241 if p.fosterParenting {
242 switch p.top().DataAtom {
243 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
244 return true
245 }
246 }
247 return false
248 }
249
250
251
252 func (p *parser) fosterParent(n *Node) {
253 var table, parent, prev, template *Node
254 var i int
255 for i = len(p.oe) - 1; i >= 0; i-- {
256 if p.oe[i].DataAtom == a.Table {
257 table = p.oe[i]
258 break
259 }
260 }
261
262 var j int
263 for j = len(p.oe) - 1; j >= 0; j-- {
264 if p.oe[j].DataAtom == a.Template {
265 template = p.oe[j]
266 break
267 }
268 }
269
270 if template != nil && (table == nil || j > i) {
271 template.AppendChild(n)
272 return
273 }
274
275 if table == nil {
276
277 parent = p.oe[0]
278 } else {
279 parent = table.Parent
280 }
281 if parent == nil {
282 parent = p.oe[i-1]
283 }
284
285 if table != nil {
286 prev = table.PrevSibling
287 } else {
288 prev = parent.LastChild
289 }
290 if prev != nil && prev.Type == TextNode && n.Type == TextNode {
291 prev.Data += n.Data
292 return
293 }
294
295 parent.InsertBefore(n, table)
296 }
297
298
299
300 func (p *parser) addText(text string) {
301 if text == "" {
302 return
303 }
304
305 if p.shouldFosterParent() {
306 p.fosterParent(&Node{
307 Type: TextNode,
308 Data: text,
309 })
310 return
311 }
312
313 t := p.top()
314 if n := t.LastChild; n != nil && n.Type == TextNode {
315 n.Data += text
316 return
317 }
318 p.addChild(&Node{
319 Type: TextNode,
320 Data: text,
321 })
322 }
323
324
325 func (p *parser) addElement() {
326 p.addChild(&Node{
327 Type: ElementNode,
328 DataAtom: p.tok.DataAtom,
329 Data: p.tok.Data,
330 Attr: p.tok.Attr,
331 })
332 }
333
334
335 func (p *parser) addFormattingElement() {
336 tagAtom, attr := p.tok.DataAtom, p.tok.Attr
337 p.addElement()
338
339
340 identicalElements := 0
341 findIdenticalElements:
342 for i := len(p.afe) - 1; i >= 0; i-- {
343 n := p.afe[i]
344 if n.Type == scopeMarkerNode {
345 break
346 }
347 if n.Type != ElementNode {
348 continue
349 }
350 if n.Namespace != "" {
351 continue
352 }
353 if n.DataAtom != tagAtom {
354 continue
355 }
356 if len(n.Attr) != len(attr) {
357 continue
358 }
359 compareAttributes:
360 for _, t0 := range n.Attr {
361 for _, t1 := range attr {
362 if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
363
364 continue compareAttributes
365 }
366 }
367
368
369 continue findIdenticalElements
370 }
371
372 identicalElements++
373 if identicalElements >= 3 {
374 p.afe.remove(n)
375 }
376 }
377
378 p.afe = append(p.afe, p.top())
379 }
380
381
382 func (p *parser) clearActiveFormattingElements() {
383 for {
384 if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode {
385 return
386 }
387 }
388 }
389
390
391 func (p *parser) reconstructActiveFormattingElements() {
392 n := p.afe.top()
393 if n == nil {
394 return
395 }
396 if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
397 return
398 }
399 i := len(p.afe) - 1
400 for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
401 if i == 0 {
402 i = -1
403 break
404 }
405 i--
406 n = p.afe[i]
407 }
408 for {
409 i++
410 clone := p.afe[i].clone()
411 p.addChild(clone)
412 p.afe[i] = clone
413 if i == len(p.afe)-1 {
414 break
415 }
416 }
417 }
418
419
// acknowledgeSelfClosingTag clears the parser's hasSelfClosingToken flag. The
// insertion modes call it after inserting and immediately popping an element
// that has no content.
func (p *parser) acknowledgeSelfClosingTag() {
	p.hasSelfClosingToken = false
}
423
424
425
426
427
// insertionMode is a state-transition function of the tree-construction state
// machine. It handles the parser's current token (p.tok) and may update the
// parser's fields, including p.im. In this file a mode returns true once it
// has finished with the token, and false after changing state so that the same
// token gets handled again.
// NOTE(review): the loop that acts on the return value is outside this view.
type insertionMode func(*parser) bool
429
430
431
432
433 func (p *parser) setOriginalIM() {
434 if p.originalIM != nil {
435 panic("html: bad parser state: originalIM was set twice")
436 }
437 p.originalIM = p.im
438 }
439
440
441 func (p *parser) resetInsertionMode() {
442 for i := len(p.oe) - 1; i >= 0; i-- {
443 n := p.oe[i]
444 last := i == 0
445 if last && p.context != nil {
446 n = p.context
447 }
448
449 switch n.DataAtom {
450 case a.Select:
451 if !last {
452 for ancestor, first := n, p.oe[0]; ancestor != first; {
453 ancestor = p.oe[p.oe.index(ancestor)-1]
454 switch ancestor.DataAtom {
455 case a.Template:
456 p.im = inSelectIM
457 return
458 case a.Table:
459 p.im = inSelectInTableIM
460 return
461 }
462 }
463 }
464 p.im = inSelectIM
465 case a.Td, a.Th:
466
467
468
469 p.im = inCellIM
470 case a.Tr:
471 p.im = inRowIM
472 case a.Tbody, a.Thead, a.Tfoot:
473 p.im = inTableBodyIM
474 case a.Caption:
475 p.im = inCaptionIM
476 case a.Colgroup:
477 p.im = inColumnGroupIM
478 case a.Table:
479 p.im = inTableIM
480 case a.Template:
481
482 if n.Namespace != "" {
483 continue
484 }
485 p.im = p.templateStack.top()
486 case a.Head:
487
488
489
490 p.im = inHeadIM
491 case a.Body:
492 p.im = inBodyIM
493 case a.Frameset:
494 p.im = inFramesetIM
495 case a.Html:
496 if p.head == nil {
497 p.im = beforeHeadIM
498 } else {
499 p.im = afterHeadIM
500 }
501 default:
502 if last {
503 p.im = inBodyIM
504 return
505 }
506 continue
507 }
508 return
509 }
510 }
511
// whitespace is the cutset of characters the insertion modes treat as
// whitespace when trimming text tokens: space, tab, CR, LF and form feed.
const whitespace = " \t\r\n\f"
513
514
515 func initialIM(p *parser) bool {
516 switch p.tok.Type {
517 case TextToken:
518 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
519 if len(p.tok.Data) == 0 {
520
521 return true
522 }
523 case CommentToken:
524 p.doc.AppendChild(&Node{
525 Type: CommentNode,
526 Data: p.tok.Data,
527 })
528 return true
529 case DoctypeToken:
530 n, quirks := parseDoctype(p.tok.Data)
531 p.doc.AppendChild(n)
532 p.quirks = quirks
533 p.im = beforeHTMLIM
534 return true
535 }
536 p.quirks = true
537 p.im = beforeHTMLIM
538 return false
539 }
540
541
542 func beforeHTMLIM(p *parser) bool {
543 switch p.tok.Type {
544 case DoctypeToken:
545
546 return true
547 case TextToken:
548 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
549 if len(p.tok.Data) == 0 {
550
551 return true
552 }
553 case StartTagToken:
554 if p.tok.DataAtom == a.Html {
555 p.addElement()
556 p.im = beforeHeadIM
557 return true
558 }
559 case EndTagToken:
560 switch p.tok.DataAtom {
561 case a.Head, a.Body, a.Html, a.Br:
562 p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
563 return false
564 default:
565
566 return true
567 }
568 case CommentToken:
569 p.doc.AppendChild(&Node{
570 Type: CommentNode,
571 Data: p.tok.Data,
572 })
573 return true
574 }
575 p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
576 return false
577 }
578
579
580 func beforeHeadIM(p *parser) bool {
581 switch p.tok.Type {
582 case TextToken:
583 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
584 if len(p.tok.Data) == 0 {
585
586 return true
587 }
588 case StartTagToken:
589 switch p.tok.DataAtom {
590 case a.Head:
591 p.addElement()
592 p.head = p.top()
593 p.im = inHeadIM
594 return true
595 case a.Html:
596 return inBodyIM(p)
597 }
598 case EndTagToken:
599 switch p.tok.DataAtom {
600 case a.Head, a.Body, a.Html, a.Br:
601 p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
602 return false
603 default:
604
605 return true
606 }
607 case CommentToken:
608 p.addChild(&Node{
609 Type: CommentNode,
610 Data: p.tok.Data,
611 })
612 return true
613 case DoctypeToken:
614
615 return true
616 }
617
618 p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
619 return false
620 }
621
622
// inHeadIM handles the current token while the parser is inside the <head>
// element. Other insertion modes also delegate head-level tags to it (for
// example script, style and template).
//
// It returns true when the token has been dealt with here. For tokens that do
// not belong in the head it implies a </head> end tag and returns false.
func inHeadIM(p *parser) bool {
	switch p.tok.Type {
	case TextToken:
		s := strings.TrimLeft(p.tok.Data, whitespace)
		if len(s) < len(p.tok.Data) {
			// Add the leading whitespace to the current node; only the
			// remainder (if any) is treated as "anything else" below.
			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
			if s == "" {
				return true
			}
			p.tok.Data = s
		}
	case StartTagToken:
		switch p.tok.DataAtom {
		case a.Html:
			return inBodyIM(p)
		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta:
			// Elements without content: insert and immediately pop.
			p.addElement()
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
			return true
		case a.Noscript:
			if p.scripting {
				p.parseGenericRawTextElement()
				return true
			}
			p.addElement()
			p.im = inHeadNoscriptIM
			// With scripting disabled the noscript content is parsed as
			// markup, so tell the tokenizer not to treat it as raw text.
			p.tokenizer.NextIsNotRawText()
			return true
		case a.Script, a.Title:
			p.addElement()
			p.setOriginalIM()
			p.im = textIM
			return true
		case a.Noframes, a.Style:
			p.parseGenericRawTextElement()
			return true
		case a.Head:
			// Ignore the token.
			return true
		case a.Template:
			// If any open element is in a foreign namespace (Namespace is
			// non-empty), give up on the rest of the input instead of opening
			// the template.
			// NOTE(review): this looks like a deliberate workaround for
			// template-inside-foreign-content corner cases - confirm the
			// rationale before changing it.
			for _, e := range p.oe {
				if e.Namespace != "" {
					p.im = ignoreTheRemainingTokens
					return true
				}
			}

			p.addElement()
			// The scope marker isolates formatting elements opened inside the
			// template from those opened outside it.
			p.afe = append(p.afe, &scopeMarker)
			p.framesetOK = false
			p.im = inTemplateIM
			p.templateStack = append(p.templateStack, inTemplateIM)
			return true
		}
	case EndTagToken:
		switch p.tok.DataAtom {
		case a.Head:
			p.oe.pop()
			p.im = afterHeadIM
			return true
		case a.Body, a.Html, a.Br:
			p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
			return false
		case a.Template:
			if !p.oe.contains(a.Template) {
				// No open template: ignore the token.
				return true
			}
			p.generateImpliedEndTags()
			// Pop up to and including the innermost HTML-namespace template.
			for i := len(p.oe) - 1; i >= 0; i-- {
				if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
					p.oe = p.oe[:i]
					break
				}
			}
			p.clearActiveFormattingElements()
			p.templateStack.pop()
			p.resetInsertionMode()
			return true
		default:
			// Ignore the token.
			return true
		}
	case CommentToken:
		p.addChild(&Node{
			Type: CommentNode,
			Data: p.tok.Data,
		})
		return true
	case DoctypeToken:
		// Ignore the token.
		return true
	}

	// Anything else: act as if </head> had been seen.
	p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
	return false
}
736
737
738 func inHeadNoscriptIM(p *parser) bool {
739 switch p.tok.Type {
740 case DoctypeToken:
741
742 return true
743 case StartTagToken:
744 switch p.tok.DataAtom {
745 case a.Html:
746 return inBodyIM(p)
747 case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
748 return inHeadIM(p)
749 case a.Head:
750
751 return true
752 case a.Noscript:
753
754
755 p.tokenizer.NextIsNotRawText()
756
757 return true
758 }
759 case EndTagToken:
760 switch p.tok.DataAtom {
761 case a.Noscript, a.Br:
762 default:
763
764 return true
765 }
766 case TextToken:
767 s := strings.TrimLeft(p.tok.Data, whitespace)
768 if len(s) == 0 {
769
770 return inHeadIM(p)
771 }
772 case CommentToken:
773 return inHeadIM(p)
774 }
775 p.oe.pop()
776 if p.top().DataAtom != a.Head {
777 panic("html: the new current node will be a head element.")
778 }
779 p.im = inHeadIM
780 if p.tok.DataAtom == a.Noscript {
781 return true
782 }
783 return false
784 }
785
786
787 func afterHeadIM(p *parser) bool {
788 switch p.tok.Type {
789 case TextToken:
790 s := strings.TrimLeft(p.tok.Data, whitespace)
791 if len(s) < len(p.tok.Data) {
792
793 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
794 if s == "" {
795 return true
796 }
797 p.tok.Data = s
798 }
799 case StartTagToken:
800 switch p.tok.DataAtom {
801 case a.Html:
802 return inBodyIM(p)
803 case a.Body:
804 p.addElement()
805 p.framesetOK = false
806 p.im = inBodyIM
807 return true
808 case a.Frameset:
809 p.addElement()
810 p.im = inFramesetIM
811 return true
812 case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
813 p.oe = append(p.oe, p.head)
814 defer p.oe.remove(p.head)
815 return inHeadIM(p)
816 case a.Head:
817
818 return true
819 }
820 case EndTagToken:
821 switch p.tok.DataAtom {
822 case a.Body, a.Html, a.Br:
823
824 case a.Template:
825 return inHeadIM(p)
826 default:
827
828 return true
829 }
830 case CommentToken:
831 p.addChild(&Node{
832 Type: CommentNode,
833 Data: p.tok.Data,
834 })
835 return true
836 case DoctypeToken:
837
838 return true
839 }
840
841 p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
842 p.framesetOK = true
843 return false
844 }
845
846
847 func copyAttributes(dst *Node, src Token) {
848 if len(src.Attr) == 0 {
849 return
850 }
851 attr := map[string]string{}
852 for _, t := range dst.Attr {
853 attr[t.Key] = t.Val
854 }
855 for _, t := range src.Attr {
856 if _, ok := attr[t.Key]; !ok {
857 dst.Attr = append(dst.Attr, t)
858 attr[t.Key] = t.Val
859 }
860 }
861 }
862
863
// inBodyIM handles the current token while the parser is inside the document
// body. It is also the fallback that many other insertion modes delegate to.
//
// Most branches fall through to the final "return true". A branch returns
// early with true when it has ignored the token or switched insertion mode,
// and with false after rewriting the token or implying another one, so that
// the same token is handled again.
func inBodyIM(p *parser) bool {
	switch p.tok.Type {
	case TextToken:
		d := p.tok.Data
		switch n := p.oe.top(); n.DataAtom {
		case a.Pre, a.Listing:
			if n.FirstChild == nil {
				// Drop a single newline (CR, LF or CRLF) at the very start of
				// a <pre> or <listing> block.
				if d != "" && d[0] == '\r' {
					d = d[1:]
				}
				if d != "" && d[0] == '\n' {
					d = d[1:]
				}
			}
		}
		// NUL characters are dropped from body text.
		d = strings.Replace(d, "\x00", "", -1)
		if d == "" {
			return true
		}
		p.reconstructActiveFormattingElements()
		p.addText(d)
		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
			// Non-whitespace text was inserted, so a frameset is no longer
			// allowed.
			p.framesetOK = false
		}
	case StartTagToken:
		switch p.tok.DataAtom {
		case a.Html:
			if p.oe.contains(a.Template) {
				return true
			}
			// Merge new attributes into the existing root element.
			copyAttributes(p.oe[0], p.tok)
		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
			return inHeadIM(p)
		case a.Body:
			if p.oe.contains(a.Template) {
				return true
			}
			// Merge new attributes into the existing body element, which is
			// expected to be the second entry on the stack.
			if len(p.oe) >= 2 {
				body := p.oe[1]
				if body.Type == ElementNode && body.DataAtom == a.Body {
					p.framesetOK = false
					copyAttributes(body, p.tok)
				}
			}
		case a.Frameset:
			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
				// Ignore the token.
				return true
			}
			// Replace the body element with the frameset.
			body := p.oe[1]
			if body.Parent != nil {
				body.Parent.RemoveChild(body)
			}
			p.oe = p.oe[:1]
			p.addElement()
			p.im = inFramesetIM
			return true
		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
			// Block-level elements close an open <p> first.
			p.popUntil(buttonScope, a.P)
			p.addElement()
		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
			p.popUntil(buttonScope, a.P)
			// Headings do not nest: close a heading that is the current node.
			switch n := p.top(); n.DataAtom {
			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
				p.oe.pop()
			}
			p.addElement()
		case a.Pre, a.Listing:
			p.popUntil(buttonScope, a.P)
			p.addElement()
			// A leading newline, if any, is dropped by the TextToken case
			// above when the following text token arrives.
			p.framesetOK = false
		case a.Form:
			if p.form != nil && !p.oe.contains(a.Template) {
				// A form is already open: ignore the token.
				return true
			}
			p.popUntil(buttonScope, a.P)
			p.addElement()
			if !p.oe.contains(a.Template) {
				p.form = p.top()
			}
		case a.Li:
			p.framesetOK = false
			// Close an open <li>, searching down the stack until a special
			// element other than address, div or p is met.
			for i := len(p.oe) - 1; i >= 0; i-- {
				node := p.oe[i]
				switch node.DataAtom {
				case a.Li:
					p.oe = p.oe[:i]
				case a.Address, a.Div, a.P:
					continue
				default:
					if !isSpecialElement(node) {
						continue
					}
				}
				break
			}
			p.popUntil(buttonScope, a.P)
			p.addElement()
		case a.Dd, a.Dt:
			p.framesetOK = false
			// Same search as for <li>, but closing an open <dd> or <dt>.
			for i := len(p.oe) - 1; i >= 0; i-- {
				node := p.oe[i]
				switch node.DataAtom {
				case a.Dd, a.Dt:
					p.oe = p.oe[:i]
				case a.Address, a.Div, a.P:
					continue
				default:
					if !isSpecialElement(node) {
						continue
					}
				}
				break
			}
			p.popUntil(buttonScope, a.P)
			p.addElement()
		case a.Plaintext:
			p.popUntil(buttonScope, a.P)
			p.addElement()
		case a.Button:
			// Buttons do not nest: close one that is in scope first.
			p.popUntil(defaultScope, a.Button)
			p.reconstructActiveFormattingElements()
			p.addElement()
			p.framesetOK = false
		case a.A:
			// An <a> that is still active (after the last scope marker) is
			// closed before the new one is opened.
			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
					p.inBodyEndTagFormatting(a.A, "a")
					p.oe.remove(n)
					p.afe.remove(n)
					break
				}
			}
			p.reconstructActiveFormattingElements()
			p.addFormattingElement()
		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
			p.reconstructActiveFormattingElements()
			p.addFormattingElement()
		case a.Nobr:
			p.reconstructActiveFormattingElements()
			// <nobr> does not nest: close one that is in scope first.
			if p.elementInScope(defaultScope, a.Nobr) {
				p.inBodyEndTagFormatting(a.Nobr, "nobr")
				p.reconstructActiveFormattingElements()
			}
			p.addFormattingElement()
		case a.Applet, a.Marquee, a.Object:
			p.reconstructActiveFormattingElements()
			p.addElement()
			// The scope marker keeps formatting elements from leaking across
			// the boundary of this element.
			p.afe = append(p.afe, &scopeMarker)
			p.framesetOK = false
		case a.Table:
			// In quirks mode a table does not close an open <p>.
			if !p.quirks {
				p.popUntil(buttonScope, a.P)
			}
			p.addElement()
			p.framesetOK = false
			p.im = inTableIM
			return true
		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
			p.reconstructActiveFormattingElements()
			p.addElement()
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
			if p.tok.DataAtom == a.Input {
				for _, t := range p.tok.Attr {
					if t.Key == "type" {
						if strings.ToLower(t.Val) == "hidden" {
							// A hidden input leaves framesetOK untouched.
							return true
						}
					}
				}
			}
			p.framesetOK = false
		case a.Param, a.Source, a.Track:
			p.addElement()
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
		case a.Hr:
			p.popUntil(buttonScope, a.P)
			p.addElement()
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
			p.framesetOK = false
		case a.Image:
			// Rewrite <image> to <img> and have the token handled again.
			p.tok.DataAtom = a.Img
			p.tok.Data = a.Img.String()
			return false
		case a.Textarea:
			p.addElement()
			p.setOriginalIM()
			p.framesetOK = false
			p.im = textIM
		case a.Xmp:
			p.popUntil(buttonScope, a.P)
			p.reconstructActiveFormattingElements()
			p.framesetOK = false
			p.parseGenericRawTextElement()
		case a.Iframe:
			p.framesetOK = false
			p.parseGenericRawTextElement()
		case a.Noembed:
			p.parseGenericRawTextElement()
		case a.Noscript:
			if p.scripting {
				p.parseGenericRawTextElement()
				return true
			}
			p.reconstructActiveFormattingElements()
			p.addElement()
			// With scripting disabled the noscript content is parsed as
			// markup, so keep the tokenizer out of raw-text mode.
			p.tokenizer.NextIsNotRawText()
		case a.Select:
			p.reconstructActiveFormattingElements()
			p.addElement()
			p.framesetOK = false
			p.im = inSelectIM
			return true
		case a.Optgroup, a.Option:
			// An open <option> is closed by the next option or optgroup.
			if p.top().DataAtom == a.Option {
				p.oe.pop()
			}
			p.reconstructActiveFormattingElements()
			p.addElement()
		case a.Rb, a.Rtc:
			if p.elementInScope(defaultScope, a.Ruby) {
				p.generateImpliedEndTags()
			}
			p.addElement()
		case a.Rp, a.Rt:
			if p.elementInScope(defaultScope, a.Ruby) {
				// An open <rtc> is kept open here.
				p.generateImpliedEndTags("rtc")
			}
			p.addElement()
		case a.Math, a.Svg:
			p.reconstructActiveFormattingElements()
			if p.tok.DataAtom == a.Math {
				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
			} else {
				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
			}
			adjustForeignAttributes(p.tok.Attr)
			p.addElement()
			// The tag name ("math" or "svg") doubles as the namespace.
			p.top().Namespace = p.tok.Data
			if p.hasSelfClosingToken {
				p.oe.pop()
				p.acknowledgeSelfClosingTag()
			}
			return true
		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
			// Ignore the token.
		default:
			p.reconstructActiveFormattingElements()
			p.addElement()
		}
	case EndTagToken:
		switch p.tok.DataAtom {
		case a.Body:
			if p.elementInScope(defaultScope, a.Body) {
				p.im = afterBodyIM
			}
		case a.Html:
			if p.elementInScope(defaultScope, a.Body) {
				// Treat </html> as </body> followed by </html>.
				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
				return false
			}
			return true
		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
			p.popUntil(defaultScope, p.tok.DataAtom)
		case a.Form:
			if p.oe.contains(a.Template) {
				// Inside a template the form pointer is not used.
				i := p.indexOfElementInScope(defaultScope, a.Form)
				if i == -1 {
					// Ignore the token.
					return true
				}
				p.generateImpliedEndTags()
				if p.oe[i].DataAtom != a.Form {
					// Ignore the token.
					return true
				}
				p.popUntil(defaultScope, a.Form)
			} else {
				node := p.form
				p.form = nil
				i := p.indexOfElementInScope(defaultScope, a.Form)
				if node == nil || i == -1 || p.oe[i] != node {
					// Ignore the token.
					return true
				}
				p.generateImpliedEndTags()
				// Only the form itself is removed; elements opened inside it
				// stay on the stack.
				p.oe.remove(node)
			}
		case a.P:
			if !p.elementInScope(buttonScope, a.P) {
				// A stray </p> produces an empty paragraph.
				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
			}
			p.popUntil(buttonScope, a.P)
		case a.Li:
			p.popUntil(listItemScope, a.Li)
		case a.Dd, a.Dt:
			p.popUntil(defaultScope, p.tok.DataAtom)
		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
			// Any heading end tag closes whichever heading is open.
			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
			p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)
		case a.Applet, a.Marquee, a.Object:
			if p.popUntil(defaultScope, p.tok.DataAtom) {
				p.clearActiveFormattingElements()
			}
		case a.Br:
			// Treat </br> as <br> and have the token handled again.
			p.tok.Type = StartTagToken
			return false
		case a.Template:
			return inHeadIM(p)
		default:
			p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)
		}
	case CommentToken:
		p.addChild(&Node{
			Type: CommentNode,
			Data: p.tok.Data,
		})
	case ErrorToken:
		// With an open template, let inTemplateIM deal with the error token.
		if len(p.templateStack) > 0 {
			p.im = inTemplateIM
			return false
		}
		// NOTE(review): both outcomes of this scan return true, so the loop
		// currently has no observable effect.
		for _, e := range p.oe {
			switch e.DataAtom {
			case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
				a.Thead, a.Tr, a.Body, a.Html:
			default:
				return true
			}
		}
	}

	return true
}
1209
// inBodyEndTagFormatting handles an end tag for a formatting element (such as
// </a>, </b> or </i>) in the body. tagAtom and tagName identify the tag.
//
// When the formatting element was closed out of order relative to block-level
// ("special") elements, the tree is repaired by cloning formatting elements
// and re-parenting the affected nodes. The structure follows the "adoption
// agency algorithm" of the HTML parsing specification; the outer loop runs at
// most eight times.
func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
	// Fast path: the current node has the right tag and is not an active
	// formatting element, so it can simply be popped.
	if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {
		p.oe.pop()
		return
	}

	// Outer loop: at most eight repair rounds.
	for i := 0; i < 8; i++ {
		// Find the formatting element: the newest active formatting element
		// with this tag that comes after the last scope marker.
		var formattingElement *Node
		for j := len(p.afe) - 1; j >= 0; j-- {
			if p.afe[j].Type == scopeMarkerNode {
				break
			}
			if p.afe[j].DataAtom == tagAtom {
				formattingElement = p.afe[j]
				break
			}
		}
		if formattingElement == nil {
			// No such formatting element: handle it like any other end tag.
			p.inBodyEndTagOther(tagAtom, tagName)
			return
		}

		// If the formatting element is no longer open, just forget it.
		feIndex := p.oe.index(formattingElement)
		if feIndex == -1 {
			p.afe.remove(formattingElement)
			return
		}
		// If no element with this tag is in scope, ignore the tag.
		if !p.elementInScope(defaultScope, tagAtom) {
			// Ignore the tag.
			return
		}

		// Find the furthest block: the first special element at or above the
		// formatting element on the stack.
		var furthestBlock *Node
		for _, e := range p.oe[feIndex:] {
			if isSpecialElement(e) {
				furthestBlock = e
				break
			}
		}
		if furthestBlock == nil {
			// No block in the way: pop up to and including the formatting
			// element and drop it from the active list.
			e := p.oe.pop()
			for e != formattingElement {
				e = p.oe.pop()
			}
			p.afe.remove(e)
			return
		}

		// The common ancestor is the element just below the formatting
		// element; bookmark is the formatting element's position in the
		// active list, where its replacement will be inserted later.
		commonAncestor := p.oe[feIndex-1]
		bookmark := p.afe.index(formattingElement)

		// Inner loop: walk down the stack from the furthest block to the
		// formatting element, rebuilding the chain of nodes between them.
		lastNode := furthestBlock
		node := furthestBlock
		x := p.oe.index(node)
		// j counts inner-loop iterations.
		j := 0
		for {
			j++
			// Move to the next element down the stack.
			x--
			node = p.oe[x]
			// Stop once the formatting element is reached.
			if node == formattingElement {
				break
			}
			// After three iterations, active formatting elements met on the
			// way are dropped from the active list rather than cloned.
			if ni := p.afe.index(node); j > 3 && ni > -1 {
				p.afe.remove(node)
				// Removing an entry at or before the bookmark shifts the
				// bookmark's target one position down.
				if ni <= bookmark {
					bookmark--
				}
				continue
			}
			// Elements that are not active formatting elements are removed
			// from the stack of open elements.
			if p.afe.index(node) == -1 {
				p.oe.remove(node)
				continue
			}
			// Replace node with a clone in both the active list and the
			// stack of open elements.
			clone := node.clone()
			p.afe[p.afe.index(node)] = clone
			p.oe[p.oe.index(node)] = clone
			node = clone
			// On the first clone, move the bookmark to just after it.
			if lastNode == furthestBlock {
				bookmark = p.afe.index(node) + 1
			}
			// Re-parent lastNode under the clone.
			if lastNode.Parent != nil {
				lastNode.Parent.RemoveChild(lastNode)
			}
			node.AppendChild(lastNode)
			// Continue with the clone as the node to re-parent.
			lastNode = node
		}

		// Attach the rebuilt chain under the common ancestor, foster
		// parenting it when the ancestor is a table-structure element.
		if lastNode.Parent != nil {
			lastNode.Parent.RemoveChild(lastNode)
		}
		switch commonAncestor.DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			p.fosterParent(lastNode)
		default:
			commonAncestor.AppendChild(lastNode)
		}

		// Clone the formatting element, move the furthest block's children
		// into the clone, and make the clone the furthest block's child.
		clone := formattingElement.clone()
		reparentChildren(clone, furthestBlock)
		furthestBlock.AppendChild(clone)

		// Fix up the active list: replace the formatting element with its
		// clone at the bookmark.
		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
			// Removing the old entry shifts the bookmark down by one.
			bookmark--
		}
		p.afe.remove(formattingElement)
		p.afe.insert(bookmark, clone)

		// Fix up the stack of open elements: the clone goes directly above
		// the furthest block.
		p.oe.remove(formattingElement)
		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
	}
}
1362
1363
1364
1365
1366 func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {
1367 for i := len(p.oe) - 1; i >= 0; i-- {
1368
1369
1370
1371
1372
1373
1374
1375 if (p.oe[i].DataAtom == tagAtom) &&
1376 ((tagAtom != 0) || (p.oe[i].Data == tagName)) {
1377 p.oe = p.oe[:i]
1378 break
1379 }
1380 if isSpecialElement(p.oe[i]) {
1381 break
1382 }
1383 }
1384 }
1385
1386
1387 func textIM(p *parser) bool {
1388 switch p.tok.Type {
1389 case ErrorToken:
1390 p.oe.pop()
1391 case TextToken:
1392 d := p.tok.Data
1393 if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
1394
1395 if d != "" && d[0] == '\r' {
1396 d = d[1:]
1397 }
1398 if d != "" && d[0] == '\n' {
1399 d = d[1:]
1400 }
1401 }
1402 if d == "" {
1403 return true
1404 }
1405 p.addText(d)
1406 return true
1407 case EndTagToken:
1408 p.oe.pop()
1409 }
1410 p.im = p.originalIM
1411 p.originalIM = nil
1412 return p.tok.Type == EndTagToken
1413 }
1414
1415
// inTableIM handles the current token while the parser is directly inside a
// <table> element.
//
// Table-structure tags are handled here. Any token that falls out of the
// switch is processed by inBodyIM with foster parenting enabled, so that
// content which does not belong inside a table ends up outside it.
func inTableIM(p *parser) bool {
	switch p.tok.Type {
	case TextToken:
		// NUL characters are dropped from the text.
		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
		switch p.oe.top().DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			// Whitespace-only text may stay inside the table structure.
			if strings.Trim(p.tok.Data, whitespace) == "" {
				p.addText(p.tok.Data)
				return true
			}
		}
	case StartTagToken:
		switch p.tok.DataAtom {
		case a.Caption:
			p.clearStackToContext(tableScope)
			// The scope marker isolates formatting elements opened inside the
			// caption.
			p.afe = append(p.afe, &scopeMarker)
			p.addElement()
			p.im = inCaptionIM
			return true
		case a.Colgroup:
			p.clearStackToContext(tableScope)
			p.addElement()
			p.im = inColumnGroupIM
			return true
		case a.Col:
			// A bare <col> implies a surrounding <colgroup>.
			p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
			return false
		case a.Tbody, a.Tfoot, a.Thead:
			p.clearStackToContext(tableScope)
			p.addElement()
			p.im = inTableBodyIM
			return true
		case a.Td, a.Th, a.Tr:
			// Rows and cells imply a surrounding <tbody>.
			p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
			return false
		case a.Table:
			// A nested <table> start tag closes the open table first and is
			// then handled again in the new insertion mode.
			if p.popUntil(tableScope, a.Table) {
				p.resetInsertionMode()
				return false
			}
			// Ignore the token.
			return true
		case a.Style, a.Script, a.Template:
			return inHeadIM(p)
		case a.Input:
			// A hidden input is inserted in place, without foster parenting.
			for _, t := range p.tok.Attr {
				if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
					p.addElement()
					p.oe.pop()
					return true
				}
			}
			// Otherwise fall out to the default handling below.
		case a.Form:
			if p.oe.contains(a.Template) || p.form != nil {
				// Ignore the token.
				return true
			}
			p.addElement()
			p.form = p.oe.pop()
			// NOTE(review): there is no return here, so control reaches the
			// inBodyIM call below; its <form> case returns early because
			// p.form is now set. Confirm this is intentional.
		case a.Select:
			p.reconstructActiveFormattingElements()
			// Foster parenting is enabled only while inserting the select.
			switch p.top().DataAtom {
			case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
				p.fosterParenting = true
			}
			p.addElement()
			p.fosterParenting = false
			p.framesetOK = false
			p.im = inSelectInTableIM
			return true
		}
	case EndTagToken:
		switch p.tok.DataAtom {
		case a.Table:
			if p.popUntil(tableScope, a.Table) {
				p.resetInsertionMode()
				return true
			}
			// Ignore the token.
			return true
		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
			// Ignore the token.
			return true
		case a.Template:
			return inHeadIM(p)
		}
	case CommentToken:
		p.addChild(&Node{
			Type: CommentNode,
			Data: p.tok.Data,
		})
		return true
	case DoctypeToken:
		// Ignore the token.
		return true
	case ErrorToken:
		return inBodyIM(p)
	}

	// Anything else: process as in the body, with foster parenting enabled
	// for the duration of that call.
	p.fosterParenting = true
	defer func() { p.fosterParenting = false }()

	return inBodyIM(p)
}
1521
1522
1523 func inCaptionIM(p *parser) bool {
1524 switch p.tok.Type {
1525 case StartTagToken:
1526 switch p.tok.DataAtom {
1527 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
1528 if !p.popUntil(tableScope, a.Caption) {
1529
1530 return true
1531 }
1532 p.clearActiveFormattingElements()
1533 p.im = inTableIM
1534 return false
1535 case a.Select:
1536 p.reconstructActiveFormattingElements()
1537 p.addElement()
1538 p.framesetOK = false
1539 p.im = inSelectInTableIM
1540 return true
1541 }
1542 case EndTagToken:
1543 switch p.tok.DataAtom {
1544 case a.Caption:
1545 if p.popUntil(tableScope, a.Caption) {
1546 p.clearActiveFormattingElements()
1547 p.im = inTableIM
1548 }
1549 return true
1550 case a.Table:
1551 if !p.popUntil(tableScope, a.Caption) {
1552
1553 return true
1554 }
1555 p.clearActiveFormattingElements()
1556 p.im = inTableIM
1557 return false
1558 case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1559
1560 return true
1561 }
1562 }
1563 return inBodyIM(p)
1564 }
1565
1566
1567 func inColumnGroupIM(p *parser) bool {
1568 switch p.tok.Type {
1569 case TextToken:
1570 s := strings.TrimLeft(p.tok.Data, whitespace)
1571 if len(s) < len(p.tok.Data) {
1572
1573 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
1574 if s == "" {
1575 return true
1576 }
1577 p.tok.Data = s
1578 }
1579 case CommentToken:
1580 p.addChild(&Node{
1581 Type: CommentNode,
1582 Data: p.tok.Data,
1583 })
1584 return true
1585 case DoctypeToken:
1586
1587 return true
1588 case StartTagToken:
1589 switch p.tok.DataAtom {
1590 case a.Html:
1591 return inBodyIM(p)
1592 case a.Col:
1593 p.addElement()
1594 p.oe.pop()
1595 p.acknowledgeSelfClosingTag()
1596 return true
1597 case a.Template:
1598 return inHeadIM(p)
1599 }
1600 case EndTagToken:
1601 switch p.tok.DataAtom {
1602 case a.Colgroup:
1603 if p.oe.top().DataAtom == a.Colgroup {
1604 p.oe.pop()
1605 p.im = inTableIM
1606 }
1607 return true
1608 case a.Col:
1609
1610 return true
1611 case a.Template:
1612 return inHeadIM(p)
1613 }
1614 case ErrorToken:
1615 return inBodyIM(p)
1616 }
1617 if p.oe.top().DataAtom != a.Colgroup {
1618 return true
1619 }
1620 p.oe.pop()
1621 p.im = inTableIM
1622 return false
1623 }
1624
1625
1626 func inTableBodyIM(p *parser) bool {
1627 switch p.tok.Type {
1628 case StartTagToken:
1629 switch p.tok.DataAtom {
1630 case a.Tr:
1631 p.clearStackToContext(tableBodyScope)
1632 p.addElement()
1633 p.im = inRowIM
1634 return true
1635 case a.Td, a.Th:
1636 p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
1637 return false
1638 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1639 if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1640 p.im = inTableIM
1641 return false
1642 }
1643
1644 return true
1645 }
1646 case EndTagToken:
1647 switch p.tok.DataAtom {
1648 case a.Tbody, a.Tfoot, a.Thead:
1649 if p.elementInScope(tableScope, p.tok.DataAtom) {
1650 p.clearStackToContext(tableBodyScope)
1651 p.oe.pop()
1652 p.im = inTableIM
1653 }
1654 return true
1655 case a.Table:
1656 if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1657 p.im = inTableIM
1658 return false
1659 }
1660
1661 return true
1662 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
1663
1664 return true
1665 }
1666 case CommentToken:
1667 p.addChild(&Node{
1668 Type: CommentNode,
1669 Data: p.tok.Data,
1670 })
1671 return true
1672 }
1673
1674 return inTableIM(p)
1675 }
1676
1677
1678 func inRowIM(p *parser) bool {
1679 switch p.tok.Type {
1680 case StartTagToken:
1681 switch p.tok.DataAtom {
1682 case a.Td, a.Th:
1683 p.clearStackToContext(tableRowScope)
1684 p.addElement()
1685 p.afe = append(p.afe, &scopeMarker)
1686 p.im = inCellIM
1687 return true
1688 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1689 if p.popUntil(tableScope, a.Tr) {
1690 p.im = inTableBodyIM
1691 return false
1692 }
1693
1694 return true
1695 }
1696 case EndTagToken:
1697 switch p.tok.DataAtom {
1698 case a.Tr:
1699 if p.popUntil(tableScope, a.Tr) {
1700 p.im = inTableBodyIM
1701 return true
1702 }
1703
1704 return true
1705 case a.Table:
1706 if p.popUntil(tableScope, a.Tr) {
1707 p.im = inTableBodyIM
1708 return false
1709 }
1710
1711 return true
1712 case a.Tbody, a.Tfoot, a.Thead:
1713 if p.elementInScope(tableScope, p.tok.DataAtom) {
1714 p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
1715 return false
1716 }
1717
1718 return true
1719 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
1720
1721 return true
1722 }
1723 }
1724
1725 return inTableIM(p)
1726 }
1727
1728
1729 func inCellIM(p *parser) bool {
1730 switch p.tok.Type {
1731 case StartTagToken:
1732 switch p.tok.DataAtom {
1733 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1734 if p.popUntil(tableScope, a.Td, a.Th) {
1735
1736 p.clearActiveFormattingElements()
1737 p.im = inRowIM
1738 return false
1739 }
1740
1741 return true
1742 case a.Select:
1743 p.reconstructActiveFormattingElements()
1744 p.addElement()
1745 p.framesetOK = false
1746 p.im = inSelectInTableIM
1747 return true
1748 }
1749 case EndTagToken:
1750 switch p.tok.DataAtom {
1751 case a.Td, a.Th:
1752 if !p.popUntil(tableScope, p.tok.DataAtom) {
1753
1754 return true
1755 }
1756 p.clearActiveFormattingElements()
1757 p.im = inRowIM
1758 return true
1759 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
1760
1761 return true
1762 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1763 if !p.elementInScope(tableScope, p.tok.DataAtom) {
1764
1765 return true
1766 }
1767
1768 if p.popUntil(tableScope, a.Td, a.Th) {
1769 p.clearActiveFormattingElements()
1770 }
1771 p.im = inRowIM
1772 return false
1773 }
1774 }
1775 return inBodyIM(p)
1776 }
1777
1778
1779 func inSelectIM(p *parser) bool {
1780 switch p.tok.Type {
1781 case TextToken:
1782 p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
1783 case StartTagToken:
1784 switch p.tok.DataAtom {
1785 case a.Html:
1786 return inBodyIM(p)
1787 case a.Option:
1788 if p.top().DataAtom == a.Option {
1789 p.oe.pop()
1790 }
1791 p.addElement()
1792 case a.Optgroup:
1793 if p.top().DataAtom == a.Option {
1794 p.oe.pop()
1795 }
1796 if p.top().DataAtom == a.Optgroup {
1797 p.oe.pop()
1798 }
1799 p.addElement()
1800 case a.Select:
1801 if !p.popUntil(selectScope, a.Select) {
1802
1803 return true
1804 }
1805 p.resetInsertionMode()
1806 case a.Input, a.Keygen, a.Textarea:
1807 if p.elementInScope(selectScope, a.Select) {
1808 p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1809 return false
1810 }
1811
1812 p.tokenizer.NextIsNotRawText()
1813
1814 return true
1815 case a.Script, a.Template:
1816 return inHeadIM(p)
1817 case a.Iframe, a.Noembed, a.Noframes, a.Noscript, a.Plaintext, a.Style, a.Title, a.Xmp:
1818
1819
1820
1821 p.tokenizer.NextIsNotRawText()
1822
1823 return true
1824 }
1825 case EndTagToken:
1826 switch p.tok.DataAtom {
1827 case a.Option:
1828 if p.top().DataAtom == a.Option {
1829 p.oe.pop()
1830 }
1831 case a.Optgroup:
1832 i := len(p.oe) - 1
1833 if p.oe[i].DataAtom == a.Option {
1834 i--
1835 }
1836 if p.oe[i].DataAtom == a.Optgroup {
1837 p.oe = p.oe[:i]
1838 }
1839 case a.Select:
1840 if !p.popUntil(selectScope, a.Select) {
1841
1842 return true
1843 }
1844 p.resetInsertionMode()
1845 case a.Template:
1846 return inHeadIM(p)
1847 }
1848 case CommentToken:
1849 p.addChild(&Node{
1850 Type: CommentNode,
1851 Data: p.tok.Data,
1852 })
1853 case DoctypeToken:
1854
1855 return true
1856 case ErrorToken:
1857 return inBodyIM(p)
1858 }
1859
1860 return true
1861 }
1862
1863
1864 func inSelectInTableIM(p *parser) bool {
1865 switch p.tok.Type {
1866 case StartTagToken, EndTagToken:
1867 switch p.tok.DataAtom {
1868 case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
1869 if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) {
1870
1871 return true
1872 }
1873
1874
1875
1876
1877 for i := len(p.oe) - 1; i >= 0; i-- {
1878 if n := p.oe[i]; n.DataAtom == a.Select {
1879 p.oe = p.oe[:i]
1880 break
1881 }
1882 }
1883 p.resetInsertionMode()
1884 return false
1885 }
1886 }
1887 return inSelectIM(p)
1888 }
1889
1890
1891 func inTemplateIM(p *parser) bool {
1892 switch p.tok.Type {
1893 case TextToken, CommentToken, DoctypeToken:
1894 return inBodyIM(p)
1895 case StartTagToken:
1896 switch p.tok.DataAtom {
1897 case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
1898 return inHeadIM(p)
1899 case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1900 p.templateStack.pop()
1901 p.templateStack = append(p.templateStack, inTableIM)
1902 p.im = inTableIM
1903 return false
1904 case a.Col:
1905 p.templateStack.pop()
1906 p.templateStack = append(p.templateStack, inColumnGroupIM)
1907 p.im = inColumnGroupIM
1908 return false
1909 case a.Tr:
1910 p.templateStack.pop()
1911 p.templateStack = append(p.templateStack, inTableBodyIM)
1912 p.im = inTableBodyIM
1913 return false
1914 case a.Td, a.Th:
1915 p.templateStack.pop()
1916 p.templateStack = append(p.templateStack, inRowIM)
1917 p.im = inRowIM
1918 return false
1919 default:
1920 p.templateStack.pop()
1921 p.templateStack = append(p.templateStack, inBodyIM)
1922 p.im = inBodyIM
1923 return false
1924 }
1925 case EndTagToken:
1926 switch p.tok.DataAtom {
1927 case a.Template:
1928 return inHeadIM(p)
1929 default:
1930
1931 return true
1932 }
1933 case ErrorToken:
1934 if !p.oe.contains(a.Template) {
1935
1936 return true
1937 }
1938
1939
1940
1941 p.generateImpliedEndTags()
1942 for i := len(p.oe) - 1; i >= 0; i-- {
1943 if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
1944 p.oe = p.oe[:i]
1945 break
1946 }
1947 }
1948 p.clearActiveFormattingElements()
1949 p.templateStack.pop()
1950 p.resetInsertionMode()
1951 return false
1952 }
1953 return false
1954 }
1955
1956
1957 func afterBodyIM(p *parser) bool {
1958 switch p.tok.Type {
1959 case ErrorToken:
1960
1961 return true
1962 case TextToken:
1963 s := strings.TrimLeft(p.tok.Data, whitespace)
1964 if len(s) == 0 {
1965
1966 return inBodyIM(p)
1967 }
1968 case StartTagToken:
1969 if p.tok.DataAtom == a.Html {
1970 return inBodyIM(p)
1971 }
1972 case EndTagToken:
1973 if p.tok.DataAtom == a.Html {
1974 if !p.fragment {
1975 p.im = afterAfterBodyIM
1976 }
1977 return true
1978 }
1979 case CommentToken:
1980
1981 if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
1982 panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
1983 }
1984 p.oe[0].AppendChild(&Node{
1985 Type: CommentNode,
1986 Data: p.tok.Data,
1987 })
1988 return true
1989 }
1990 p.im = inBodyIM
1991 return false
1992 }
1993
1994
1995 func inFramesetIM(p *parser) bool {
1996 switch p.tok.Type {
1997 case CommentToken:
1998 p.addChild(&Node{
1999 Type: CommentNode,
2000 Data: p.tok.Data,
2001 })
2002 case TextToken:
2003
2004 s := strings.Map(func(c rune) rune {
2005 switch c {
2006 case ' ', '\t', '\n', '\f', '\r':
2007 return c
2008 }
2009 return -1
2010 }, p.tok.Data)
2011 if s != "" {
2012 p.addText(s)
2013 }
2014 case StartTagToken:
2015 switch p.tok.DataAtom {
2016 case a.Html:
2017 return inBodyIM(p)
2018 case a.Frameset:
2019 p.addElement()
2020 case a.Frame:
2021 p.addElement()
2022 p.oe.pop()
2023 p.acknowledgeSelfClosingTag()
2024 case a.Noframes:
2025 return inHeadIM(p)
2026 }
2027 case EndTagToken:
2028 switch p.tok.DataAtom {
2029 case a.Frameset:
2030 if p.oe.top().DataAtom != a.Html {
2031 p.oe.pop()
2032 if p.oe.top().DataAtom != a.Frameset {
2033 p.im = afterFramesetIM
2034 return true
2035 }
2036 }
2037 }
2038 default:
2039
2040 }
2041 return true
2042 }
2043
2044
2045 func afterFramesetIM(p *parser) bool {
2046 switch p.tok.Type {
2047 case CommentToken:
2048 p.addChild(&Node{
2049 Type: CommentNode,
2050 Data: p.tok.Data,
2051 })
2052 case TextToken:
2053
2054 s := strings.Map(func(c rune) rune {
2055 switch c {
2056 case ' ', '\t', '\n', '\f', '\r':
2057 return c
2058 }
2059 return -1
2060 }, p.tok.Data)
2061 if s != "" {
2062 p.addText(s)
2063 }
2064 case StartTagToken:
2065 switch p.tok.DataAtom {
2066 case a.Html:
2067 return inBodyIM(p)
2068 case a.Noframes:
2069 return inHeadIM(p)
2070 }
2071 case EndTagToken:
2072 switch p.tok.DataAtom {
2073 case a.Html:
2074 p.im = afterAfterFramesetIM
2075 return true
2076 }
2077 default:
2078
2079 }
2080 return true
2081 }
2082
2083
2084 func afterAfterBodyIM(p *parser) bool {
2085 switch p.tok.Type {
2086 case ErrorToken:
2087
2088 return true
2089 case TextToken:
2090 s := strings.TrimLeft(p.tok.Data, whitespace)
2091 if len(s) == 0 {
2092
2093 return inBodyIM(p)
2094 }
2095 case StartTagToken:
2096 if p.tok.DataAtom == a.Html {
2097 return inBodyIM(p)
2098 }
2099 case CommentToken:
2100 p.doc.AppendChild(&Node{
2101 Type: CommentNode,
2102 Data: p.tok.Data,
2103 })
2104 return true
2105 case DoctypeToken:
2106 return inBodyIM(p)
2107 }
2108 p.im = inBodyIM
2109 return false
2110 }
2111
2112
2113 func afterAfterFramesetIM(p *parser) bool {
2114 switch p.tok.Type {
2115 case CommentToken:
2116 p.doc.AppendChild(&Node{
2117 Type: CommentNode,
2118 Data: p.tok.Data,
2119 })
2120 case TextToken:
2121
2122 s := strings.Map(func(c rune) rune {
2123 switch c {
2124 case ' ', '\t', '\n', '\f', '\r':
2125 return c
2126 }
2127 return -1
2128 }, p.tok.Data)
2129 if s != "" {
2130 p.tok.Data = s
2131 return inBodyIM(p)
2132 }
2133 case StartTagToken:
2134 switch p.tok.DataAtom {
2135 case a.Html:
2136 return inBodyIM(p)
2137 case a.Noframes:
2138 return inHeadIM(p)
2139 }
2140 case DoctypeToken:
2141 return inBodyIM(p)
2142 default:
2143
2144 }
2145 return true
2146 }
2147
// ignoreTheRemainingTokens has the same signature as the insertion-mode
// handlers and unconditionally reports the current token as consumed without
// touching the parser state.
func ignoreTheRemainingTokens(p *parser) bool {
	return true
}
2151
// whitespaceOrNUL is the whitespace cutset extended with the NUL character.
// parseForeignContent trims with it when updating the framesetOK flag.
const whitespaceOrNUL = whitespace + "\x00"
2153
2154
// parseForeignContent processes the current token while the parser is in
// foreign content (parseCurrentToken calls it when inForeignContent reports
// true). It reports whether the token was consumed; false asks the caller to
// reprocess the token.
func parseForeignContent(p *parser) bool {
	switch p.tok.Type {
	case TextToken:
		// Text other than whitespace or NUL clears the framesetOK flag.
		if p.framesetOK {
			p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
		}
		// NUL characters are replaced with U+FFFD before the text is added.
		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
		p.addText(p.tok.Data)
	case CommentToken:
		p.addChild(&Node{
			Type: CommentNode,
			Data: p.tok.Data,
		})
	case StartTagToken:
		if !p.fragment {
			// b records whether this start tag breaks out of foreign
			// content: either the tag name is in the breakout table, or it
			// is a <font> carrying a color, face or size attribute.
			b := breakout[p.tok.Data]
			if p.tok.DataAtom == a.Font {
			loop:
				for _, attr := range p.tok.Attr {
					switch attr.Key {
					case "color", "face", "size":
						b = true
						break loop
					}
				}
			}
			if b {
				// Pop elements until the top of the stack is an HTML
				// element or an integration point, then reprocess the token.
				for i := len(p.oe) - 1; i >= 0; i-- {
					n := p.oe[i]
					if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
						p.oe = p.oe[:i+1]
						break
					}
				}
				return false
			}
		}
		current := p.adjustedCurrentNode()
		switch current.Namespace {
		case "math":
			adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
		case "svg":
			// Replace the tag name with its entry in svgTagNameAdjustments,
			// when there is one, and recompute the atom to match.
			if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
				p.tok.DataAtom = a.Lookup([]byte(x))
				p.tok.Data = x
			}
			adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
		default:
			panic("html: bad parser state: unexpected namespace")
		}
		adjustForeignAttributes(p.tok.Attr)
		// The new element inherits the namespace of the adjusted current
		// node.
		namespace := current.Namespace
		p.addElement()
		p.top().Namespace = namespace
		if namespace != "" {
			// NOTE(review): presumably keeps the tokenizer out of raw text
			// mode for foreign elements whose names match HTML raw-text
			// tags; confirm against the tokenizer.
			p.tokenizer.NextIsNotRawText()
		}
		if p.hasSelfClosingToken {
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
		}
	case EndTagToken:
		// Walk down the stack: on reaching an HTML element, hand the token
		// to the current insertion mode; on a case-insensitive name match,
		// truncate the stack at that element.
		for i := len(p.oe) - 1; i >= 0; i-- {
			if p.oe[i].Namespace == "" {
				return p.im(p)
			}
			if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
				p.oe = p.oe[:i]
				break
			}
		}
		return true
	default:
		// Ignore the token.
	}
	return true
}
2236
2237
2238 func (p *parser) adjustedCurrentNode() *Node {
2239 if len(p.oe) == 1 && p.fragment && p.context != nil {
2240 return p.context
2241 }
2242 return p.oe.top()
2243 }
2244
2245
2246 func (p *parser) inForeignContent() bool {
2247 if len(p.oe) == 0 {
2248 return false
2249 }
2250 n := p.adjustedCurrentNode()
2251 if n.Namespace == "" {
2252 return false
2253 }
2254 if mathMLTextIntegrationPoint(n) {
2255 if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
2256 return false
2257 }
2258 if p.tok.Type == TextToken {
2259 return false
2260 }
2261 }
2262 if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
2263 return false
2264 }
2265 if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
2266 return false
2267 }
2268 if p.tok.Type == ErrorToken {
2269 return false
2270 }
2271 return true
2272 }
2273
2274
2275
2276 func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
2277 realToken, selfClosing := p.tok, p.hasSelfClosingToken
2278 p.tok = Token{
2279 Type: t,
2280 DataAtom: dataAtom,
2281 Data: data,
2282 }
2283 p.hasSelfClosingToken = false
2284 p.parseCurrentToken()
2285 p.tok, p.hasSelfClosingToken = realToken, selfClosing
2286 }
2287
2288
2289
2290 func (p *parser) parseCurrentToken() {
2291 if p.tok.Type == SelfClosingTagToken {
2292 p.hasSelfClosingToken = true
2293 p.tok.Type = StartTagToken
2294 }
2295
2296 consumed := false
2297 for !consumed {
2298 if p.inForeignContent() {
2299 consumed = parseForeignContent(p)
2300 } else {
2301 consumed = p.im(p)
2302 }
2303 }
2304
2305 if p.hasSelfClosingToken {
2306
2307 p.hasSelfClosingToken = false
2308 }
2309 }
2310
2311 func (p *parser) parse() error {
2312
2313 var err error
2314 for err != io.EOF {
2315
2316 n := p.oe.top()
2317 p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
2318
2319 p.tokenizer.Next()
2320 p.tok = p.tokenizer.Token()
2321 if p.tok.Type == ErrorToken {
2322 err = p.tokenizer.Err()
2323 if err != nil && err != io.EOF {
2324 return err
2325 }
2326 }
2327 p.parseCurrentToken()
2328 }
2329 return nil
2330 }
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
// Parse returns the parse tree for the HTML read from r.
//
// It is shorthand for ParseWithOptions(r) with no options, so the returned
// node is the document node and a non-nil error means parsing failed.
func Parse(r io.Reader) (*Node, error) {
	return ParseWithOptions(r)
}
2346
2347
2348
2349
2350
2351
// ParseFragment parses a fragment of HTML read from r and returns the nodes
// that were found. If context is non-nil it must be an element node; the
// fragment is parsed relative to it.
//
// It is shorthand for ParseFragmentWithOptions(r, context) with no options.
func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
	return ParseFragmentWithOptions(r, context)
}
2355
2356
// ParseOption configures a parser. ParseWithOptions and
// ParseFragmentWithOptions apply each option, in order, before parsing
// starts.
type ParseOption func(p *parser)
2358
2359
2360
2361
2362
// ParseOptionEnableScripting returns a ParseOption that sets the parser's
// scripting flag to enable.
//
// ParseWithOptions and ParseFragmentWithOptions start with the flag set to
// true, so this option is only needed to turn scripting off.
func ParseOptionEnableScripting(enable bool) ParseOption {
	return func(p *parser) {
		p.scripting = enable
	}
}
2368
2369
2370 func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
2371 p := &parser{
2372 tokenizer: NewTokenizer(r),
2373 doc: &Node{
2374 Type: DocumentNode,
2375 },
2376 scripting: true,
2377 framesetOK: true,
2378 im: initialIM,
2379 }
2380
2381 for _, f := range opts {
2382 f(p)
2383 }
2384
2385 if err := p.parse(); err != nil {
2386 return nil, err
2387 }
2388 return p.doc, nil
2389 }
2390
2391
2392 func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ([]*Node, error) {
2393 contextTag := ""
2394 if context != nil {
2395 if context.Type != ElementNode {
2396 return nil, errors.New("html: ParseFragment of non-element Node")
2397 }
2398
2399
2400
2401 if context.DataAtom != a.Lookup([]byte(context.Data)) {
2402 return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
2403 }
2404 contextTag = context.DataAtom.String()
2405 }
2406 p := &parser{
2407 doc: &Node{
2408 Type: DocumentNode,
2409 },
2410 scripting: true,
2411 fragment: true,
2412 context: context,
2413 }
2414 if context != nil && context.Namespace != "" {
2415 p.tokenizer = NewTokenizer(r)
2416 } else {
2417 p.tokenizer = NewTokenizerFragment(r, contextTag)
2418 }
2419
2420 for _, f := range opts {
2421 f(p)
2422 }
2423
2424 root := &Node{
2425 Type: ElementNode,
2426 DataAtom: a.Html,
2427 Data: a.Html.String(),
2428 }
2429 p.doc.AppendChild(root)
2430 p.oe = nodeStack{root}
2431 if context != nil && context.DataAtom == a.Template {
2432 p.templateStack = append(p.templateStack, inTemplateIM)
2433 }
2434 p.resetInsertionMode()
2435
2436 for n := context; n != nil; n = n.Parent {
2437 if n.Type == ElementNode && n.DataAtom == a.Form {
2438 p.form = n
2439 break
2440 }
2441 }
2442
2443 if err := p.parse(); err != nil {
2444 return nil, err
2445 }
2446
2447 parent := p.doc
2448 if context != nil {
2449 parent = root
2450 }
2451
2452 var result []*Node
2453 for c := parent.FirstChild; c != nil; {
2454 next := c.NextSibling
2455 parent.RemoveChild(c)
2456 result = append(result, c)
2457 c = next
2458 }
2459 return result, nil
2460 }
2461
View as plain text