...

Source file src/golang.org/x/net/html/parse.go

Documentation: golang.org/x/net/html

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package html
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"strings"
    12  
    13  	a "golang.org/x/net/html/atom"
    14  )
    15  
// A parser implements the HTML5 parsing algorithm:
// https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
//
// It carries the tree-construction state: the token source, the document
// being built, the element stacks and the current insertion mode.
type parser struct {
	// tokenizer provides the tokens for the parser.
	tokenizer *Tokenizer
	// tok is the most recently read token. Insertion modes read it and may
	// rewrite its Data (for example to strip leading whitespace).
	tok Token
	// Self-closing tags like <hr/> are treated as start tags, except that
	// hasSelfClosingToken is set while they are being processed. It is
	// cleared by acknowledgeSelfClosingTag.
	hasSelfClosingToken bool
	// doc is the document root element. top falls back to it when the stack
	// of open elements has no top node.
	doc *Node
	// The stack of open elements (section 12.2.4.2) and active formatting
	// elements (section 12.2.4.3).
	oe, afe nodeStack
	// Element pointers (section 12.2.4.4). head is recorded when the <head>
	// start tag is inserted in the "before head" insertion mode.
	head, form *Node
	// Other parsing state flags (section 12.2.4.5).
	scripting, framesetOK bool
	// The stack of template insertion modes. An entry is pushed when a
	// <template> start tag is inserted and popped on its end tag.
	templateStack insertionModeStack
	// im is the current insertion mode.
	im insertionMode
	// originalIM is the insertion mode to go back to after completing a text
	// or inTableText insertion mode.
	originalIM insertionMode
	// fosterParenting is whether new elements should be inserted according to
	// the foster parenting rules (section 12.2.6.1).
	fosterParenting bool
	// quirks is whether the parser is operating in "quirks mode."
	quirks bool
	// fragment is whether the parser is parsing an HTML fragment.
	fragment bool
	// context is the context element when parsing an HTML fragment
	// (section 12.4). resetInsertionMode substitutes it for the bottommost
	// entry of the stack of open elements when it is non-nil.
	context *Node
}
    53  
    54  func (p *parser) top() *Node {
    55  	if n := p.oe.top(); n != nil {
    56  		return n
    57  	}
    58  	return p.doc
    59  }
    60  
// Stop tags for use in popUntil. These come from section 12.2.4.2.
var (
	// defaultScopeStopTags maps an element namespace (the empty namespace,
	// "math" or "svg") to the tags that end a scope search.
	// indexOfElementInScope consults it for defaultScope, listItemScope and
	// buttonScope; the other scopes do not use it.
	defaultScopeStopTags = map[string][]a.Atom{
		"":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
		"math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
		"svg":  {a.Desc, a.ForeignObject, a.Title},
	}
)
    69  
// A scope selects which stop tags bound a search of the stack of open
// elements. See indexOfElementInScope and clearStackToContext.
type scope int

const (
	// defaultScope stops only at the tags in defaultScopeStopTags.
	defaultScope scope = iota
	// listItemScope additionally stops at ol and ul.
	listItemScope
	// buttonScope additionally stops at button.
	buttonScope
	// tableScope stops at html, table and template.
	tableScope
	// tableRowScope is accepted by clearStackToContext only, where the
	// context elements are html, tr and template.
	tableRowScope
	// tableBodyScope is accepted by clearStackToContext only, where the
	// context elements are html, tbody, tfoot, thead and template.
	tableBodyScope
	// selectScope stops at every element other than optgroup and option.
	selectScope
)
    81  
    82  // popUntil pops the stack of open elements at the highest element whose tag
    83  // is in matchTags, provided there is no higher element in the scope's stop
    84  // tags (as defined in section 12.2.4.2). It returns whether or not there was
    85  // such an element. If there was not, popUntil leaves the stack unchanged.
    86  //
    87  // For example, the set of stop tags for table scope is: "html", "table". If
    88  // the stack was:
    89  // ["html", "body", "font", "table", "b", "i", "u"]
    90  // then popUntil(tableScope, "font") would return false, but
    91  // popUntil(tableScope, "i") would return true and the stack would become:
    92  // ["html", "body", "font", "table", "b"]
    93  //
    94  // If an element's tag is in both the stop tags and matchTags, then the stack
    95  // will be popped and the function returns true (provided, of course, there was
    96  // no higher element in the stack that was also in the stop tags). For example,
    97  // popUntil(tableScope, "table") returns true and leaves:
    98  // ["html", "body", "font"]
    99  func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
   100  	if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
   101  		p.oe = p.oe[:i]
   102  		return true
   103  	}
   104  	return false
   105  }
   106  
   107  // indexOfElementInScope returns the index in p.oe of the highest element whose
   108  // tag is in matchTags that is in scope. If no matching element is in scope, it
   109  // returns -1.
   110  func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
   111  	for i := len(p.oe) - 1; i >= 0; i-- {
   112  		tagAtom := p.oe[i].DataAtom
   113  		if p.oe[i].Namespace == "" {
   114  			for _, t := range matchTags {
   115  				if t == tagAtom {
   116  					return i
   117  				}
   118  			}
   119  			switch s {
   120  			case defaultScope:
   121  				// No-op.
   122  			case listItemScope:
   123  				if tagAtom == a.Ol || tagAtom == a.Ul {
   124  					return -1
   125  				}
   126  			case buttonScope:
   127  				if tagAtom == a.Button {
   128  					return -1
   129  				}
   130  			case tableScope:
   131  				if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
   132  					return -1
   133  				}
   134  			case selectScope:
   135  				if tagAtom != a.Optgroup && tagAtom != a.Option {
   136  					return -1
   137  				}
   138  			default:
   139  				panic("unreachable")
   140  			}
   141  		}
   142  		switch s {
   143  		case defaultScope, listItemScope, buttonScope:
   144  			for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
   145  				if t == tagAtom {
   146  					return -1
   147  				}
   148  			}
   149  		}
   150  	}
   151  	return -1
   152  }
   153  
   154  // elementInScope is like popUntil, except that it doesn't modify the stack of
   155  // open elements.
   156  func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
   157  	return p.indexOfElementInScope(s, matchTags...) != -1
   158  }
   159  
   160  // clearStackToContext pops elements off the stack of open elements until a
   161  // scope-defined element is found.
   162  func (p *parser) clearStackToContext(s scope) {
   163  	for i := len(p.oe) - 1; i >= 0; i-- {
   164  		tagAtom := p.oe[i].DataAtom
   165  		switch s {
   166  		case tableScope:
   167  			if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
   168  				p.oe = p.oe[:i+1]
   169  				return
   170  			}
   171  		case tableRowScope:
   172  			if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {
   173  				p.oe = p.oe[:i+1]
   174  				return
   175  			}
   176  		case tableBodyScope:
   177  			if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {
   178  				p.oe = p.oe[:i+1]
   179  				return
   180  			}
   181  		default:
   182  			panic("unreachable")
   183  		}
   184  	}
   185  }
   186  
// parseGenericRawTextElement implements the generic raw text element parsing
// algorithm defined in 12.2.6.2.
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
//
// It inserts an element for the current token, remembers the current
// insertion mode in originalIM and switches to the text insertion mode.
//
// TODO: The spec treats both the RAWTEXT and RCDATA states as part of the
// tokenizer, so the tokenizer needs to be made aware of both states.
func (p *parser) parseGenericRawTextElement() {
	p.addElement()
	// Unlike setOriginalIM, this assignment overwrites any previously saved
	// mode without checking that originalIM is nil.
	p.originalIM = p.im
	p.im = textIM
}
   197  
   198  // generateImpliedEndTags pops nodes off the stack of open elements as long as
   199  // the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
   200  // If exceptions are specified, nodes with that name will not be popped off.
   201  func (p *parser) generateImpliedEndTags(exceptions ...string) {
   202  	var i int
   203  loop:
   204  	for i = len(p.oe) - 1; i >= 0; i-- {
   205  		n := p.oe[i]
   206  		if n.Type != ElementNode {
   207  			break
   208  		}
   209  		switch n.DataAtom {
   210  		case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
   211  			for _, except := range exceptions {
   212  				if n.Data == except {
   213  					break loop
   214  				}
   215  			}
   216  			continue
   217  		}
   218  		break
   219  	}
   220  
   221  	p.oe = p.oe[:i+1]
   222  }
   223  
   224  // addChild adds a child node n to the top element, and pushes n onto the stack
   225  // of open elements if it is an element node.
   226  func (p *parser) addChild(n *Node) {
   227  	if p.shouldFosterParent() {
   228  		p.fosterParent(n)
   229  	} else {
   230  		p.top().AppendChild(n)
   231  	}
   232  
   233  	if n.Type == ElementNode {
   234  		p.oe = append(p.oe, n)
   235  	}
   236  }
   237  
   238  // shouldFosterParent returns whether the next node to be added should be
   239  // foster parented.
   240  func (p *parser) shouldFosterParent() bool {
   241  	if p.fosterParenting {
   242  		switch p.top().DataAtom {
   243  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
   244  			return true
   245  		}
   246  	}
   247  	return false
   248  }
   249  
   250  // fosterParent adds a child node according to the foster parenting rules.
   251  // Section 12.2.6.1, "foster parenting".
   252  func (p *parser) fosterParent(n *Node) {
   253  	var table, parent, prev, template *Node
   254  	var i int
   255  	for i = len(p.oe) - 1; i >= 0; i-- {
   256  		if p.oe[i].DataAtom == a.Table {
   257  			table = p.oe[i]
   258  			break
   259  		}
   260  	}
   261  
   262  	var j int
   263  	for j = len(p.oe) - 1; j >= 0; j-- {
   264  		if p.oe[j].DataAtom == a.Template {
   265  			template = p.oe[j]
   266  			break
   267  		}
   268  	}
   269  
   270  	if template != nil && (table == nil || j > i) {
   271  		template.AppendChild(n)
   272  		return
   273  	}
   274  
   275  	if table == nil {
   276  		// The foster parent is the html element.
   277  		parent = p.oe[0]
   278  	} else {
   279  		parent = table.Parent
   280  	}
   281  	if parent == nil {
   282  		parent = p.oe[i-1]
   283  	}
   284  
   285  	if table != nil {
   286  		prev = table.PrevSibling
   287  	} else {
   288  		prev = parent.LastChild
   289  	}
   290  	if prev != nil && prev.Type == TextNode && n.Type == TextNode {
   291  		prev.Data += n.Data
   292  		return
   293  	}
   294  
   295  	parent.InsertBefore(n, table)
   296  }
   297  
   298  // addText adds text to the preceding node if it is a text node, or else it
   299  // calls addChild with a new text node.
   300  func (p *parser) addText(text string) {
   301  	if text == "" {
   302  		return
   303  	}
   304  
   305  	if p.shouldFosterParent() {
   306  		p.fosterParent(&Node{
   307  			Type: TextNode,
   308  			Data: text,
   309  		})
   310  		return
   311  	}
   312  
   313  	t := p.top()
   314  	if n := t.LastChild; n != nil && n.Type == TextNode {
   315  		n.Data += text
   316  		return
   317  	}
   318  	p.addChild(&Node{
   319  		Type: TextNode,
   320  		Data: text,
   321  	})
   322  }
   323  
   324  // addElement adds a child element based on the current token.
   325  func (p *parser) addElement() {
   326  	p.addChild(&Node{
   327  		Type:     ElementNode,
   328  		DataAtom: p.tok.DataAtom,
   329  		Data:     p.tok.Data,
   330  		Attr:     p.tok.Attr,
   331  	})
   332  }
   333  
   334  // Section 12.2.4.3.
   335  func (p *parser) addFormattingElement() {
   336  	tagAtom, attr := p.tok.DataAtom, p.tok.Attr
   337  	p.addElement()
   338  
   339  	// Implement the Noah's Ark clause, but with three per family instead of two.
   340  	identicalElements := 0
   341  findIdenticalElements:
   342  	for i := len(p.afe) - 1; i >= 0; i-- {
   343  		n := p.afe[i]
   344  		if n.Type == scopeMarkerNode {
   345  			break
   346  		}
   347  		if n.Type != ElementNode {
   348  			continue
   349  		}
   350  		if n.Namespace != "" {
   351  			continue
   352  		}
   353  		if n.DataAtom != tagAtom {
   354  			continue
   355  		}
   356  		if len(n.Attr) != len(attr) {
   357  			continue
   358  		}
   359  	compareAttributes:
   360  		for _, t0 := range n.Attr {
   361  			for _, t1 := range attr {
   362  				if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
   363  					// Found a match for this attribute, continue with the next attribute.
   364  					continue compareAttributes
   365  				}
   366  			}
   367  			// If we get here, there is no attribute that matches a.
   368  			// Therefore the element is not identical to the new one.
   369  			continue findIdenticalElements
   370  		}
   371  
   372  		identicalElements++
   373  		if identicalElements >= 3 {
   374  			p.afe.remove(n)
   375  		}
   376  	}
   377  
   378  	p.afe = append(p.afe, p.top())
   379  }
   380  
   381  // Section 12.2.4.3.
   382  func (p *parser) clearActiveFormattingElements() {
   383  	for {
   384  		if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode {
   385  			return
   386  		}
   387  	}
   388  }
   389  
   390  // Section 12.2.4.3.
   391  func (p *parser) reconstructActiveFormattingElements() {
   392  	n := p.afe.top()
   393  	if n == nil {
   394  		return
   395  	}
   396  	if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
   397  		return
   398  	}
   399  	i := len(p.afe) - 1
   400  	for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
   401  		if i == 0 {
   402  			i = -1
   403  			break
   404  		}
   405  		i--
   406  		n = p.afe[i]
   407  	}
   408  	for {
   409  		i++
   410  		clone := p.afe[i].clone()
   411  		p.addChild(clone)
   412  		p.afe[i] = clone
   413  		if i == len(p.afe)-1 {
   414  			break
   415  		}
   416  	}
   417  }
   418  
// acknowledgeSelfClosingTag clears hasSelfClosingToken, marking the
// self-closing flag of the token being processed as handled (section 12.2.5).
func (p *parser) acknowledgeSelfClosingTag() {
	p.hasSelfClosingToken = false
}
   423  
// An insertion mode (section 12.2.4.1) is the state transition function from
// a particular state in the HTML5 parser's state machine. It updates the
// parser's fields depending on parser.tok (where ErrorToken means EOF).
// It returns whether the token was consumed.
//
// A mode that returns false has typically changed p.im or implied another
// token first; presumably the caller then offers the same token to the new
// mode (the dispatch loop is outside this part of the file).
type insertionMode func(*parser) bool
   429  
   430  // setOriginalIM sets the insertion mode to return to after completing a text or
   431  // inTableText insertion mode.
   432  // Section 12.2.4.1, "using the rules for".
   433  func (p *parser) setOriginalIM() {
   434  	if p.originalIM != nil {
   435  		panic("html: bad parser state: originalIM was set twice")
   436  	}
   437  	p.originalIM = p.im
   438  }
   439  
   440  // Section 12.2.4.1, "reset the insertion mode".
   441  func (p *parser) resetInsertionMode() {
   442  	for i := len(p.oe) - 1; i >= 0; i-- {
   443  		n := p.oe[i]
   444  		last := i == 0
   445  		if last && p.context != nil {
   446  			n = p.context
   447  		}
   448  
   449  		switch n.DataAtom {
   450  		case a.Select:
   451  			if !last {
   452  				for ancestor, first := n, p.oe[0]; ancestor != first; {
   453  					ancestor = p.oe[p.oe.index(ancestor)-1]
   454  					switch ancestor.DataAtom {
   455  					case a.Template:
   456  						p.im = inSelectIM
   457  						return
   458  					case a.Table:
   459  						p.im = inSelectInTableIM
   460  						return
   461  					}
   462  				}
   463  			}
   464  			p.im = inSelectIM
   465  		case a.Td, a.Th:
   466  			// TODO: remove this divergence from the HTML5 spec.
   467  			//
   468  			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
   469  			p.im = inCellIM
   470  		case a.Tr:
   471  			p.im = inRowIM
   472  		case a.Tbody, a.Thead, a.Tfoot:
   473  			p.im = inTableBodyIM
   474  		case a.Caption:
   475  			p.im = inCaptionIM
   476  		case a.Colgroup:
   477  			p.im = inColumnGroupIM
   478  		case a.Table:
   479  			p.im = inTableIM
   480  		case a.Template:
   481  			// TODO: remove this divergence from the HTML5 spec.
   482  			if n.Namespace != "" {
   483  				continue
   484  			}
   485  			p.im = p.templateStack.top()
   486  		case a.Head:
   487  			// TODO: remove this divergence from the HTML5 spec.
   488  			//
   489  			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
   490  			p.im = inHeadIM
   491  		case a.Body:
   492  			p.im = inBodyIM
   493  		case a.Frameset:
   494  			p.im = inFramesetIM
   495  		case a.Html:
   496  			if p.head == nil {
   497  				p.im = beforeHeadIM
   498  			} else {
   499  				p.im = afterHeadIM
   500  			}
   501  		default:
   502  			if last {
   503  				p.im = inBodyIM
   504  				return
   505  			}
   506  			continue
   507  		}
   508  		return
   509  	}
   510  }
   511  
// whitespace is the cutset (space, tab, carriage return, line feed, form
// feed) that the insertion modes pass to strings.TrimLeft when stripping
// leading whitespace from text tokens.
const whitespace = " \t\r\n\f"
   513  
   514  // Section 12.2.6.4.1.
   515  func initialIM(p *parser) bool {
   516  	switch p.tok.Type {
   517  	case TextToken:
   518  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
   519  		if len(p.tok.Data) == 0 {
   520  			// It was all whitespace, so ignore it.
   521  			return true
   522  		}
   523  	case CommentToken:
   524  		p.doc.AppendChild(&Node{
   525  			Type: CommentNode,
   526  			Data: p.tok.Data,
   527  		})
   528  		return true
   529  	case DoctypeToken:
   530  		n, quirks := parseDoctype(p.tok.Data)
   531  		p.doc.AppendChild(n)
   532  		p.quirks = quirks
   533  		p.im = beforeHTMLIM
   534  		return true
   535  	}
   536  	p.quirks = true
   537  	p.im = beforeHTMLIM
   538  	return false
   539  }
   540  
   541  // Section 12.2.6.4.2.
   542  func beforeHTMLIM(p *parser) bool {
   543  	switch p.tok.Type {
   544  	case DoctypeToken:
   545  		// Ignore the token.
   546  		return true
   547  	case TextToken:
   548  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
   549  		if len(p.tok.Data) == 0 {
   550  			// It was all whitespace, so ignore it.
   551  			return true
   552  		}
   553  	case StartTagToken:
   554  		if p.tok.DataAtom == a.Html {
   555  			p.addElement()
   556  			p.im = beforeHeadIM
   557  			return true
   558  		}
   559  	case EndTagToken:
   560  		switch p.tok.DataAtom {
   561  		case a.Head, a.Body, a.Html, a.Br:
   562  			p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
   563  			return false
   564  		default:
   565  			// Ignore the token.
   566  			return true
   567  		}
   568  	case CommentToken:
   569  		p.doc.AppendChild(&Node{
   570  			Type: CommentNode,
   571  			Data: p.tok.Data,
   572  		})
   573  		return true
   574  	}
   575  	p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
   576  	return false
   577  }
   578  
   579  // Section 12.2.6.4.3.
   580  func beforeHeadIM(p *parser) bool {
   581  	switch p.tok.Type {
   582  	case TextToken:
   583  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
   584  		if len(p.tok.Data) == 0 {
   585  			// It was all whitespace, so ignore it.
   586  			return true
   587  		}
   588  	case StartTagToken:
   589  		switch p.tok.DataAtom {
   590  		case a.Head:
   591  			p.addElement()
   592  			p.head = p.top()
   593  			p.im = inHeadIM
   594  			return true
   595  		case a.Html:
   596  			return inBodyIM(p)
   597  		}
   598  	case EndTagToken:
   599  		switch p.tok.DataAtom {
   600  		case a.Head, a.Body, a.Html, a.Br:
   601  			p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
   602  			return false
   603  		default:
   604  			// Ignore the token.
   605  			return true
   606  		}
   607  	case CommentToken:
   608  		p.addChild(&Node{
   609  			Type: CommentNode,
   610  			Data: p.tok.Data,
   611  		})
   612  		return true
   613  	case DoctypeToken:
   614  		// Ignore the token.
   615  		return true
   616  	}
   617  
   618  	p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
   619  	return false
   620  }
   621  
   622  // Section 12.2.6.4.4.
   623  func inHeadIM(p *parser) bool {
   624  	switch p.tok.Type {
   625  	case TextToken:
   626  		s := strings.TrimLeft(p.tok.Data, whitespace)
   627  		if len(s) < len(p.tok.Data) {
   628  			// Add the initial whitespace to the current node.
   629  			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
   630  			if s == "" {
   631  				return true
   632  			}
   633  			p.tok.Data = s
   634  		}
   635  	case StartTagToken:
   636  		switch p.tok.DataAtom {
   637  		case a.Html:
   638  			return inBodyIM(p)
   639  		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta:
   640  			p.addElement()
   641  			p.oe.pop()
   642  			p.acknowledgeSelfClosingTag()
   643  			return true
   644  		case a.Noscript:
   645  			if p.scripting {
   646  				p.parseGenericRawTextElement()
   647  				return true
   648  			}
   649  			p.addElement()
   650  			p.im = inHeadNoscriptIM
   651  			// Don't let the tokenizer go into raw text mode when scripting is disabled.
   652  			p.tokenizer.NextIsNotRawText()
   653  			return true
   654  		case a.Script, a.Title:
   655  			p.addElement()
   656  			p.setOriginalIM()
   657  			p.im = textIM
   658  			return true
   659  		case a.Noframes, a.Style:
   660  			p.parseGenericRawTextElement()
   661  			return true
   662  		case a.Head:
   663  			// Ignore the token.
   664  			return true
   665  		case a.Template:
   666  			// TODO: remove this divergence from the HTML5 spec.
   667  			//
   668  			// We don't handle all of the corner cases when mixing foreign
   669  			// content (i.e. <math> or <svg>) with <template>. Without this
   670  			// early return, we can get into an infinite loop, possibly because
   671  			// of the "TODO... further divergence" a little below.
   672  			//
   673  			// As a workaround, if we are mixing foreign content and templates,
   674  			// just ignore the rest of the HTML. Foreign content is rare and a
   675  			// relatively old HTML feature. Templates are also rare and a
   676  			// relatively new HTML feature. Their combination is very rare.
   677  			for _, e := range p.oe {
   678  				if e.Namespace != "" {
   679  					p.im = ignoreTheRemainingTokens
   680  					return true
   681  				}
   682  			}
   683  
   684  			p.addElement()
   685  			p.afe = append(p.afe, &scopeMarker)
   686  			p.framesetOK = false
   687  			p.im = inTemplateIM
   688  			p.templateStack = append(p.templateStack, inTemplateIM)
   689  			return true
   690  		}
   691  	case EndTagToken:
   692  		switch p.tok.DataAtom {
   693  		case a.Head:
   694  			p.oe.pop()
   695  			p.im = afterHeadIM
   696  			return true
   697  		case a.Body, a.Html, a.Br:
   698  			p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
   699  			return false
   700  		case a.Template:
   701  			if !p.oe.contains(a.Template) {
   702  				return true
   703  			}
   704  			// TODO: remove this further divergence from the HTML5 spec.
   705  			//
   706  			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
   707  			p.generateImpliedEndTags()
   708  			for i := len(p.oe) - 1; i >= 0; i-- {
   709  				if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
   710  					p.oe = p.oe[:i]
   711  					break
   712  				}
   713  			}
   714  			p.clearActiveFormattingElements()
   715  			p.templateStack.pop()
   716  			p.resetInsertionMode()
   717  			return true
   718  		default:
   719  			// Ignore the token.
   720  			return true
   721  		}
   722  	case CommentToken:
   723  		p.addChild(&Node{
   724  			Type: CommentNode,
   725  			Data: p.tok.Data,
   726  		})
   727  		return true
   728  	case DoctypeToken:
   729  		// Ignore the token.
   730  		return true
   731  	}
   732  
   733  	p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
   734  	return false
   735  }
   736  
   737  // Section 12.2.6.4.5.
   738  func inHeadNoscriptIM(p *parser) bool {
   739  	switch p.tok.Type {
   740  	case DoctypeToken:
   741  		// Ignore the token.
   742  		return true
   743  	case StartTagToken:
   744  		switch p.tok.DataAtom {
   745  		case a.Html:
   746  			return inBodyIM(p)
   747  		case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
   748  			return inHeadIM(p)
   749  		case a.Head:
   750  			// Ignore the token.
   751  			return true
   752  		case a.Noscript:
   753  			// Don't let the tokenizer go into raw text mode even when a <noscript>
   754  			// tag is in "in head noscript" insertion mode.
   755  			p.tokenizer.NextIsNotRawText()
   756  			// Ignore the token.
   757  			return true
   758  		}
   759  	case EndTagToken:
   760  		switch p.tok.DataAtom {
   761  		case a.Noscript, a.Br:
   762  		default:
   763  			// Ignore the token.
   764  			return true
   765  		}
   766  	case TextToken:
   767  		s := strings.TrimLeft(p.tok.Data, whitespace)
   768  		if len(s) == 0 {
   769  			// It was all whitespace.
   770  			return inHeadIM(p)
   771  		}
   772  	case CommentToken:
   773  		return inHeadIM(p)
   774  	}
   775  	p.oe.pop()
   776  	if p.top().DataAtom != a.Head {
   777  		panic("html: the new current node will be a head element.")
   778  	}
   779  	p.im = inHeadIM
   780  	if p.tok.DataAtom == a.Noscript {
   781  		return true
   782  	}
   783  	return false
   784  }
   785  
   786  // Section 12.2.6.4.6.
   787  func afterHeadIM(p *parser) bool {
   788  	switch p.tok.Type {
   789  	case TextToken:
   790  		s := strings.TrimLeft(p.tok.Data, whitespace)
   791  		if len(s) < len(p.tok.Data) {
   792  			// Add the initial whitespace to the current node.
   793  			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
   794  			if s == "" {
   795  				return true
   796  			}
   797  			p.tok.Data = s
   798  		}
   799  	case StartTagToken:
   800  		switch p.tok.DataAtom {
   801  		case a.Html:
   802  			return inBodyIM(p)
   803  		case a.Body:
   804  			p.addElement()
   805  			p.framesetOK = false
   806  			p.im = inBodyIM
   807  			return true
   808  		case a.Frameset:
   809  			p.addElement()
   810  			p.im = inFramesetIM
   811  			return true
   812  		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
   813  			p.oe = append(p.oe, p.head)
   814  			defer p.oe.remove(p.head)
   815  			return inHeadIM(p)
   816  		case a.Head:
   817  			// Ignore the token.
   818  			return true
   819  		}
   820  	case EndTagToken:
   821  		switch p.tok.DataAtom {
   822  		case a.Body, a.Html, a.Br:
   823  			// Drop down to creating an implied <body> tag.
   824  		case a.Template:
   825  			return inHeadIM(p)
   826  		default:
   827  			// Ignore the token.
   828  			return true
   829  		}
   830  	case CommentToken:
   831  		p.addChild(&Node{
   832  			Type: CommentNode,
   833  			Data: p.tok.Data,
   834  		})
   835  		return true
   836  	case DoctypeToken:
   837  		// Ignore the token.
   838  		return true
   839  	}
   840  
   841  	p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
   842  	p.framesetOK = true
   843  	return false
   844  }
   845  
   846  // copyAttributes copies attributes of src not found on dst to dst.
   847  func copyAttributes(dst *Node, src Token) {
   848  	if len(src.Attr) == 0 {
   849  		return
   850  	}
   851  	attr := map[string]string{}
   852  	for _, t := range dst.Attr {
   853  		attr[t.Key] = t.Val
   854  	}
   855  	for _, t := range src.Attr {
   856  		if _, ok := attr[t.Key]; !ok {
   857  			dst.Attr = append(dst.Attr, t)
   858  			attr[t.Key] = t.Val
   859  		}
   860  	}
   861  }
   862  
   863  // Section 12.2.6.4.7.
   864  func inBodyIM(p *parser) bool {
   865  	switch p.tok.Type {
   866  	case TextToken:
   867  		d := p.tok.Data
   868  		switch n := p.oe.top(); n.DataAtom {
   869  		case a.Pre, a.Listing:
   870  			if n.FirstChild == nil {
   871  				// Ignore a newline at the start of a <pre> block.
   872  				if d != "" && d[0] == '\r' {
   873  					d = d[1:]
   874  				}
   875  				if d != "" && d[0] == '\n' {
   876  					d = d[1:]
   877  				}
   878  			}
   879  		}
   880  		d = strings.Replace(d, "\x00", "", -1)
   881  		if d == "" {
   882  			return true
   883  		}
   884  		p.reconstructActiveFormattingElements()
   885  		p.addText(d)
   886  		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
   887  			// There were non-whitespace characters inserted.
   888  			p.framesetOK = false
   889  		}
   890  	case StartTagToken:
   891  		switch p.tok.DataAtom {
   892  		case a.Html:
   893  			if p.oe.contains(a.Template) {
   894  				return true
   895  			}
   896  			copyAttributes(p.oe[0], p.tok)
   897  		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
   898  			return inHeadIM(p)
   899  		case a.Body:
   900  			if p.oe.contains(a.Template) {
   901  				return true
   902  			}
   903  			if len(p.oe) >= 2 {
   904  				body := p.oe[1]
   905  				if body.Type == ElementNode && body.DataAtom == a.Body {
   906  					p.framesetOK = false
   907  					copyAttributes(body, p.tok)
   908  				}
   909  			}
   910  		case a.Frameset:
   911  			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
   912  				// Ignore the token.
   913  				return true
   914  			}
   915  			body := p.oe[1]
   916  			if body.Parent != nil {
   917  				body.Parent.RemoveChild(body)
   918  			}
   919  			p.oe = p.oe[:1]
   920  			p.addElement()
   921  			p.im = inFramesetIM
   922  			return true
   923  		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
   924  			p.popUntil(buttonScope, a.P)
   925  			p.addElement()
   926  		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
   927  			p.popUntil(buttonScope, a.P)
   928  			switch n := p.top(); n.DataAtom {
   929  			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
   930  				p.oe.pop()
   931  			}
   932  			p.addElement()
   933  		case a.Pre, a.Listing:
   934  			p.popUntil(buttonScope, a.P)
   935  			p.addElement()
   936  			// The newline, if any, will be dealt with by the TextToken case.
   937  			p.framesetOK = false
   938  		case a.Form:
   939  			if p.form != nil && !p.oe.contains(a.Template) {
   940  				// Ignore the token
   941  				return true
   942  			}
   943  			p.popUntil(buttonScope, a.P)
   944  			p.addElement()
   945  			if !p.oe.contains(a.Template) {
   946  				p.form = p.top()
   947  			}
   948  		case a.Li:
   949  			p.framesetOK = false
   950  			for i := len(p.oe) - 1; i >= 0; i-- {
   951  				node := p.oe[i]
   952  				switch node.DataAtom {
   953  				case a.Li:
   954  					p.oe = p.oe[:i]
   955  				case a.Address, a.Div, a.P:
   956  					continue
   957  				default:
   958  					if !isSpecialElement(node) {
   959  						continue
   960  					}
   961  				}
   962  				break
   963  			}
   964  			p.popUntil(buttonScope, a.P)
   965  			p.addElement()
   966  		case a.Dd, a.Dt:
   967  			p.framesetOK = false
   968  			for i := len(p.oe) - 1; i >= 0; i-- {
   969  				node := p.oe[i]
   970  				switch node.DataAtom {
   971  				case a.Dd, a.Dt:
   972  					p.oe = p.oe[:i]
   973  				case a.Address, a.Div, a.P:
   974  					continue
   975  				default:
   976  					if !isSpecialElement(node) {
   977  						continue
   978  					}
   979  				}
   980  				break
   981  			}
   982  			p.popUntil(buttonScope, a.P)
   983  			p.addElement()
   984  		case a.Plaintext:
   985  			p.popUntil(buttonScope, a.P)
   986  			p.addElement()
   987  		case a.Button:
   988  			p.popUntil(defaultScope, a.Button)
   989  			p.reconstructActiveFormattingElements()
   990  			p.addElement()
   991  			p.framesetOK = false
   992  		case a.A:
   993  			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
   994  				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
   995  					p.inBodyEndTagFormatting(a.A, "a")
   996  					p.oe.remove(n)
   997  					p.afe.remove(n)
   998  					break
   999  				}
  1000  			}
  1001  			p.reconstructActiveFormattingElements()
  1002  			p.addFormattingElement()
  1003  		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
  1004  			p.reconstructActiveFormattingElements()
  1005  			p.addFormattingElement()
  1006  		case a.Nobr:
  1007  			p.reconstructActiveFormattingElements()
  1008  			if p.elementInScope(defaultScope, a.Nobr) {
  1009  				p.inBodyEndTagFormatting(a.Nobr, "nobr")
  1010  				p.reconstructActiveFormattingElements()
  1011  			}
  1012  			p.addFormattingElement()
  1013  		case a.Applet, a.Marquee, a.Object:
  1014  			p.reconstructActiveFormattingElements()
  1015  			p.addElement()
  1016  			p.afe = append(p.afe, &scopeMarker)
  1017  			p.framesetOK = false
  1018  		case a.Table:
  1019  			if !p.quirks {
  1020  				p.popUntil(buttonScope, a.P)
  1021  			}
  1022  			p.addElement()
  1023  			p.framesetOK = false
  1024  			p.im = inTableIM
  1025  			return true
  1026  		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
  1027  			p.reconstructActiveFormattingElements()
  1028  			p.addElement()
  1029  			p.oe.pop()
  1030  			p.acknowledgeSelfClosingTag()
  1031  			if p.tok.DataAtom == a.Input {
  1032  				for _, t := range p.tok.Attr {
  1033  					if t.Key == "type" {
  1034  						if strings.ToLower(t.Val) == "hidden" {
  1035  							// Skip setting framesetOK = false
  1036  							return true
  1037  						}
  1038  					}
  1039  				}
  1040  			}
  1041  			p.framesetOK = false
  1042  		case a.Param, a.Source, a.Track:
  1043  			p.addElement()
  1044  			p.oe.pop()
  1045  			p.acknowledgeSelfClosingTag()
  1046  		case a.Hr:
  1047  			p.popUntil(buttonScope, a.P)
  1048  			p.addElement()
  1049  			p.oe.pop()
  1050  			p.acknowledgeSelfClosingTag()
  1051  			p.framesetOK = false
  1052  		case a.Image:
  1053  			p.tok.DataAtom = a.Img
  1054  			p.tok.Data = a.Img.String()
  1055  			return false
  1056  		case a.Textarea:
  1057  			p.addElement()
  1058  			p.setOriginalIM()
  1059  			p.framesetOK = false
  1060  			p.im = textIM
  1061  		case a.Xmp:
  1062  			p.popUntil(buttonScope, a.P)
  1063  			p.reconstructActiveFormattingElements()
  1064  			p.framesetOK = false
  1065  			p.parseGenericRawTextElement()
  1066  		case a.Iframe:
  1067  			p.framesetOK = false
  1068  			p.parseGenericRawTextElement()
  1069  		case a.Noembed:
  1070  			p.parseGenericRawTextElement()
  1071  		case a.Noscript:
  1072  			if p.scripting {
  1073  				p.parseGenericRawTextElement()
  1074  				return true
  1075  			}
  1076  			p.reconstructActiveFormattingElements()
  1077  			p.addElement()
  1078  			// Don't let the tokenizer go into raw text mode when scripting is disabled.
  1079  			p.tokenizer.NextIsNotRawText()
  1080  		case a.Select:
  1081  			p.reconstructActiveFormattingElements()
  1082  			p.addElement()
  1083  			p.framesetOK = false
  1084  			p.im = inSelectIM
  1085  			return true
  1086  		case a.Optgroup, a.Option:
  1087  			if p.top().DataAtom == a.Option {
  1088  				p.oe.pop()
  1089  			}
  1090  			p.reconstructActiveFormattingElements()
  1091  			p.addElement()
  1092  		case a.Rb, a.Rtc:
  1093  			if p.elementInScope(defaultScope, a.Ruby) {
  1094  				p.generateImpliedEndTags()
  1095  			}
  1096  			p.addElement()
  1097  		case a.Rp, a.Rt:
  1098  			if p.elementInScope(defaultScope, a.Ruby) {
  1099  				p.generateImpliedEndTags("rtc")
  1100  			}
  1101  			p.addElement()
  1102  		case a.Math, a.Svg:
  1103  			p.reconstructActiveFormattingElements()
  1104  			if p.tok.DataAtom == a.Math {
  1105  				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
  1106  			} else {
  1107  				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
  1108  			}
  1109  			adjustForeignAttributes(p.tok.Attr)
  1110  			p.addElement()
  1111  			p.top().Namespace = p.tok.Data
  1112  			if p.hasSelfClosingToken {
  1113  				p.oe.pop()
  1114  				p.acknowledgeSelfClosingTag()
  1115  			}
  1116  			return true
  1117  		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1118  			// Ignore the token.
  1119  		default:
  1120  			p.reconstructActiveFormattingElements()
  1121  			p.addElement()
  1122  		}
  1123  	case EndTagToken:
  1124  		switch p.tok.DataAtom {
  1125  		case a.Body:
  1126  			if p.elementInScope(defaultScope, a.Body) {
  1127  				p.im = afterBodyIM
  1128  			}
  1129  		case a.Html:
  1130  			if p.elementInScope(defaultScope, a.Body) {
  1131  				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
  1132  				return false
  1133  			}
  1134  			return true
  1135  		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
  1136  			p.popUntil(defaultScope, p.tok.DataAtom)
  1137  		case a.Form:
  1138  			if p.oe.contains(a.Template) {
  1139  				i := p.indexOfElementInScope(defaultScope, a.Form)
  1140  				if i == -1 {
  1141  					// Ignore the token.
  1142  					return true
  1143  				}
  1144  				p.generateImpliedEndTags()
  1145  				if p.oe[i].DataAtom != a.Form {
  1146  					// Ignore the token.
  1147  					return true
  1148  				}
  1149  				p.popUntil(defaultScope, a.Form)
  1150  			} else {
  1151  				node := p.form
  1152  				p.form = nil
  1153  				i := p.indexOfElementInScope(defaultScope, a.Form)
  1154  				if node == nil || i == -1 || p.oe[i] != node {
  1155  					// Ignore the token.
  1156  					return true
  1157  				}
  1158  				p.generateImpliedEndTags()
  1159  				p.oe.remove(node)
  1160  			}
  1161  		case a.P:
  1162  			if !p.elementInScope(buttonScope, a.P) {
  1163  				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
  1164  			}
  1165  			p.popUntil(buttonScope, a.P)
  1166  		case a.Li:
  1167  			p.popUntil(listItemScope, a.Li)
  1168  		case a.Dd, a.Dt:
  1169  			p.popUntil(defaultScope, p.tok.DataAtom)
  1170  		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
  1171  			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
  1172  		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
  1173  			p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)
  1174  		case a.Applet, a.Marquee, a.Object:
  1175  			if p.popUntil(defaultScope, p.tok.DataAtom) {
  1176  				p.clearActiveFormattingElements()
  1177  			}
  1178  		case a.Br:
  1179  			p.tok.Type = StartTagToken
  1180  			return false
  1181  		case a.Template:
  1182  			return inHeadIM(p)
  1183  		default:
  1184  			p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)
  1185  		}
  1186  	case CommentToken:
  1187  		p.addChild(&Node{
  1188  			Type: CommentNode,
  1189  			Data: p.tok.Data,
  1190  		})
  1191  	case ErrorToken:
  1192  		// TODO: remove this divergence from the HTML5 spec.
  1193  		if len(p.templateStack) > 0 {
  1194  			p.im = inTemplateIM
  1195  			return false
  1196  		}
  1197  		for _, e := range p.oe {
  1198  			switch e.DataAtom {
  1199  			case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
  1200  				a.Thead, a.Tr, a.Body, a.Html:
  1201  			default:
  1202  				return true
  1203  			}
  1204  		}
  1205  	}
  1206  
  1207  	return true
  1208  }
  1209  
// inBodyEndTagFormatting closes the formatting element named by tagAtom and
// tagName. Within this file it is invoked for end tags of formatting elements
// and for the implicit close performed when an <a> or <nobr> start tag arrives
// while one is still open. It rewrites the stack of open elements (p.oe), the
// list of active formatting elements (p.afe) and the node tree in place.
func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
	// This is the "adoption agency" algorithm, described at
	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency

	// TODO: this is a fairly literal line-by-line translation of that algorithm.
	// Once the code successfully parses the comprehensive test suite, we should
	// refactor this code to be more idiomatic.

	// Steps 1-2. Fast path: the current node has the requested tag name and is
	// not in the list of active formatting elements, so popping it is enough.
	if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {
		p.oe.pop()
		return
	}

	// Steps 3-5. The outer loop. It runs at most eight times.
	for i := 0; i < 8; i++ {
		// Step 6. Find the formatting element: the last entry in the list of
		// active formatting elements with the requested atom, searching no
		// further back than the most recent scope marker.
		var formattingElement *Node
		for j := len(p.afe) - 1; j >= 0; j-- {
			if p.afe[j].Type == scopeMarkerNode {
				break
			}
			if p.afe[j].DataAtom == tagAtom {
				formattingElement = p.afe[j]
				break
			}
		}
		if formattingElement == nil {
			// No such formatting element: treat the tag as any other end tag.
			p.inBodyEndTagOther(tagAtom, tagName)
			return
		}

		// Step 7. Ignore the tag if formatting element is not in the stack of open elements.
		feIndex := p.oe.index(formattingElement)
		if feIndex == -1 {
			p.afe.remove(formattingElement)
			return
		}
		// Step 8. Ignore the tag if formatting element is not in the scope.
		if !p.elementInScope(defaultScope, tagAtom) {
			// Ignore the tag.
			return
		}

		// Step 9. This step is omitted because it's just a parse error but no need to return.

		// Steps 10-11. Find the furthest block: the first element at or above
		// the formatting element in the stack for which isSpecialElement
		// reports true.
		var furthestBlock *Node
		for _, e := range p.oe[feIndex:] {
			if isSpecialElement(e) {
				furthestBlock = e
				break
			}
		}
		if furthestBlock == nil {
			// No furthest block: pop the stack up to and including the
			// formatting element and drop it from the active formatting list.
			e := p.oe.pop()
			for e != formattingElement {
				e = p.oe.pop()
			}
			p.afe.remove(e)
			return
		}

		// Steps 12-13. Find the common ancestor and bookmark node.
		// The common ancestor is the stack entry directly below the formatting
		// element; bookmark is an index into p.afe.
		commonAncestor := p.oe[feIndex-1]
		bookmark := p.afe.index(formattingElement)

		// Step 14. The inner loop. Find the lastNode to reparent.
		lastNode := furthestBlock
		node := furthestBlock
		// x is the stack index of node; it walks down from the furthest block
		// towards the formatting element.
		x := p.oe.index(node)
		// Step 14.1.
		j := 0
		for {
			// Step 14.2.
			j++
			// Step 14.3.
			x--
			node = p.oe[x]
			// Step 14.4. Go to the next step if node is formatting element.
			if node == formattingElement {
				break
			}
			// Step 14.5. Remove node from the list of active formatting elements if
			// inner loop counter is greater than three and node is in the list of
			// active formatting elements.
			if ni := p.afe.index(node); j > 3 && ni > -1 {
				p.afe.remove(node)
				// If any element of the list of active formatting elements is removed,
				// we need to take care whether bookmark should be decremented or not.
				// This is because the value of bookmark may exceed the size of the
				// list by removing elements from the list.
				if ni <= bookmark {
					bookmark--
				}
				continue
			}
			// Step 14.6. Continue the next inner loop if node is not in the list of
			// active formatting elements.
			if p.afe.index(node) == -1 {
				p.oe.remove(node)
				continue
			}
			// Step 14.7. Replace node with a clone of itself in both the list
			// of active formatting elements and the stack of open elements.
			clone := node.clone()
			p.afe[p.afe.index(node)] = clone
			p.oe[p.oe.index(node)] = clone
			node = clone
			// Step 14.8. On the first reparenting, move the bookmark to just
			// after the new node in the list of active formatting elements.
			if lastNode == furthestBlock {
				bookmark = p.afe.index(node) + 1
			}
			// Step 14.9. Move lastNode under node, detaching it from its
			// current parent first.
			if lastNode.Parent != nil {
				lastNode.Parent.RemoveChild(lastNode)
			}
			node.AppendChild(lastNode)
			// Step 14.10.
			lastNode = node
		}

		// Step 15. Reparent lastNode to the common ancestor,
		// or for misnested table nodes, to the foster parent.
		if lastNode.Parent != nil {
			lastNode.Parent.RemoveChild(lastNode)
		}
		switch commonAncestor.DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			p.fosterParent(lastNode)
		default:
			commonAncestor.AppendChild(lastNode)
		}

		// Steps 16-18. Reparent nodes from the furthest block's children
		// to a clone of the formatting element.
		clone := formattingElement.clone()
		reparentChildren(clone, furthestBlock)
		furthestBlock.AppendChild(clone)

		// Step 19. Fix up the list of active formatting elements.
		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
			// Move the bookmark with the rest of the list.
			bookmark--
		}
		p.afe.remove(formattingElement)
		p.afe.insert(bookmark, clone)

		// Step 20. Fix up the stack of open elements: the clone takes the
		// slot immediately after the furthest block.
		p.oe.remove(formattingElement)
		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
	}
}
  1362  
  1363  // inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
  1364  // "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
  1365  // https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
  1366  func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {
  1367  	for i := len(p.oe) - 1; i >= 0; i-- {
  1368  		// Two element nodes have the same tag if they have the same Data (a
  1369  		// string-typed field). As an optimization, for common HTML tags, each
  1370  		// Data string is assigned a unique, non-zero DataAtom (a uint32-typed
  1371  		// field), since integer comparison is faster than string comparison.
  1372  		// Uncommon (custom) tags get a zero DataAtom.
  1373  		//
  1374  		// The if condition here is equivalent to (p.oe[i].Data == tagName).
  1375  		if (p.oe[i].DataAtom == tagAtom) &&
  1376  			((tagAtom != 0) || (p.oe[i].Data == tagName)) {
  1377  			p.oe = p.oe[:i]
  1378  			break
  1379  		}
  1380  		if isSpecialElement(p.oe[i]) {
  1381  			break
  1382  		}
  1383  	}
  1384  }
  1385  
  1386  // Section 12.2.6.4.8.
  1387  func textIM(p *parser) bool {
  1388  	switch p.tok.Type {
  1389  	case ErrorToken:
  1390  		p.oe.pop()
  1391  	case TextToken:
  1392  		d := p.tok.Data
  1393  		if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
  1394  			// Ignore a newline at the start of a <textarea> block.
  1395  			if d != "" && d[0] == '\r' {
  1396  				d = d[1:]
  1397  			}
  1398  			if d != "" && d[0] == '\n' {
  1399  				d = d[1:]
  1400  			}
  1401  		}
  1402  		if d == "" {
  1403  			return true
  1404  		}
  1405  		p.addText(d)
  1406  		return true
  1407  	case EndTagToken:
  1408  		p.oe.pop()
  1409  	}
  1410  	p.im = p.originalIM
  1411  	p.originalIM = nil
  1412  	return p.tok.Type == EndTagToken
  1413  }
  1414  
  1415  // Section 12.2.6.4.9.
  1416  func inTableIM(p *parser) bool {
  1417  	switch p.tok.Type {
  1418  	case TextToken:
  1419  		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
  1420  		switch p.oe.top().DataAtom {
  1421  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1422  			if strings.Trim(p.tok.Data, whitespace) == "" {
  1423  				p.addText(p.tok.Data)
  1424  				return true
  1425  			}
  1426  		}
  1427  	case StartTagToken:
  1428  		switch p.tok.DataAtom {
  1429  		case a.Caption:
  1430  			p.clearStackToContext(tableScope)
  1431  			p.afe = append(p.afe, &scopeMarker)
  1432  			p.addElement()
  1433  			p.im = inCaptionIM
  1434  			return true
  1435  		case a.Colgroup:
  1436  			p.clearStackToContext(tableScope)
  1437  			p.addElement()
  1438  			p.im = inColumnGroupIM
  1439  			return true
  1440  		case a.Col:
  1441  			p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
  1442  			return false
  1443  		case a.Tbody, a.Tfoot, a.Thead:
  1444  			p.clearStackToContext(tableScope)
  1445  			p.addElement()
  1446  			p.im = inTableBodyIM
  1447  			return true
  1448  		case a.Td, a.Th, a.Tr:
  1449  			p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
  1450  			return false
  1451  		case a.Table:
  1452  			if p.popUntil(tableScope, a.Table) {
  1453  				p.resetInsertionMode()
  1454  				return false
  1455  			}
  1456  			// Ignore the token.
  1457  			return true
  1458  		case a.Style, a.Script, a.Template:
  1459  			return inHeadIM(p)
  1460  		case a.Input:
  1461  			for _, t := range p.tok.Attr {
  1462  				if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
  1463  					p.addElement()
  1464  					p.oe.pop()
  1465  					return true
  1466  				}
  1467  			}
  1468  			// Otherwise drop down to the default action.
  1469  		case a.Form:
  1470  			if p.oe.contains(a.Template) || p.form != nil {
  1471  				// Ignore the token.
  1472  				return true
  1473  			}
  1474  			p.addElement()
  1475  			p.form = p.oe.pop()
  1476  		case a.Select:
  1477  			p.reconstructActiveFormattingElements()
  1478  			switch p.top().DataAtom {
  1479  			case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1480  				p.fosterParenting = true
  1481  			}
  1482  			p.addElement()
  1483  			p.fosterParenting = false
  1484  			p.framesetOK = false
  1485  			p.im = inSelectInTableIM
  1486  			return true
  1487  		}
  1488  	case EndTagToken:
  1489  		switch p.tok.DataAtom {
  1490  		case a.Table:
  1491  			if p.popUntil(tableScope, a.Table) {
  1492  				p.resetInsertionMode()
  1493  				return true
  1494  			}
  1495  			// Ignore the token.
  1496  			return true
  1497  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1498  			// Ignore the token.
  1499  			return true
  1500  		case a.Template:
  1501  			return inHeadIM(p)
  1502  		}
  1503  	case CommentToken:
  1504  		p.addChild(&Node{
  1505  			Type: CommentNode,
  1506  			Data: p.tok.Data,
  1507  		})
  1508  		return true
  1509  	case DoctypeToken:
  1510  		// Ignore the token.
  1511  		return true
  1512  	case ErrorToken:
  1513  		return inBodyIM(p)
  1514  	}
  1515  
  1516  	p.fosterParenting = true
  1517  	defer func() { p.fosterParenting = false }()
  1518  
  1519  	return inBodyIM(p)
  1520  }
  1521  
  1522  // Section 12.2.6.4.11.
  1523  func inCaptionIM(p *parser) bool {
  1524  	switch p.tok.Type {
  1525  	case StartTagToken:
  1526  		switch p.tok.DataAtom {
  1527  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
  1528  			if !p.popUntil(tableScope, a.Caption) {
  1529  				// Ignore the token.
  1530  				return true
  1531  			}
  1532  			p.clearActiveFormattingElements()
  1533  			p.im = inTableIM
  1534  			return false
  1535  		case a.Select:
  1536  			p.reconstructActiveFormattingElements()
  1537  			p.addElement()
  1538  			p.framesetOK = false
  1539  			p.im = inSelectInTableIM
  1540  			return true
  1541  		}
  1542  	case EndTagToken:
  1543  		switch p.tok.DataAtom {
  1544  		case a.Caption:
  1545  			if p.popUntil(tableScope, a.Caption) {
  1546  				p.clearActiveFormattingElements()
  1547  				p.im = inTableIM
  1548  			}
  1549  			return true
  1550  		case a.Table:
  1551  			if !p.popUntil(tableScope, a.Caption) {
  1552  				// Ignore the token.
  1553  				return true
  1554  			}
  1555  			p.clearActiveFormattingElements()
  1556  			p.im = inTableIM
  1557  			return false
  1558  		case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1559  			// Ignore the token.
  1560  			return true
  1561  		}
  1562  	}
  1563  	return inBodyIM(p)
  1564  }
  1565  
  1566  // Section 12.2.6.4.12.
  1567  func inColumnGroupIM(p *parser) bool {
  1568  	switch p.tok.Type {
  1569  	case TextToken:
  1570  		s := strings.TrimLeft(p.tok.Data, whitespace)
  1571  		if len(s) < len(p.tok.Data) {
  1572  			// Add the initial whitespace to the current node.
  1573  			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
  1574  			if s == "" {
  1575  				return true
  1576  			}
  1577  			p.tok.Data = s
  1578  		}
  1579  	case CommentToken:
  1580  		p.addChild(&Node{
  1581  			Type: CommentNode,
  1582  			Data: p.tok.Data,
  1583  		})
  1584  		return true
  1585  	case DoctypeToken:
  1586  		// Ignore the token.
  1587  		return true
  1588  	case StartTagToken:
  1589  		switch p.tok.DataAtom {
  1590  		case a.Html:
  1591  			return inBodyIM(p)
  1592  		case a.Col:
  1593  			p.addElement()
  1594  			p.oe.pop()
  1595  			p.acknowledgeSelfClosingTag()
  1596  			return true
  1597  		case a.Template:
  1598  			return inHeadIM(p)
  1599  		}
  1600  	case EndTagToken:
  1601  		switch p.tok.DataAtom {
  1602  		case a.Colgroup:
  1603  			if p.oe.top().DataAtom == a.Colgroup {
  1604  				p.oe.pop()
  1605  				p.im = inTableIM
  1606  			}
  1607  			return true
  1608  		case a.Col:
  1609  			// Ignore the token.
  1610  			return true
  1611  		case a.Template:
  1612  			return inHeadIM(p)
  1613  		}
  1614  	case ErrorToken:
  1615  		return inBodyIM(p)
  1616  	}
  1617  	if p.oe.top().DataAtom != a.Colgroup {
  1618  		return true
  1619  	}
  1620  	p.oe.pop()
  1621  	p.im = inTableIM
  1622  	return false
  1623  }
  1624  
  1625  // Section 12.2.6.4.13.
  1626  func inTableBodyIM(p *parser) bool {
  1627  	switch p.tok.Type {
  1628  	case StartTagToken:
  1629  		switch p.tok.DataAtom {
  1630  		case a.Tr:
  1631  			p.clearStackToContext(tableBodyScope)
  1632  			p.addElement()
  1633  			p.im = inRowIM
  1634  			return true
  1635  		case a.Td, a.Th:
  1636  			p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
  1637  			return false
  1638  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
  1639  			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
  1640  				p.im = inTableIM
  1641  				return false
  1642  			}
  1643  			// Ignore the token.
  1644  			return true
  1645  		}
  1646  	case EndTagToken:
  1647  		switch p.tok.DataAtom {
  1648  		case a.Tbody, a.Tfoot, a.Thead:
  1649  			if p.elementInScope(tableScope, p.tok.DataAtom) {
  1650  				p.clearStackToContext(tableBodyScope)
  1651  				p.oe.pop()
  1652  				p.im = inTableIM
  1653  			}
  1654  			return true
  1655  		case a.Table:
  1656  			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
  1657  				p.im = inTableIM
  1658  				return false
  1659  			}
  1660  			// Ignore the token.
  1661  			return true
  1662  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
  1663  			// Ignore the token.
  1664  			return true
  1665  		}
  1666  	case CommentToken:
  1667  		p.addChild(&Node{
  1668  			Type: CommentNode,
  1669  			Data: p.tok.Data,
  1670  		})
  1671  		return true
  1672  	}
  1673  
  1674  	return inTableIM(p)
  1675  }
  1676  
  1677  // Section 12.2.6.4.14.
  1678  func inRowIM(p *parser) bool {
  1679  	switch p.tok.Type {
  1680  	case StartTagToken:
  1681  		switch p.tok.DataAtom {
  1682  		case a.Td, a.Th:
  1683  			p.clearStackToContext(tableRowScope)
  1684  			p.addElement()
  1685  			p.afe = append(p.afe, &scopeMarker)
  1686  			p.im = inCellIM
  1687  			return true
  1688  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1689  			if p.popUntil(tableScope, a.Tr) {
  1690  				p.im = inTableBodyIM
  1691  				return false
  1692  			}
  1693  			// Ignore the token.
  1694  			return true
  1695  		}
  1696  	case EndTagToken:
  1697  		switch p.tok.DataAtom {
  1698  		case a.Tr:
  1699  			if p.popUntil(tableScope, a.Tr) {
  1700  				p.im = inTableBodyIM
  1701  				return true
  1702  			}
  1703  			// Ignore the token.
  1704  			return true
  1705  		case a.Table:
  1706  			if p.popUntil(tableScope, a.Tr) {
  1707  				p.im = inTableBodyIM
  1708  				return false
  1709  			}
  1710  			// Ignore the token.
  1711  			return true
  1712  		case a.Tbody, a.Tfoot, a.Thead:
  1713  			if p.elementInScope(tableScope, p.tok.DataAtom) {
  1714  				p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
  1715  				return false
  1716  			}
  1717  			// Ignore the token.
  1718  			return true
  1719  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
  1720  			// Ignore the token.
  1721  			return true
  1722  		}
  1723  	}
  1724  
  1725  	return inTableIM(p)
  1726  }
  1727  
  1728  // Section 12.2.6.4.15.
  1729  func inCellIM(p *parser) bool {
  1730  	switch p.tok.Type {
  1731  	case StartTagToken:
  1732  		switch p.tok.DataAtom {
  1733  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1734  			if p.popUntil(tableScope, a.Td, a.Th) {
  1735  				// Close the cell and reprocess.
  1736  				p.clearActiveFormattingElements()
  1737  				p.im = inRowIM
  1738  				return false
  1739  			}
  1740  			// Ignore the token.
  1741  			return true
  1742  		case a.Select:
  1743  			p.reconstructActiveFormattingElements()
  1744  			p.addElement()
  1745  			p.framesetOK = false
  1746  			p.im = inSelectInTableIM
  1747  			return true
  1748  		}
  1749  	case EndTagToken:
  1750  		switch p.tok.DataAtom {
  1751  		case a.Td, a.Th:
  1752  			if !p.popUntil(tableScope, p.tok.DataAtom) {
  1753  				// Ignore the token.
  1754  				return true
  1755  			}
  1756  			p.clearActiveFormattingElements()
  1757  			p.im = inRowIM
  1758  			return true
  1759  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
  1760  			// Ignore the token.
  1761  			return true
  1762  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1763  			if !p.elementInScope(tableScope, p.tok.DataAtom) {
  1764  				// Ignore the token.
  1765  				return true
  1766  			}
  1767  			// Close the cell and reprocess.
  1768  			if p.popUntil(tableScope, a.Td, a.Th) {
  1769  				p.clearActiveFormattingElements()
  1770  			}
  1771  			p.im = inRowIM
  1772  			return false
  1773  		}
  1774  	}
  1775  	return inBodyIM(p)
  1776  }
  1777  
  1778  // Section 12.2.6.4.16.
  1779  func inSelectIM(p *parser) bool {
  1780  	switch p.tok.Type {
  1781  	case TextToken:
  1782  		p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
  1783  	case StartTagToken:
  1784  		switch p.tok.DataAtom {
  1785  		case a.Html:
  1786  			return inBodyIM(p)
  1787  		case a.Option:
  1788  			if p.top().DataAtom == a.Option {
  1789  				p.oe.pop()
  1790  			}
  1791  			p.addElement()
  1792  		case a.Optgroup:
  1793  			if p.top().DataAtom == a.Option {
  1794  				p.oe.pop()
  1795  			}
  1796  			if p.top().DataAtom == a.Optgroup {
  1797  				p.oe.pop()
  1798  			}
  1799  			p.addElement()
  1800  		case a.Select:
  1801  			if !p.popUntil(selectScope, a.Select) {
  1802  				// Ignore the token.
  1803  				return true
  1804  			}
  1805  			p.resetInsertionMode()
  1806  		case a.Input, a.Keygen, a.Textarea:
  1807  			if p.elementInScope(selectScope, a.Select) {
  1808  				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
  1809  				return false
  1810  			}
  1811  			// In order to properly ignore <textarea>, we need to change the tokenizer mode.
  1812  			p.tokenizer.NextIsNotRawText()
  1813  			// Ignore the token.
  1814  			return true
  1815  		case a.Script, a.Template:
  1816  			return inHeadIM(p)
  1817  		case a.Iframe, a.Noembed, a.Noframes, a.Noscript, a.Plaintext, a.Style, a.Title, a.Xmp:
  1818  			// Don't let the tokenizer go into raw text mode when there are raw tags
  1819  			// to be ignored. These tags should be ignored from the tokenizer
  1820  			// properly.
  1821  			p.tokenizer.NextIsNotRawText()
  1822  			// Ignore the token.
  1823  			return true
  1824  		}
  1825  	case EndTagToken:
  1826  		switch p.tok.DataAtom {
  1827  		case a.Option:
  1828  			if p.top().DataAtom == a.Option {
  1829  				p.oe.pop()
  1830  			}
  1831  		case a.Optgroup:
  1832  			i := len(p.oe) - 1
  1833  			if p.oe[i].DataAtom == a.Option {
  1834  				i--
  1835  			}
  1836  			if p.oe[i].DataAtom == a.Optgroup {
  1837  				p.oe = p.oe[:i]
  1838  			}
  1839  		case a.Select:
  1840  			if !p.popUntil(selectScope, a.Select) {
  1841  				// Ignore the token.
  1842  				return true
  1843  			}
  1844  			p.resetInsertionMode()
  1845  		case a.Template:
  1846  			return inHeadIM(p)
  1847  		}
  1848  	case CommentToken:
  1849  		p.addChild(&Node{
  1850  			Type: CommentNode,
  1851  			Data: p.tok.Data,
  1852  		})
  1853  	case DoctypeToken:
  1854  		// Ignore the token.
  1855  		return true
  1856  	case ErrorToken:
  1857  		return inBodyIM(p)
  1858  	}
  1859  
  1860  	return true
  1861  }
  1862  
  1863  // Section 12.2.6.4.17.
  1864  func inSelectInTableIM(p *parser) bool {
  1865  	switch p.tok.Type {
  1866  	case StartTagToken, EndTagToken:
  1867  		switch p.tok.DataAtom {
  1868  		case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
  1869  			if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) {
  1870  				// Ignore the token.
  1871  				return true
  1872  			}
  1873  			// This is like p.popUntil(selectScope, a.Select), but it also
  1874  			// matches <math select>, not just <select>. Matching the MathML
  1875  			// tag is arguably incorrect (conceptually), but it mimics what
  1876  			// Chromium does.
  1877  			for i := len(p.oe) - 1; i >= 0; i-- {
  1878  				if n := p.oe[i]; n.DataAtom == a.Select {
  1879  					p.oe = p.oe[:i]
  1880  					break
  1881  				}
  1882  			}
  1883  			p.resetInsertionMode()
  1884  			return false
  1885  		}
  1886  	}
  1887  	return inSelectIM(p)
  1888  }
  1889  
  1890  // Section 12.2.6.4.18.
  1891  func inTemplateIM(p *parser) bool {
  1892  	switch p.tok.Type {
  1893  	case TextToken, CommentToken, DoctypeToken:
  1894  		return inBodyIM(p)
  1895  	case StartTagToken:
  1896  		switch p.tok.DataAtom {
  1897  		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
  1898  			return inHeadIM(p)
  1899  		case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
  1900  			p.templateStack.pop()
  1901  			p.templateStack = append(p.templateStack, inTableIM)
  1902  			p.im = inTableIM
  1903  			return false
  1904  		case a.Col:
  1905  			p.templateStack.pop()
  1906  			p.templateStack = append(p.templateStack, inColumnGroupIM)
  1907  			p.im = inColumnGroupIM
  1908  			return false
  1909  		case a.Tr:
  1910  			p.templateStack.pop()
  1911  			p.templateStack = append(p.templateStack, inTableBodyIM)
  1912  			p.im = inTableBodyIM
  1913  			return false
  1914  		case a.Td, a.Th:
  1915  			p.templateStack.pop()
  1916  			p.templateStack = append(p.templateStack, inRowIM)
  1917  			p.im = inRowIM
  1918  			return false
  1919  		default:
  1920  			p.templateStack.pop()
  1921  			p.templateStack = append(p.templateStack, inBodyIM)
  1922  			p.im = inBodyIM
  1923  			return false
  1924  		}
  1925  	case EndTagToken:
  1926  		switch p.tok.DataAtom {
  1927  		case a.Template:
  1928  			return inHeadIM(p)
  1929  		default:
  1930  			// Ignore the token.
  1931  			return true
  1932  		}
  1933  	case ErrorToken:
  1934  		if !p.oe.contains(a.Template) {
  1935  			// Ignore the token.
  1936  			return true
  1937  		}
  1938  		// TODO: remove this divergence from the HTML5 spec.
  1939  		//
  1940  		// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
  1941  		p.generateImpliedEndTags()
  1942  		for i := len(p.oe) - 1; i >= 0; i-- {
  1943  			if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
  1944  				p.oe = p.oe[:i]
  1945  				break
  1946  			}
  1947  		}
  1948  		p.clearActiveFormattingElements()
  1949  		p.templateStack.pop()
  1950  		p.resetInsertionMode()
  1951  		return false
  1952  	}
  1953  	return false
  1954  }
  1955  
  1956  // Section 12.2.6.4.19.
  1957  func afterBodyIM(p *parser) bool {
  1958  	switch p.tok.Type {
  1959  	case ErrorToken:
  1960  		// Stop parsing.
  1961  		return true
  1962  	case TextToken:
  1963  		s := strings.TrimLeft(p.tok.Data, whitespace)
  1964  		if len(s) == 0 {
  1965  			// It was all whitespace.
  1966  			return inBodyIM(p)
  1967  		}
  1968  	case StartTagToken:
  1969  		if p.tok.DataAtom == a.Html {
  1970  			return inBodyIM(p)
  1971  		}
  1972  	case EndTagToken:
  1973  		if p.tok.DataAtom == a.Html {
  1974  			if !p.fragment {
  1975  				p.im = afterAfterBodyIM
  1976  			}
  1977  			return true
  1978  		}
  1979  	case CommentToken:
  1980  		// The comment is attached to the <html> element.
  1981  		if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
  1982  			panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
  1983  		}
  1984  		p.oe[0].AppendChild(&Node{
  1985  			Type: CommentNode,
  1986  			Data: p.tok.Data,
  1987  		})
  1988  		return true
  1989  	}
  1990  	p.im = inBodyIM
  1991  	return false
  1992  }
  1993  
  1994  // Section 12.2.6.4.20.
  1995  func inFramesetIM(p *parser) bool {
  1996  	switch p.tok.Type {
  1997  	case CommentToken:
  1998  		p.addChild(&Node{
  1999  			Type: CommentNode,
  2000  			Data: p.tok.Data,
  2001  		})
  2002  	case TextToken:
  2003  		// Ignore all text but whitespace.
  2004  		s := strings.Map(func(c rune) rune {
  2005  			switch c {
  2006  			case ' ', '\t', '\n', '\f', '\r':
  2007  				return c
  2008  			}
  2009  			return -1
  2010  		}, p.tok.Data)
  2011  		if s != "" {
  2012  			p.addText(s)
  2013  		}
  2014  	case StartTagToken:
  2015  		switch p.tok.DataAtom {
  2016  		case a.Html:
  2017  			return inBodyIM(p)
  2018  		case a.Frameset:
  2019  			p.addElement()
  2020  		case a.Frame:
  2021  			p.addElement()
  2022  			p.oe.pop()
  2023  			p.acknowledgeSelfClosingTag()
  2024  		case a.Noframes:
  2025  			return inHeadIM(p)
  2026  		}
  2027  	case EndTagToken:
  2028  		switch p.tok.DataAtom {
  2029  		case a.Frameset:
  2030  			if p.oe.top().DataAtom != a.Html {
  2031  				p.oe.pop()
  2032  				if p.oe.top().DataAtom != a.Frameset {
  2033  					p.im = afterFramesetIM
  2034  					return true
  2035  				}
  2036  			}
  2037  		}
  2038  	default:
  2039  		// Ignore the token.
  2040  	}
  2041  	return true
  2042  }
  2043  
  2044  // Section 12.2.6.4.21.
  2045  func afterFramesetIM(p *parser) bool {
  2046  	switch p.tok.Type {
  2047  	case CommentToken:
  2048  		p.addChild(&Node{
  2049  			Type: CommentNode,
  2050  			Data: p.tok.Data,
  2051  		})
  2052  	case TextToken:
  2053  		// Ignore all text but whitespace.
  2054  		s := strings.Map(func(c rune) rune {
  2055  			switch c {
  2056  			case ' ', '\t', '\n', '\f', '\r':
  2057  				return c
  2058  			}
  2059  			return -1
  2060  		}, p.tok.Data)
  2061  		if s != "" {
  2062  			p.addText(s)
  2063  		}
  2064  	case StartTagToken:
  2065  		switch p.tok.DataAtom {
  2066  		case a.Html:
  2067  			return inBodyIM(p)
  2068  		case a.Noframes:
  2069  			return inHeadIM(p)
  2070  		}
  2071  	case EndTagToken:
  2072  		switch p.tok.DataAtom {
  2073  		case a.Html:
  2074  			p.im = afterAfterFramesetIM
  2075  			return true
  2076  		}
  2077  	default:
  2078  		// Ignore the token.
  2079  	}
  2080  	return true
  2081  }
  2082  
  2083  // Section 12.2.6.4.22.
  2084  func afterAfterBodyIM(p *parser) bool {
  2085  	switch p.tok.Type {
  2086  	case ErrorToken:
  2087  		// Stop parsing.
  2088  		return true
  2089  	case TextToken:
  2090  		s := strings.TrimLeft(p.tok.Data, whitespace)
  2091  		if len(s) == 0 {
  2092  			// It was all whitespace.
  2093  			return inBodyIM(p)
  2094  		}
  2095  	case StartTagToken:
  2096  		if p.tok.DataAtom == a.Html {
  2097  			return inBodyIM(p)
  2098  		}
  2099  	case CommentToken:
  2100  		p.doc.AppendChild(&Node{
  2101  			Type: CommentNode,
  2102  			Data: p.tok.Data,
  2103  		})
  2104  		return true
  2105  	case DoctypeToken:
  2106  		return inBodyIM(p)
  2107  	}
  2108  	p.im = inBodyIM
  2109  	return false
  2110  }
  2111  
  2112  // Section 12.2.6.4.23.
  2113  func afterAfterFramesetIM(p *parser) bool {
  2114  	switch p.tok.Type {
  2115  	case CommentToken:
  2116  		p.doc.AppendChild(&Node{
  2117  			Type: CommentNode,
  2118  			Data: p.tok.Data,
  2119  		})
  2120  	case TextToken:
  2121  		// Ignore all text but whitespace.
  2122  		s := strings.Map(func(c rune) rune {
  2123  			switch c {
  2124  			case ' ', '\t', '\n', '\f', '\r':
  2125  				return c
  2126  			}
  2127  			return -1
  2128  		}, p.tok.Data)
  2129  		if s != "" {
  2130  			p.tok.Data = s
  2131  			return inBodyIM(p)
  2132  		}
  2133  	case StartTagToken:
  2134  		switch p.tok.DataAtom {
  2135  		case a.Html:
  2136  			return inBodyIM(p)
  2137  		case a.Noframes:
  2138  			return inHeadIM(p)
  2139  		}
  2140  	case DoctypeToken:
  2141  		return inBodyIM(p)
  2142  	default:
  2143  		// Ignore the token.
  2144  	}
  2145  	return true
  2146  }
  2147  
  2148  func ignoreTheRemainingTokens(p *parser) bool {
  2149  	return true
  2150  }
  2151  
  2152  const whitespaceOrNUL = whitespace + "\x00"
  2153  
  2154  // Section 12.2.6.5
  2155  func parseForeignContent(p *parser) bool {
  2156  	switch p.tok.Type {
  2157  	case TextToken:
  2158  		if p.framesetOK {
  2159  			p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
  2160  		}
  2161  		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
  2162  		p.addText(p.tok.Data)
  2163  	case CommentToken:
  2164  		p.addChild(&Node{
  2165  			Type: CommentNode,
  2166  			Data: p.tok.Data,
  2167  		})
  2168  	case StartTagToken:
  2169  		if !p.fragment {
  2170  			b := breakout[p.tok.Data]
  2171  			if p.tok.DataAtom == a.Font {
  2172  			loop:
  2173  				for _, attr := range p.tok.Attr {
  2174  					switch attr.Key {
  2175  					case "color", "face", "size":
  2176  						b = true
  2177  						break loop
  2178  					}
  2179  				}
  2180  			}
  2181  			if b {
  2182  				for i := len(p.oe) - 1; i >= 0; i-- {
  2183  					n := p.oe[i]
  2184  					if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
  2185  						p.oe = p.oe[:i+1]
  2186  						break
  2187  					}
  2188  				}
  2189  				return false
  2190  			}
  2191  		}
  2192  		current := p.adjustedCurrentNode()
  2193  		switch current.Namespace {
  2194  		case "math":
  2195  			adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
  2196  		case "svg":
  2197  			// Adjust SVG tag names. The tokenizer lower-cases tag names, but
  2198  			// SVG wants e.g. "foreignObject" with a capital second "O".
  2199  			if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
  2200  				p.tok.DataAtom = a.Lookup([]byte(x))
  2201  				p.tok.Data = x
  2202  			}
  2203  			adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
  2204  		default:
  2205  			panic("html: bad parser state: unexpected namespace")
  2206  		}
  2207  		adjustForeignAttributes(p.tok.Attr)
  2208  		namespace := current.Namespace
  2209  		p.addElement()
  2210  		p.top().Namespace = namespace
  2211  		if namespace != "" {
  2212  			// Don't let the tokenizer go into raw text mode in foreign content
  2213  			// (e.g. in an SVG <title> tag).
  2214  			p.tokenizer.NextIsNotRawText()
  2215  		}
  2216  		if p.hasSelfClosingToken {
  2217  			p.oe.pop()
  2218  			p.acknowledgeSelfClosingTag()
  2219  		}
  2220  	case EndTagToken:
  2221  		for i := len(p.oe) - 1; i >= 0; i-- {
  2222  			if p.oe[i].Namespace == "" {
  2223  				return p.im(p)
  2224  			}
  2225  			if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
  2226  				p.oe = p.oe[:i]
  2227  				break
  2228  			}
  2229  		}
  2230  		return true
  2231  	default:
  2232  		// Ignore the token.
  2233  	}
  2234  	return true
  2235  }
  2236  
  2237  // Section 12.2.4.2.
  2238  func (p *parser) adjustedCurrentNode() *Node {
  2239  	if len(p.oe) == 1 && p.fragment && p.context != nil {
  2240  		return p.context
  2241  	}
  2242  	return p.oe.top()
  2243  }
  2244  
  2245  // Section 12.2.6.
  2246  func (p *parser) inForeignContent() bool {
  2247  	if len(p.oe) == 0 {
  2248  		return false
  2249  	}
  2250  	n := p.adjustedCurrentNode()
  2251  	if n.Namespace == "" {
  2252  		return false
  2253  	}
  2254  	if mathMLTextIntegrationPoint(n) {
  2255  		if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
  2256  			return false
  2257  		}
  2258  		if p.tok.Type == TextToken {
  2259  			return false
  2260  		}
  2261  	}
  2262  	if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
  2263  		return false
  2264  	}
  2265  	if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
  2266  		return false
  2267  	}
  2268  	if p.tok.Type == ErrorToken {
  2269  		return false
  2270  	}
  2271  	return true
  2272  }
  2273  
  2274  // parseImpliedToken parses a token as though it had appeared in the parser's
  2275  // input.
  2276  func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
  2277  	realToken, selfClosing := p.tok, p.hasSelfClosingToken
  2278  	p.tok = Token{
  2279  		Type:     t,
  2280  		DataAtom: dataAtom,
  2281  		Data:     data,
  2282  	}
  2283  	p.hasSelfClosingToken = false
  2284  	p.parseCurrentToken()
  2285  	p.tok, p.hasSelfClosingToken = realToken, selfClosing
  2286  }
  2287  
  2288  // parseCurrentToken runs the current token through the parsing routines
  2289  // until it is consumed.
  2290  func (p *parser) parseCurrentToken() {
  2291  	if p.tok.Type == SelfClosingTagToken {
  2292  		p.hasSelfClosingToken = true
  2293  		p.tok.Type = StartTagToken
  2294  	}
  2295  
  2296  	consumed := false
  2297  	for !consumed {
  2298  		if p.inForeignContent() {
  2299  			consumed = parseForeignContent(p)
  2300  		} else {
  2301  			consumed = p.im(p)
  2302  		}
  2303  	}
  2304  
  2305  	if p.hasSelfClosingToken {
  2306  		// This is a parse error, but ignore it.
  2307  		p.hasSelfClosingToken = false
  2308  	}
  2309  }
  2310  
  2311  func (p *parser) parse() error {
  2312  	// Iterate until EOF. Any other error will cause an early return.
  2313  	var err error
  2314  	for err != io.EOF {
  2315  		// CDATA sections are allowed only in foreign content.
  2316  		n := p.oe.top()
  2317  		p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
  2318  		// Read and parse the next token.
  2319  		p.tokenizer.Next()
  2320  		p.tok = p.tokenizer.Token()
  2321  		if p.tok.Type == ErrorToken {
  2322  			err = p.tokenizer.Err()
  2323  			if err != nil && err != io.EOF {
  2324  				return err
  2325  			}
  2326  		}
  2327  		p.parseCurrentToken()
  2328  	}
  2329  	return nil
  2330  }
  2331  
  2332  // Parse returns the parse tree for the HTML from the given Reader.
  2333  //
  2334  // It implements the HTML5 parsing algorithm
  2335  // (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction),
  2336  // which is very complicated. The resultant tree can contain implicitly created
  2337  // nodes that have no explicit <tag> listed in r's data, and nodes' parents can
  2338  // differ from the nesting implied by a naive processing of start and end
  2339  // <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
  2340  // with no corresponding node in the resulting tree.
  2341  //
  2342  // The input is assumed to be UTF-8 encoded.
  2343  func Parse(r io.Reader) (*Node, error) {
  2344  	return ParseWithOptions(r)
  2345  }
  2346  
  2347  // ParseFragment parses a fragment of HTML and returns the nodes that were
  2348  // found. If the fragment is the InnerHTML for an existing element, pass that
  2349  // element in context.
  2350  //
  2351  // It has the same intricacies as Parse.
  2352  func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
  2353  	return ParseFragmentWithOptions(r, context)
  2354  }
  2355  
  2356  // ParseOption configures a parser.
  2357  type ParseOption func(p *parser)
  2358  
  2359  // ParseOptionEnableScripting configures the scripting flag.
  2360  // https://html.spec.whatwg.org/multipage/webappapis.html#enabling-and-disabling-scripting
  2361  //
  2362  // By default, scripting is enabled.
  2363  func ParseOptionEnableScripting(enable bool) ParseOption {
  2364  	return func(p *parser) {
  2365  		p.scripting = enable
  2366  	}
  2367  }
  2368  
  2369  // ParseWithOptions is like Parse, with options.
  2370  func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
  2371  	p := &parser{
  2372  		tokenizer: NewTokenizer(r),
  2373  		doc: &Node{
  2374  			Type: DocumentNode,
  2375  		},
  2376  		scripting:  true,
  2377  		framesetOK: true,
  2378  		im:         initialIM,
  2379  	}
  2380  
  2381  	for _, f := range opts {
  2382  		f(p)
  2383  	}
  2384  
  2385  	if err := p.parse(); err != nil {
  2386  		return nil, err
  2387  	}
  2388  	return p.doc, nil
  2389  }
  2390  
  2391  // ParseFragmentWithOptions is like ParseFragment, with options.
  2392  func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ([]*Node, error) {
  2393  	contextTag := ""
  2394  	if context != nil {
  2395  		if context.Type != ElementNode {
  2396  			return nil, errors.New("html: ParseFragment of non-element Node")
  2397  		}
  2398  		// The next check isn't just context.DataAtom.String() == context.Data because
  2399  		// it is valid to pass an element whose tag isn't a known atom. For example,
  2400  		// DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
  2401  		if context.DataAtom != a.Lookup([]byte(context.Data)) {
  2402  			return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
  2403  		}
  2404  		contextTag = context.DataAtom.String()
  2405  	}
  2406  	p := &parser{
  2407  		doc: &Node{
  2408  			Type: DocumentNode,
  2409  		},
  2410  		scripting: true,
  2411  		fragment:  true,
  2412  		context:   context,
  2413  	}
  2414  	if context != nil && context.Namespace != "" {
  2415  		p.tokenizer = NewTokenizer(r)
  2416  	} else {
  2417  		p.tokenizer = NewTokenizerFragment(r, contextTag)
  2418  	}
  2419  
  2420  	for _, f := range opts {
  2421  		f(p)
  2422  	}
  2423  
  2424  	root := &Node{
  2425  		Type:     ElementNode,
  2426  		DataAtom: a.Html,
  2427  		Data:     a.Html.String(),
  2428  	}
  2429  	p.doc.AppendChild(root)
  2430  	p.oe = nodeStack{root}
  2431  	if context != nil && context.DataAtom == a.Template {
  2432  		p.templateStack = append(p.templateStack, inTemplateIM)
  2433  	}
  2434  	p.resetInsertionMode()
  2435  
  2436  	for n := context; n != nil; n = n.Parent {
  2437  		if n.Type == ElementNode && n.DataAtom == a.Form {
  2438  			p.form = n
  2439  			break
  2440  		}
  2441  	}
  2442  
  2443  	if err := p.parse(); err != nil {
  2444  		return nil, err
  2445  	}
  2446  
  2447  	parent := p.doc
  2448  	if context != nil {
  2449  		parent = root
  2450  	}
  2451  
  2452  	var result []*Node
  2453  	for c := parent.FirstChild; c != nil; {
  2454  		next := c.NextSibling
  2455  		parent.RemoveChild(c)
  2456  		result = append(result, c)
  2457  		c = next
  2458  	}
  2459  	return result, nil
  2460  }
  2461  

View as plain text