...

Source file src/golang.org/x/net/html/render.go

Documentation: golang.org/x/net/html

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package html
     6  
     7  import (
     8  	"bufio"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  	"strings"
    13  )
    14  
    15  type writer interface {
    16  	io.Writer
    17  	io.ByteWriter
    18  	WriteString(string) (int, error)
    19  }
    20  
    21  // Render renders the parse tree n to the given writer.
    22  //
    23  // Rendering is done on a 'best effort' basis: calling Parse on the output of
    24  // Render will always result in something similar to the original tree, but it
    25  // is not necessarily an exact clone unless the original tree was 'well-formed'.
    26  // 'Well-formed' is not easily specified; the HTML5 specification is
    27  // complicated.
    28  //
    29  // Calling Parse on arbitrary input typically results in a 'well-formed' parse
    30  // tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
    31  // For example, in a 'well-formed' parse tree, no <a> element is a child of
    32  // another <a> element: parsing "<a><a>" results in two sibling elements.
    33  // Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
    34  // <table> element: parsing "<p><table><a>" results in a <p> with two sibling
    35  // children; the <a> is reparented to the <table>'s parent. However, calling
    36  // Parse on "<a><table><a>" does not return an error, but the result has an <a>
    37  // element with an <a> child, and is therefore not 'well-formed'.
    38  //
    39  // Programmatically constructed trees are typically also 'well-formed', but it
    40  // is possible to construct a tree that looks innocuous but, when rendered and
    41  // re-parsed, results in a different tree. A simple example is that a solitary
    42  // text node would become a tree containing <html>, <head> and <body> elements.
    43  // Another example is that the programmatic equivalent of "a<head>b</head>c"
    44  // becomes "<html><head><head/><body>abc</body></html>".
    45  func Render(w io.Writer, n *Node) error {
    46  	if x, ok := w.(writer); ok {
    47  		return render(x, n)
    48  	}
    49  	buf := bufio.NewWriter(w)
    50  	if err := render(buf, n); err != nil {
    51  		return err
    52  	}
    53  	return buf.Flush()
    54  }
    55  
    56  // plaintextAbort is returned from render1 when a <plaintext> element
    57  // has been rendered. No more end tags should be rendered after that.
    58  var plaintextAbort = errors.New("html: internal error (plaintext abort)")
    59  
    60  func render(w writer, n *Node) error {
    61  	err := render1(w, n)
    62  	if err == plaintextAbort {
    63  		err = nil
    64  	}
    65  	return err
    66  }
    67  
    68  func render1(w writer, n *Node) error {
    69  	// Render non-element nodes; these are the easy cases.
    70  	switch n.Type {
    71  	case ErrorNode:
    72  		return errors.New("html: cannot render an ErrorNode node")
    73  	case TextNode:
    74  		return escape(w, n.Data)
    75  	case DocumentNode:
    76  		for c := n.FirstChild; c != nil; c = c.NextSibling {
    77  			if err := render1(w, c); err != nil {
    78  				return err
    79  			}
    80  		}
    81  		return nil
    82  	case ElementNode:
    83  		// No-op.
    84  	case CommentNode:
    85  		if _, err := w.WriteString("<!--"); err != nil {
    86  			return err
    87  		}
    88  		if err := escapeComment(w, n.Data); err != nil {
    89  			return err
    90  		}
    91  		if _, err := w.WriteString("-->"); err != nil {
    92  			return err
    93  		}
    94  		return nil
    95  	case DoctypeNode:
    96  		if _, err := w.WriteString("<!DOCTYPE "); err != nil {
    97  			return err
    98  		}
    99  		if err := escape(w, n.Data); err != nil {
   100  			return err
   101  		}
   102  		if n.Attr != nil {
   103  			var p, s string
   104  			for _, a := range n.Attr {
   105  				switch a.Key {
   106  				case "public":
   107  					p = a.Val
   108  				case "system":
   109  					s = a.Val
   110  				}
   111  			}
   112  			if p != "" {
   113  				if _, err := w.WriteString(" PUBLIC "); err != nil {
   114  					return err
   115  				}
   116  				if err := writeQuoted(w, p); err != nil {
   117  					return err
   118  				}
   119  				if s != "" {
   120  					if err := w.WriteByte(' '); err != nil {
   121  						return err
   122  					}
   123  					if err := writeQuoted(w, s); err != nil {
   124  						return err
   125  					}
   126  				}
   127  			} else if s != "" {
   128  				if _, err := w.WriteString(" SYSTEM "); err != nil {
   129  					return err
   130  				}
   131  				if err := writeQuoted(w, s); err != nil {
   132  					return err
   133  				}
   134  			}
   135  		}
   136  		return w.WriteByte('>')
   137  	case RawNode:
   138  		_, err := w.WriteString(n.Data)
   139  		return err
   140  	default:
   141  		return errors.New("html: unknown node type")
   142  	}
   143  
   144  	// Render the <xxx> opening tag.
   145  	if err := w.WriteByte('<'); err != nil {
   146  		return err
   147  	}
   148  	if _, err := w.WriteString(n.Data); err != nil {
   149  		return err
   150  	}
   151  	for _, a := range n.Attr {
   152  		if err := w.WriteByte(' '); err != nil {
   153  			return err
   154  		}
   155  		if a.Namespace != "" {
   156  			if _, err := w.WriteString(a.Namespace); err != nil {
   157  				return err
   158  			}
   159  			if err := w.WriteByte(':'); err != nil {
   160  				return err
   161  			}
   162  		}
   163  		if _, err := w.WriteString(a.Key); err != nil {
   164  			return err
   165  		}
   166  		if _, err := w.WriteString(`="`); err != nil {
   167  			return err
   168  		}
   169  		if err := escape(w, a.Val); err != nil {
   170  			return err
   171  		}
   172  		if err := w.WriteByte('"'); err != nil {
   173  			return err
   174  		}
   175  	}
   176  	if voidElements[n.Data] {
   177  		if n.FirstChild != nil {
   178  			return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
   179  		}
   180  		_, err := w.WriteString("/>")
   181  		return err
   182  	}
   183  	if err := w.WriteByte('>'); err != nil {
   184  		return err
   185  	}
   186  
   187  	// Add initial newline where there is danger of a newline beging ignored.
   188  	if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
   189  		switch n.Data {
   190  		case "pre", "listing", "textarea":
   191  			if err := w.WriteByte('\n'); err != nil {
   192  				return err
   193  			}
   194  		}
   195  	}
   196  
   197  	// Render any child nodes
   198  	if childTextNodesAreLiteral(n) {
   199  		for c := n.FirstChild; c != nil; c = c.NextSibling {
   200  			if c.Type == TextNode {
   201  				if _, err := w.WriteString(c.Data); err != nil {
   202  					return err
   203  				}
   204  			} else {
   205  				if err := render1(w, c); err != nil {
   206  					return err
   207  				}
   208  			}
   209  		}
   210  		if n.Data == "plaintext" {
   211  			// Don't render anything else. <plaintext> must be the
   212  			// last element in the file, with no closing tag.
   213  			return plaintextAbort
   214  		}
   215  	} else {
   216  		for c := n.FirstChild; c != nil; c = c.NextSibling {
   217  			if err := render1(w, c); err != nil {
   218  				return err
   219  			}
   220  		}
   221  	}
   222  
   223  	// Render the </xxx> closing tag.
   224  	if _, err := w.WriteString("</"); err != nil {
   225  		return err
   226  	}
   227  	if _, err := w.WriteString(n.Data); err != nil {
   228  		return err
   229  	}
   230  	return w.WriteByte('>')
   231  }
   232  
   233  func childTextNodesAreLiteral(n *Node) bool {
   234  	// Per WHATWG HTML 13.3, if the parent of the current node is a style,
   235  	// script, xmp, iframe, noembed, noframes, or plaintext element, and the
   236  	// current node is a text node, append the value of the node's data
   237  	// literally. The specification is not explicit about it, but we only
   238  	// enforce this if we are in the HTML namespace (i.e. when the namespace is
   239  	// "").
   240  	// NOTE: we also always include noscript elements, although the
   241  	// specification states that they should only be rendered as such if
   242  	// scripting is enabled for the node (which is not something we track).
   243  	if n.Namespace != "" {
   244  		return false
   245  	}
   246  	switch n.Data {
   247  	case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
   248  		return true
   249  	default:
   250  		return false
   251  	}
   252  }
   253  
   254  // writeQuoted writes s to w surrounded by quotes. Normally it will use double
   255  // quotes, but if s contains a double quote, it will use single quotes.
   256  // It is used for writing the identifiers in a doctype declaration.
   257  // In valid HTML, they can't contain both types of quotes.
   258  func writeQuoted(w writer, s string) error {
   259  	var q byte = '"'
   260  	if strings.Contains(s, `"`) {
   261  		q = '\''
   262  	}
   263  	if err := w.WriteByte(q); err != nil {
   264  		return err
   265  	}
   266  	if _, err := w.WriteString(s); err != nil {
   267  		return err
   268  	}
   269  	if err := w.WriteByte(q); err != nil {
   270  		return err
   271  	}
   272  	return nil
   273  }
   274  
   275  // Section 12.1.2, "Elements", gives this list of void elements. Void elements
   276  // are those that can't have any contents.
   277  var voidElements = map[string]bool{
   278  	"area":   true,
   279  	"base":   true,
   280  	"br":     true,
   281  	"col":    true,
   282  	"embed":  true,
   283  	"hr":     true,
   284  	"img":    true,
   285  	"input":  true,
   286  	"keygen": true, // "keygen" has been removed from the spec, but are kept here for backwards compatibility.
   287  	"link":   true,
   288  	"meta":   true,
   289  	"param":  true,
   290  	"source": true,
   291  	"track":  true,
   292  	"wbr":    true,
   293  }
   294  

View as plain text