// Copyright 2010 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package html import ( "bufio" "bytes" "errors" "fmt" "io" "io/ioutil" "os" "path/filepath" "runtime" "sort" "strings" "testing" "golang.org/x/net/html/atom" ) type testAttrs struct { text, want, context string scripting bool } // readParseTest reads a single test case from r. func readParseTest(r *bufio.Reader) (*testAttrs, error) { ta := &testAttrs{scripting: true} line, err := r.ReadSlice('\n') if err != nil { return nil, err } var b []byte // Read the HTML. if string(line) != "#data\n" { return nil, fmt.Errorf(`got %q want "#data\n"`, line) } for { line, err = r.ReadSlice('\n') if err != nil { return nil, err } if line[0] == '#' { break } b = append(b, line...) } ta.text = strings.TrimSuffix(string(b), "\n") b = b[:0] // Skip the error list. if string(line) != "#errors\n" { return nil, fmt.Errorf(`got %q want "#errors\n"`, line) } for { line, err = r.ReadSlice('\n') if err != nil { return nil, err } if line[0] == '#' { break } } // Skip the new-errors list. if string(line) == "#new-errors\n" { for { line, err = r.ReadSlice('\n') if err != nil { return nil, err } if line[0] == '#' { break } } } if ls := string(line); strings.HasPrefix(ls, "#script-") { switch { case strings.HasSuffix(ls, "-on\n"): ta.scripting = true case strings.HasSuffix(ls, "-off\n"): ta.scripting = false default: return nil, fmt.Errorf(`got %q, want "#script-on" or "#script-off"`, line) } for { line, err = r.ReadSlice('\n') if err != nil { return nil, err } if line[0] == '#' { break } } } if string(line) == "#document-fragment\n" { line, err = r.ReadSlice('\n') if err != nil { return nil, err } ta.context = strings.TrimSpace(string(line)) line, err = r.ReadSlice('\n') if err != nil { return nil, err } } // Read the dump of what the parse tree should be. if string(line) != "#document\n" { return nil, fmt.Errorf(`got %q want "#document\n"`, line) } inQuote := false for { line, err = r.ReadSlice('\n') if err != nil && err != io.EOF { return nil, err } trimmed := bytes.Trim(line, "| \n") if len(trimmed) > 0 { if line[0] == '|' && trimmed[0] == '"' { inQuote = true } if trimmed[len(trimmed)-1] == '"' && !(line[0] == '|' && len(trimmed) == 1) { inQuote = false } } if len(line) == 0 || len(line) == 1 && line[0] == '\n' && !inQuote { break } b = append(b, line...) } ta.want = string(b) return ta, nil } func dumpIndent(w io.Writer, level int) { io.WriteString(w, "| ") for i := 0; i < level; i++ { io.WriteString(w, " ") } } type sortedAttributes []Attribute func (a sortedAttributes) Len() int { return len(a) } func (a sortedAttributes) Less(i, j int) bool { if a[i].Namespace != a[j].Namespace { return a[i].Namespace < a[j].Namespace } return a[i].Key < a[j].Key } func (a sortedAttributes) Swap(i, j int) { a[i], a[j] = a[j], a[i] } func dumpLevel(w io.Writer, n *Node, level int) error { dumpIndent(w, level) level++ switch n.Type { case ErrorNode: return errors.New("unexpected ErrorNode") case DocumentNode: return errors.New("unexpected DocumentNode") case ElementNode: if n.Namespace != "" { fmt.Fprintf(w, "<%s %s>", n.Namespace, n.Data) } else { fmt.Fprintf(w, "<%s>", n.Data) } attr := sortedAttributes(n.Attr) sort.Sort(attr) for _, a := range attr { io.WriteString(w, "\n") dumpIndent(w, level) if a.Namespace != "" { fmt.Fprintf(w, `%s %s="%s"`, a.Namespace, a.Key, a.Val) } else { fmt.Fprintf(w, `%s="%s"`, a.Key, a.Val) } } if n.Namespace == "" && n.DataAtom == atom.Template { io.WriteString(w, "\n") dumpIndent(w, level) level++ io.WriteString(w, "content") } case TextNode: fmt.Fprintf(w, `"%s"`, n.Data) case CommentNode: fmt.Fprintf(w, "", n.Data) case DoctypeNode: fmt.Fprintf(w, "") case scopeMarkerNode: return errors.New("unexpected scopeMarkerNode") default: return errors.New("unknown node type") } io.WriteString(w, "\n") for c := n.FirstChild; c != nil; c = c.NextSibling { if err := dumpLevel(w, c, level); err != nil { return err } } return nil } func dump(n *Node) (string, error) { if n == nil || n.FirstChild == nil { return "", nil } var b bytes.Buffer for c := n.FirstChild; c != nil; c = c.NextSibling { if err := dumpLevel(&b, c, 0); err != nil { return "", err } } return b.String(), nil } var testDataDirs = []string{"testdata/webkit/", "testdata/go/"} func TestParser(t *testing.T) { for _, testDataDir := range testDataDirs { testFiles, err := filepath.Glob(testDataDir + "*.dat") if err != nil { t.Fatal(err) } for _, tf := range testFiles { f, err := os.Open(tf) if err != nil { t.Fatal(err) } defer f.Close() r := bufio.NewReader(f) for i := 0; ; i++ { ta, err := readParseTest(r) if err == io.EOF { break } if err != nil { t.Fatal(err) } if parseTestBlacklist[ta.text] { continue } err = testParseCase(ta.text, ta.want, ta.context, ParseOptionEnableScripting(ta.scripting)) if err != nil { t.Errorf("%s test #%d %q, %s", tf, i, ta.text, err) } } } } } // Issue 16318 func TestParserWithoutScripting(t *testing.T) { text := `
` want := `| | |