1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
21 "golang.org/x/net/html/atom"
24 // readParseTest reads a single test case from r.
//
// The visible checks expect these section headers, in order: "#data",
// "#errors", an optional "#document-fragment" and finally "#document".
// A header line that does not match produces a "got ... want ..." error.
//
// Results: text is the accumulated data with one trailing newline removed,
// want is the accumulated tree dump, and context is the whitespace-trimmed
// line that follows "#document-fragment" (in the visible code it is assigned
// nowhere else, so it stays empty when that header is absent).
25 func readParseTest(r *bufio.Reader) (text, want, context string, err error) {
26 line, err := r.ReadSlice('\n')
28 return "", "", "", err
33 if string(line) != "#data\n" {
34 return "", "", "", fmt.Errorf(`got %q want "#data\n"`, line)
37 line, err = r.ReadSlice('\n')
39 return "", "", "", err
// Collect the current line into b; text is derived from b just below.
44 b = append(b, line...)
// Only a single trailing newline is stripped from the collected data.
46 text = strings.TrimSuffix(string(b), "\n")
49 // Skip the error list.
50 if string(line) != "#errors\n" {
51 return "", "", "", fmt.Errorf(`got %q want "#errors\n"`, line)
54 line, err = r.ReadSlice('\n')
56 return "", "", "", err
// Optional fragment section: the line after the header supplies context.
63 if string(line) == "#document-fragment\n" {
64 line, err = r.ReadSlice('\n')
66 return "", "", "", err
68 context = strings.TrimSpace(string(line))
69 line, err = r.ReadSlice('\n')
71 return "", "", "", err
75 // Read the dump of what the parse tree should be.
76 if string(line) != "#document\n" {
77 return "", "", "", fmt.Errorf(`got %q want "#document\n"`, line)
81 line, err = r.ReadSlice('\n')
// io.EOF is tolerated at this point; any other read error is returned.
82 if err != nil && err != io.EOF {
83 return "", "", "", err
// trimmed is line with leading and trailing '|', ' ' and newline bytes removed.
85 trimmed := bytes.Trim(line, "| \n")
// NOTE(review): the next two conditions look like they open and close the
// inQuote state tested further down; the assignments themselves are not
// visible in this excerpt - confirm against the full file.
87 if line[0] == '|' && trimmed[0] == '"' {
90 if trimmed[len(trimmed)-1] == '"' && !(line[0] == '|' && len(trimmed) == 1) {
// Precedence: && binds tighter than ||, so this condition is
// len(line) == 0 || (len(line) == 1 && line[0] == '\n' && !inQuote).
94 if len(line) == 0 || len(line) == 1 && line[0] == '\n' && !inQuote {
97 b = append(b, line...)
99 return text, string(b), context, nil
// dumpIndent writes the "| " line prefix to w and then one indentation
// string per level.
// NOTE(review): the values returned by io.WriteString are not checked in the
// visible lines.
102 func dumpIndent(w io.Writer, level int) {
103 io.WriteString(w, "| ")
104 for i := 0; i < level; i++ {
105 io.WriteString(w, " ")
// sortedAttributes is a slice of Attribute with an ordering defined by Less:
// first by Namespace, then by Key. Its Len, Less and Swap methods have the
// signatures required by sort.Interface.
109 type sortedAttributes []Attribute
// Len has the sort.Interface Len signature.
// NOTE(review): its body is not visible in this excerpt; presumably len(a) - confirm.
111 func (a sortedAttributes) Len() int {
// Less reports whether element i orders before element j. Namespaces are
// compared first; Key only breaks ties between equal namespaces.
115 func (a sortedAttributes) Less(i, j int) bool {
116 if a[i].Namespace != a[j].Namespace {
117 return a[i].Namespace < a[j].Namespace
119 return a[i].Key < a[j].Key
// Swap exchanges the elements at indexes i and j.
122 func (a sortedAttributes) Swap(i, j int) {
123 a[i], a[j] = a[j], a[i]
// dumpLevel writes a textual dump of n to w and then calls itself for each
// child of n. It returns an error for the node kinds named in the messages
// below (ErrorNode, DocumentNode, scopeMarkerNode) and for unknown node types.
// NOTE(review): most case labels of the type switch are not visible in this
// excerpt, so the comments below describe the statements, not the cases.
126 func dumpLevel(w io.Writer, n *Node, level int) error {
131 return errors.New("unexpected ErrorNode")
133 return errors.New("unexpected DocumentNode")
// A non-empty namespace is printed before the name, separated by a space.
135 if n.Namespace != "" {
136 fmt.Fprintf(w, "<%s %s>", n.Namespace, n.Data)
138 fmt.Fprintf(w, "<%s>", n.Data)
// The conversion does not copy: attr shares its elements with n.Attr, so
// reordering attr also reorders n.Attr.
// NOTE(review): the sort call is not visible in this excerpt.
140 attr := sortedAttributes(n.Attr)
// Each attribute goes on its own line as key="val", with the namespace in
// front when it is set. Values are written with %s, without escaping.
142 for _, a := range attr {
143 io.WriteString(w, "\n")
145 if a.Namespace != "" {
146 fmt.Fprintf(w, `%s %s="%s"`, a.Namespace, a.Key, a.Val)
148 fmt.Fprintf(w, `%s="%s"`, a.Key, a.Val)
// Non-namespaced template elements get an additional "content" line.
151 if n.Namespace == "" && n.DataAtom == atom.Template {
152 io.WriteString(w, "\n")
155 io.WriteString(w, "content")
// n.Data wrapped in double quotes.
158 fmt.Fprintf(w, `"%s"`, n.Data)
// n.Data wrapped in comment delimiters.
160 fmt.Fprintf(w, "<!-- %s -->", n.Data)
// Start of a doctype line; the closing ">" is written further down.
162 fmt.Fprintf(w, "<!DOCTYPE %s", n.Data)
// NOTE(review): p and s are presumably filled from n.Attr in this loop; the
// assignments are not visible in this excerpt.
165 for _, a := range n.Attr {
// If either value is non-empty, both are written, each in double quotes.
173 if p != "" || s != "" {
174 fmt.Fprintf(w, ` "%s"`, p)
175 fmt.Fprintf(w, ` "%s"`, s)
178 io.WriteString(w, ">")
179 case scopeMarkerNode:
180 return errors.New("unexpected scopeMarkerNode")
182 return errors.New("unknown node type")
// Finish the line for n, then dump every child in sibling order.
184 io.WriteString(w, "\n")
185 for c := n.FirstChild; c != nil; c = c.NextSibling {
186 if err := dumpLevel(w, c, level); err != nil {
// dump returns the textual dump of the children of n; n itself is not passed
// to dumpLevel. Each child is dumped at level 0 into a buffer whose contents
// are returned as a string.
193 func dump(n *Node) (string, error) {
// A nil node or a node without children is handled separately.
// NOTE(review): the result returned for that case is not visible in this excerpt.
194 if n == nil || n.FirstChild == nil {
198 for c := n.FirstChild; c != nil; c = c.NextSibling {
199 if err := dumpLevel(&b, c, 0); err != nil {
203 return b.String(), nil
// testDataDirs lists the directories in which TestParser looks for *.dat
// files. Each entry ends in "/" because TestParser builds its glob pattern by
// plain string concatenation.
206 var testDataDirs = []string{"testdata/webkit/", "testdata/go/"}
// TestParser opens every *.dat file found under testDataDirs, reads test
// cases from it with readParseTest, checks them with testParseCase and
// reports failures through t.Errorf.
208 func TestParser(t *testing.T) {
209 for _, testDataDir := range testDataDirs {
210 testFiles, err := filepath.Glob(testDataDir + "*.dat")
214 for _, tf := range testFiles {
215 f, err := os.Open(tf)
220 r := bufio.NewReader(f)
223 text, want, context, err := readParseTest(r)
231 err = testParseCase(text, want, context)
// The message names the file, the case number i and the quoted input.
// NOTE(review): the declaration of i is not visible in this excerpt.
234 t.Errorf("%s test #%d %q, %s", tf, i, text, err)
241 // testParseCase tests one test case from the test files. If the test does not
242 // pass, it returns an error that explains the failure.
243 // text is the HTML to be parsed, want is a dump of the correct parse tree,
244 // and context is the name of the context node, if any.
//
// err is a named result so that the panic handling below can assign to it.
245 func testParseCase(text, want, context string) (err error) {
// A recovered panic value is turned into the returned error.
// NOTE(review): recover only has an effect inside a deferred function; the
// enclosing defer is not visible in this excerpt.
247 if x := recover(); x != nil {
248 switch e := x.(type) {
252 err = fmt.Errorf("%v", e)
// Whole-document parse.
// NOTE(review): the condition choosing between this and the fragment parse
// below is not visible in this excerpt.
259 doc, err = Parse(strings.NewReader(text))
// Fragment parse: the context node's DataAtom is looked up from the context
// name.
264 contextNode := &Node{
266 DataAtom: atom.Lookup([]byte(context)),
269 nodes, err := ParseFragment(strings.NewReader(text), contextNode)
// NOTE(review): presumably attaches each parsed fragment node to doc so the
// checks below can run on a single tree; loop body not visible - confirm.
276 for _, n := range nodes {
281 if err := checkTreeConsistency(doc); err != nil {
285 got, err := dump(doc)
289 // Compare the parsed tree to the #document section.
291 return fmt.Errorf("got vs want:\n----\n%s----\n%s----", got, want)
// Inputs listed in renderTestBlacklist and all fragment cases (non-empty
// context) take this branch.
// NOTE(review): branch body not visible; presumably it skips the render
// check that follows - confirm.
294 if renderTestBlacklist[text] || context != "" {
298 // Check that rendering and re-parsing results in an identical tree.
// Whatever Render returns is handed to pw.CloseWithError, and the re-parse
// reads from pr.
// NOTE(review): pr and pw are presumably the two ends of an io.Pipe; their
// creation is not visible in this excerpt.
301 pw.CloseWithError(Render(pw, doc))
303 doc1, err := Parse(pr)
307 got1, err := dump(doc1)
312 return fmt.Errorf("got vs got1:\n----\n%s----\n%s----", got, got1)
318 // Some test inputs result in parse trees that are not 'well-formed' despite
319 // following the HTML5 recovery algorithms. Rendering and re-parsing such a
320 // tree will not result in an exact clone of that tree. We blacklist such
321 // inputs from the render test.
322 var renderTestBlacklist = map[string]bool{
323 // The second <a> will be reparented to the first <table>'s parent. This
324 // results in an <a> whose parent is an <a>, which is not 'well-formed'.
325 `<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y`: true,
326 // The same thing with a <p>:
327 `<p><table></p>`: true,
328 // More cases of <a> being reparented:
329 `<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe`: true,
330 `<a><table><a></table><p><a><div><a>`: true,
331 `<a><table><td><a><table></table><a></tr><a></table><a>`: true,
332 `<template><a><table><a>`: true,
333 // A similar reparenting situation involving <nobr>:
334 `<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3`: true,
335 // A <plaintext> element is reparented, putting it before a table.
336 // A <plaintext> element can't have anything after it in HTML.
337 `<table><plaintext><td>`: true,
338 `<!doctype html><table><plaintext></plaintext>`: true,
339 `<!doctype html><table><tbody><plaintext></plaintext>`: true,
340 `<!doctype html><table><tbody><tr><plaintext></plaintext>`: true,
341 // A form inside a table inside a form doesn't work either.
342 `<!doctype html><form><table></form><form></table></form>`: true,
343 // A script that ends at EOF may escape its own closing tag when rendered.
344 `<!doctype html><script><!--<script `: true,
345 `<!doctype html><script><!--<script <`: true,
346 `<!doctype html><script><!--<script <a`: true,
347 `<!doctype html><script><!--<script </`: true,
348 `<!doctype html><script><!--<script </s`: true,
349 `<!doctype html><script><!--<script </script`: true,
350 `<!doctype html><script><!--<script </scripta`: true,
351 `<!doctype html><script><!--<script -`: true,
352 `<!doctype html><script><!--<script -a`: true,
353 `<!doctype html><script><!--<script -<`: true,
354 `<!doctype html><script><!--<script --`: true,
355 `<!doctype html><script><!--<script --a`: true,
356 `<!doctype html><script><!--<script --<`: true,
357 `<script><!--<script `: true,
358 `<script><!--<script <a`: true,
359 `<script><!--<script </script`: true,
360 `<script><!--<script </scripta`: true,
361 `<script><!--<script -`: true,
362 `<script><!--<script -a`: true,
363 `<script><!--<script --`: true,
364 `<script><!--<script --a`: true,
365 `<script><!--<script <`: true,
366 `<script><!--<script </`: true,
367 `<script><!--<script </s`: true,
368 // Reconstructing the active formatting elements results in a <plaintext>
369 // element that contains an <a> element.
370 `<!doctype html><p><a><plaintext>b`: true,
371 `<table><math><select><mi><select></table>`: true,
// TestNodeConsistency passes ParseFragment a context node whose Data and
// DataAtom disagree and reports a failure when no error comes back.
374 func TestNodeConsistency(t *testing.T) {
375 // inconsistentNode is a Node whose DataAtom and Data do not agree.
376 inconsistentNode := &Node{
378 DataAtom: atom.Frameset,
// Only the error is inspected; the parsed nodes are discarded.
381 _, err := ParseFragment(strings.NewReader("<p>hello</p>"), inconsistentNode)
383 t.Errorf("got nil error, want non-nil")
// TestParseFragmentWithNilContext calls ParseFragment with a nil context
// node. The return values are not inspected; the test only fails if the call
// panics.
387 func TestParseFragmentWithNilContext(t *testing.T) {
388 // This shouldn't panic.
389 ParseFragment(strings.NewReader("<p>hello</p>"), nil)
// BenchmarkParser runs Parse b.N times over the contents of
// testdata/go1.html. The file is read once before the loop, and its length is
// passed to b.SetBytes as the number of bytes processed per iteration.
392 func BenchmarkParser(b *testing.B) {
393 buf, err := ioutil.ReadFile("testdata/go1.html")
395 b.Fatalf("could not read testdata/go1.html: %v", err)
397 b.SetBytes(int64(len(buf)))
401 for i := 0; i < b.N; i++ {
// A new Buffer over the same bytes is built for every iteration, since
// reading from a Buffer consumes it.
// NOTE(review): both the parsed tree and the error from Parse are discarded.
402 Parse(bytes.NewBuffer(buf))