1 # -*- test-case-name: twisted.web.test.test_xml -*-
2 # Copyright (c) Twisted Matrix Laboratories.
3 # See LICENSE for details.
6 Some fairly inadequate testcases for Twisted XML support.
9 from twisted.trial.unittest import TestCase
10 from twisted.web import sux
11 from twisted.web import microdom
12 from twisted.web import domhelpers
15 class Sux0r(sux.XMLParser):
def getTagStarts(self):
    """
    Return a list of the entries in C{self.tokens} whose first item is
    C{'start'}.
    """
    starts = []
    for entry in self.tokens:
        if entry[0] == 'start':
            starts.append(entry)
    return starts
def gotTagStart(self, name, attrs):
    """
    Append a C{("start", name, attrs)} tuple to C{self.tokens}.
    """
    startToken = ("start", name, attrs)
    self.tokens.append(startToken)
def gotText(self, text):
    """
    Append a C{("text", text)} tuple to C{self.tokens}.
    """
    textToken = ("text", text)
    self.tokens.append(textToken)
28 class SUXTest(TestCase):
31 s = "<bork><bork><bork>"
35 self.assertEqual(len(ms.getTagStarts()),3)
38 class MicroDOMTest(TestCase):
def test_leadingTextDropping(self):
    """
    When a document has no top-level node, lenient mode keeps the text
    that precedes the first element instead of dropping it.
    """
    markup = "Hi orders! <br>Well. <br>"
    document = microdom.parseString(markup, beExtremelyLenient=True)
    rendered = document.firstChild().toxml()
    self.assertEqual(rendered,
                     '<html>Hi orders! <br />Well. <br /></html>')
50 def test_trailingTextDropping(self):
Ensure that *trailing* text in a malformed document with no top-level
element is not dropped.
56 d = microdom.parseString(s, beExtremelyLenient=True)
57 self.assertEqual(d.firstChild().toxml(),
58 '<html><br />Hi orders!</html>')
61 def test_noTags(self):
63 A string with nothing that looks like a tag at all should just
64 be parsed as body text.
67 d = microdom.parseString(s, beExtremelyLenient=True)
68 self.assertEqual(d.firstChild().toxml(),
69 "<html>Hi orders!</html>")
72 def test_surroundingCrap(self):
74 If a document is surrounded by non-xml text, the text should
78 d = microdom.parseString(s, beExtremelyLenient=True)
79 self.assertEqual(d.firstChild().toxml(),
80 "<html>Hi<br /> orders!</html>")
83 def testCaseSensitiveSoonCloser(self):
87 <A HREF="http://www.apache.org/"><IMG SRC="/icons/apache_pb.gif"></A>
91 This is an insane set of text nodes that should NOT be gathered under
96 d = microdom.parseString(s, beExtremelyLenient=1)
97 l = domhelpers.findNodesNamed(d.documentElement, 'a')
98 n = domhelpers.gatherTextNodes(l[0],1).replace(' ',' ')
99 self.assertEqual(n.find('insane'), -1)
102 def test_lenientParenting(self):
104 Test that C{parentNode} attributes are set to meaningful values when
105 we are parsing HTML that lacks a root node.
107 # Spare the rod, ruin the child.
109 d = microdom.parseString(s, beExtremelyLenient=1)
110 self.assertIdentical(d.documentElement,
111 d.documentElement.firstChild().parentNode)
114 def test_lenientParentSingle(self):
116 Test that the C{parentNode} attribute is set to a meaningful value
117 when we parse an HTML document that has a non-Element root node.
120 d = microdom.parseString(s, beExtremelyLenient=1)
121 self.assertIdentical(d.documentElement,
122 d.documentElement.firstChild().parentNode)
125 def testUnEntities(self):
128 This HTML goes between Stupid <=CrAzY!=> Dumb.
131 d = microdom.parseString(s, beExtremelyLenient=1)
132 n = domhelpers.gatherTextNodes(d)
133 self.assertNotEquals(n.find('>'), -1)
def testEmptyError(self):
    """
    Parsing an empty string raises L{sux.ParseError}.
    """
    emptyDocument = ""
    self.assertRaises(
        sux.ParseError, microdom.parseString, emptyDocument)
138 def testTameDocument(self):
150 d = microdom.parseString(s)
152 domhelpers.gatherTextNodes(d.documentElement).strip() ,'test')
154 def testAwfulTagSoup(self):
157 <head><title> I send you this message to have your advice!!!!</titl e
160 <body bgcolor alink hlink vlink>
162 <h1><BLINK>SALE</blINK> TWENTY MILLION EMAILS & FUR COAT NOW
163 FREE WITH `ENLARGER'</h1>
165 YES THIS WONDERFUL AWFER IS NOW HERER!!!
167 <script LANGUAGE="javascript">
168 function give_answers() {
172 </script><a href=/foo.com/lalal name=foo>lalal</a>
176 d = microdom.parseString(s, beExtremelyLenient=1)
177 l = domhelpers.findNodesNamed(d.documentElement, 'blink')
178 self.assertEqual(len(l), 1)
180 def testScriptLeniency(self):
182 <script>(foo < bar) and (bar > foo)</script>
183 <script language="javascript">foo </scrip bar </script>
185 <script src="foo">baz</script>
186 <script /><script></script>
188 d = microdom.parseString(s, beExtremelyLenient=1)
189 self.assertEqual(d.firstChild().firstChild().firstChild().data,
190 "(foo < bar) and (bar > foo)")
192 d.firstChild().getElementsByTagName("script")[1].firstChild().data,
195 def testScriptLeniencyIntelligence(self):
196 # if there is comment or CDATA in script, the autoquoting in bEL mode
198 s = """<script><!-- lalal --></script>"""
200 microdom.parseString(s, beExtremelyLenient=1).firstChild().toxml(), s)
201 s = """<script><![CDATA[lalal]]></script>"""
203 microdom.parseString(s, beExtremelyLenient=1).firstChild().toxml(), s)
204 s = """<script> // <![CDATA[
208 microdom.parseString(s, beExtremelyLenient=1).firstChild().toxml(), s)
def testPreserveCase(self):
    """
    With C{preserveCase=1} the serialized document keeps the case of the
    tags as written, while C{caseInsensitive=1} still lets start and end
    tags that differ only in case (e.g. C{<bOrk>}/C{</BoRk>}) match.

    The same markup, and an all-lower-case variant of it, are parsed under
    several option combinations and the resulting documents are compared
    pairwise with C{isEqualToDocument}.
    """
    s = '<eNcApSuLaTe><sUxor></sUxor><bOrk><w00T>TeXt</W00t></BoRk></EnCaPsUlAtE>'
    s2 = s.lower().replace('text', 'TeXt')
    # these are the only two option permutations that *can* parse the above
    d = microdom.parseString(s, caseInsensitive=1, preserveCase=1)
    d2 = microdom.parseString(s, caseInsensitive=1, preserveCase=0)
    # caseInsensitive=0 preserveCase=0 is not valid, it's converted to
    # caseInsensitive=0 preserveCase=1
    d3 = microdom.parseString(s2, caseInsensitive=0, preserveCase=1)
    d4 = microdom.parseString(s2, caseInsensitive=1, preserveCase=0)
    d5 = microdom.parseString(s2, caseInsensitive=1, preserveCase=1)

    def assertSameDocument(left, right):
        # Report both serializations on failure so the mismatch is visible.
        self.assertTrue(left.isEqualToDocument(right),
                        "%r != %r" % (left.toxml(), right.toxml()))

    # this is slightly contrived, toxml() doesn't need to be identical
    # for the documents to be equivalent (i.e. <b></b> to <b/>),
    # however this assertion tests preserving case for start and
    # end tags while still matching stuff like <bOrk></BoRk>
    self.assertEqual(d.documentElement.toxml(), s)
    assertSameDocument(d, d2)
    assertSameDocument(d2, d3)
    # caseInsensitive=0 on the left, NOT preserveCase=1 on the right
    ## XXX THIS TEST IS TURNED OFF UNTIL SOMEONE WHO CARES ABOUT FIXING IT DOES
    #self.assertFalse(d3.isEqualToDocument(d2), "%r == %r" % (d3.toxml(), d2.toxml()))
    assertSameDocument(d3, d4)
    assertSameDocument(d4, d5)
def testDifferentQuotes(self):
    """
    Attribute values delimited by double quotes and by single quotes are
    both readable through C{getAttribute}.
    """
    root = microdom.parseString('<test a="a" b=\'b\' />').documentElement
    for attributeName, expectedValue in (('a', 'a'), ('b', 'b')):
        self.assertEqual(root.getAttribute(attributeName), expectedValue)
def testLinebreaks(self):
    """
    Attributes separated from the tag name and from each other by
    newlines and tabs are still parsed.
    """
    root = microdom.parseString('<test \na="a"\n\tb="#b" />').documentElement
    for attributeName, expectedValue in (('a', 'a'), ('b', '#b')):
        self.assertEqual(root.getAttribute(attributeName), expectedValue)
248 def testMismatchedTags(self):
249 for s in '<test>', '<test> </tset>', '</test>':
250 self.assertRaises(microdom.MismatchedTags, microdom.parseString, s)
252 def testComment(self):
253 s = "<bar><!--<foo />--></bar>"
254 d = microdom.parseString(s)
255 e = d.documentElement
256 self.assertEqual(e.nodeName, "bar")
258 self.assert_(isinstance(c, microdom.Comment))
259 self.assertEqual(c.value, "<foo />")
261 self.assert_(c is not c2)
262 self.assertEqual(c2.toxml(), "<!--<foo />-->")
265 d = microdom.parseString("<bar>xxxx</bar>").documentElement
266 text = d.childNodes[0]
267 self.assert_(isinstance(text, microdom.Text))
268 self.assertEqual(text.value, "xxxx")
269 clone = text.cloneNode()
270 self.assert_(clone is not text)
271 self.assertEqual(clone.toxml(), "xxxx")
273 def testEntities(self):
274 nodes = microdom.parseString("<b>&AB;</b>").documentElement.childNodes
275 self.assertEqual(len(nodes), 2)
276 self.assertEqual(nodes[0].data, "&")
277 self.assertEqual(nodes[1].data, "AB;")
278 self.assertEqual(nodes[0].cloneNode().toxml(), "&")
280 self.assert_(isinstance(n, microdom.EntityReference))
283 s = '<x><![CDATA[</x>\r\n & foo]]></x>'
284 cdata = microdom.parseString(s).documentElement.childNodes[0]
285 self.assert_(isinstance(cdata, microdom.CDATASection))
286 self.assertEqual(cdata.data, "</x>\r\n & foo")
287 self.assertEqual(cdata.cloneNode().toxml(), "<![CDATA[</x>\r\n & foo]]>")
def testSingletons(self):
    """
    Self-closing tags written as C{<b/>}, C{<b />} and C{<b} + newline +
    C{/>} all parse to C{b} elements equal to those produced by the plain
    C{<b/>} spelling.
    """
    s = "<foo><b/><b /><b\n/></foo>"
    s2 = "<foo><b/><b/><b/></foo>"
    nodes = microdom.parseString(s).documentElement.childNodes
    nodes2 = microdom.parseString(s2).documentElement.childNodes
    self.assertEqual(len(nodes), 3)
    # zip() stops at the shorter sequence, so pin the reference length too;
    # otherwise a short reference parse would skip comparisons silently.
    self.assertEqual(len(nodes2), 3)
    for (n, n2) in zip(nodes, nodes2):
        self.assertIsInstance(n, microdom.Element)
        self.assertEqual(n.nodeName, "b")
        self.assertTrue(n.isEqualToNode(n2))
300 def testAttributes(self):
302 node = microdom.parseString(s).documentElement
304 self.assertEqual(node.getAttribute("a"), "b")
305 self.assertEqual(node.getAttribute("c"), None)
306 self.assert_(node.hasAttribute("a"))
307 self.assert_(not node.hasAttribute("c"))
308 a = node.getAttributeNode("a")
309 self.assertEqual(a.value, "b")
311 node.setAttribute("foo", "bar")
312 self.assertEqual(node.getAttribute("foo"), "bar")
def testChildren(self):
    """
    C{childNodes}, C{firstChild}, C{lastChild} and C{hasChildNodes}
    reflect the children of a parsed element in document order.
    """
    s = "<foo><bar /><baz /><bax>foo</bax></foo>"
    d = microdom.parseString(s).documentElement
    self.assertEqual([n.nodeName for n in d.childNodes], ["bar", "baz", "bax"])
    self.assertEqual(d.lastChild().nodeName, "bax")
    self.assertEqual(d.firstChild().nodeName, "bar")
    self.assertTrue(d.hasChildNodes())
    # <bar /> is an empty element, so it reports no children.
    self.assertFalse(d.firstChild().hasChildNodes())
323 def testMutate(self):
325 s1 = '<foo a="b"><bar/><foo/></foo>'
326 s2 = '<foo a="b">foo</foo>'
327 d = microdom.parseString(s).documentElement
328 d1 = microdom.parseString(s1).documentElement
329 d2 = microdom.parseString(s2).documentElement
331 d.appendChild(d.cloneNode())
332 d.setAttribute("a", "b")
333 child = d.childNodes[0]
334 self.assertEqual(child.getAttribute("a"), None)
335 self.assertEqual(child.nodeName, "foo")
337 d.insertBefore(microdom.Element("bar"), child)
338 self.assertEqual(d.childNodes[0].nodeName, "bar")
339 self.assertEqual(d.childNodes[1], child)
340 for n in d.childNodes:
341 self.assertEqual(n.parentNode, d)
342 self.assert_(d.isEqualToNode(d1))
345 self.assertEqual(len(d.childNodes), 1)
346 self.assertEqual(d.childNodes[0].nodeName, "bar")
348 t = microdom.Text("foo")
349 d.replaceChild(t, d.firstChild())
350 self.assertEqual(d.firstChild(), t)
351 self.assert_(d.isEqualToNode(d2))
354 def test_replaceNonChild(self):
356 L{Node.replaceChild} raises L{ValueError} if the node given to be
357 replaced is not a child of the node C{replaceChild} is called on.
359 parent = microdom.parseString('<foo />')
360 orphan = microdom.parseString('<bar />')
361 replacement = microdom.parseString('<baz />')
364 ValueError, parent.replaceChild, replacement, orphan)
def testSearch(self):
    """
    C{getElementById} and C{getElementsByTagName} find the expected nodes
    in a lower-case document, in a mixed-case document parsed
    case-sensitively, and in the same mixed-case document parsed
    case-insensitively.
    """
    lowerMarkup = "<foo><bar id='me' /><baz><foo /></baz></foo>"
    mixedMarkup = "<fOo><bAr id='me' /><bAz><fOO /></bAz></fOo>"
    plain = microdom.parseString(lowerMarkup)
    sensitive = microdom.parseString(
        mixedMarkup, caseInsensitive=0, preserveCase=1)
    insensitive = microdom.parseString(
        mixedMarkup, caseInsensitive=1, preserveCase=1)

    # Default options on all-lower-case markup.
    top = plain.documentElement
    self.assertEqual(top.firstChild(), plain.getElementById('me'))
    self.assertEqual(plain.getElementsByTagName("foo"),
                     [top, top.lastChild().firstChild()])

    # Case-sensitive: each spelling matches only the tag written that way.
    top = sensitive.documentElement
    self.assertEqual(top.firstChild(), sensitive.getElementById('me'))
    self.assertEqual(sensitive.getElementsByTagName('fOo'), [top])
    self.assertEqual(sensitive.getElementsByTagName('fOO'),
                     [top.lastChild().firstChild()])
    self.assertEqual(sensitive.getElementsByTagName('foo'), [])

    # Case-insensitive: any spelling matches both "foo" elements.
    top = insensitive.documentElement
    self.assertEqual(top.firstChild(), insensitive.getElementById('me'))
    for spelling in ('FOO', 'fOo'):
        self.assertEqual(insensitive.getElementsByTagName(spelling),
                         [top, top.lastChild().firstChild()])
393 def testDoctype(self):
394 s = ('<?xml version="1.0"?>'
395 '<!DOCTYPE foo PUBLIC "baz" "http://www.example.com/example.dtd">'
398 d = microdom.parseString(s)
399 d2 = microdom.parseString(s2)
400 self.assertEqual(d.doctype,
401 'foo PUBLIC "baz" "http://www.example.com/example.dtd"')
402 self.assertEqual(d.toxml(), s)
403 self.failIf(d.isEqualToDocument(d2))
404 self.failUnless(d.documentElement.isEqualToNode(d2.documentElement))
406 samples = [("<img/>", "<img />"),
407 ("<foo A='b'>x</foo>", '<foo A="b">x</foo>'),
408 ("<foo><BAR /></foo>", "<foo><BAR></BAR></foo>"),
409 ("<foo>hello there & yoyoy</foo>",
410 "<foo>hello there & yoyoy</foo>"),
def testOutput(self):
    """
    For every C{(input, expected)} pair in C{self.samples}, parsing the
    input case-sensitively and serializing its document element yields
    the expected string, and the two strings parse to equal documents.
    """
    for s, out in self.samples:
        d = microdom.parseString(s, caseInsensitive=0)
        d2 = microdom.parseString(out, caseInsensitive=0)
        testOut = d.documentElement.toxml()
        self.assertEqual(out, testOut)
        self.assertTrue(d.isEqualToDocument(d2))
def testErrors(self):
    """
    Each of a handful of malformed documents makes C{parseString} raise
    an exception.
    """
    malformed = ["<foo>&am</foo>", "<foo", "<f>&</f>", "<() />"]
    for markup in malformed:
        self.assertRaises(Exception, microdom.parseString, markup)
def testCaseInsensitive(self):
    """
    By default tags that differ only in case match each other and are
    serialized in lower case; with C{caseInsensitive=0} the same markup
    raises L{microdom.MismatchedTags}.  Attribute-name case follows the
    C{caseInsensitive}/C{preserveCase} options.
    """
    s = "<foo a='b'><BAx>x</bax></FOO>"
    s2 = '<foo a="b"><bax>x</bax></foo>'
    s3 = "<FOO a='b'><BAx>x</BAx></FOO>"
    s4 = "<foo A='b'>x</foo>"
    d = microdom.parseString(s)
    d2 = microdom.parseString(s2)
    d3 = microdom.parseString(s3, caseInsensitive=1)
    d4 = microdom.parseString(s4, caseInsensitive=1, preserveCase=1)
    d5 = microdom.parseString(s4, caseInsensitive=1, preserveCase=0)
    d6 = microdom.parseString(s4, caseInsensitive=0, preserveCase=0)
    out = microdom.parseString(s).documentElement.toxml()
    self.assertRaises(microdom.MismatchedTags, microdom.parseString,
                      s, caseInsensitive=0)
    self.assertEqual(out, s2)
    self.assertTrue(d.isEqualToDocument(d2))
    self.assertTrue(d.isEqualToDocument(d3))
    # Case-insensitive lookup finds "A" under the name "a" ...
    self.assertTrue(d4.documentElement.hasAttribute('a'))
    # ... but a case-sensitive parse does not.
    self.assertFalse(d6.documentElement.hasAttribute('a'))
    self.assertEqual(d4.documentElement.toxml(), '<foo A="b">x</foo>')
    self.assertEqual(d5.documentElement.toxml(), '<foo a="b">x</foo>')
446 def testEatingWhitespace(self):
449 d = microdom.parseString(s)
450 self.failUnless(not d.documentElement.hasChildNodes(),
451 d.documentElement.childNodes)
452 self.failUnless(d.isEqualToDocument(microdom.parseString('<hello></hello>')))
454 def testLenientAmpersand(self):
455 prefix = "<?xml version='1.0'?>"
456 # we use <pre> so space will be preserved
457 for i, o in [("&", "&"),
460 ("&hello monkey", "&hello monkey")]:
461 d = microdom.parseString("%s<pre>%s</pre>"
462 % (prefix, i), beExtremelyLenient=1)
463 self.assertEqual(d.documentElement.toxml(), "<pre>%s</pre>" % o)
464 # non-space preserving
465 d = microdom.parseString("<t>hello & there</t>", beExtremelyLenient=1)
466 self.assertEqual(d.documentElement.toxml(), "<t>hello & there</t>")
468 def testInsensitiveLenient(self):
470 d = microdom.parseString(
471 "<?xml version='1.0'?><bar><xA><y>c</Xa> <foo></bar>",
472 beExtremelyLenient=1)
473 self.assertEqual(d.documentElement.firstChild().toxml(), "<xa><y>c</y></xa>")
def testLaterCloserSimple(self):
    """
    A list whose C{<li>} tags are never closed is serialized, after a
    lenient parse, with every item explicitly closed.
    """
    document = microdom.parseString(
        "<ul><li>foo<li>bar<li>baz</ul>", beExtremelyLenient=1)
    self.assertEqual(
        "<ul><li>foo</li><li>bar</li><li>baz</li></ul>",
        document.documentElement.toxml())
def testLaterCloserCaseInsensitive(self):
    """
    Upper-case, unclosed C{<DT>}/C{<DD>} tags inside a C{<DL>} are
    serialized, after a lenient parse, as closed lower-case elements.
    """
    document = microdom.parseString(
        "<DL><p><DT>foo<DD>bar</DL>", beExtremelyLenient=1)
    self.assertEqual(
        "<dl><p></p><dt>foo</dt><dd>bar</dd></dl>",
        document.documentElement.toxml())
489 def testLaterCloserTable(self):
491 "<tr><th>name<th>value<th>comment"
492 "<tr><th>this<td>tag<td>soup"
493 "<tr><th>must<td>be<td>handled"
495 expected = ("<table>"
496 "<tr><th>name</th><th>value</th><th>comment</th></tr>"
497 "<tr><th>this</th><td>tag</td><td>soup</td></tr>"
498 "<tr><th>must</th><td>be</td><td>handled</td></tr>"
500 d = microdom.parseString(s, beExtremelyLenient=1)
501 actual = d.documentElement.toxml()
502 self.assertEqual(expected, actual)
503 testLaterCloserTable.todo = "Table parsing needs to be fixed."
505 def testLaterCloserDL(self):
507 "<dt>word<dd>definition"
508 "<dt>word<dt>word<dd>definition<dd>definition"
511 "<dt>word</dt><dd>definition</dd>"
512 "<dt>word</dt><dt>word</dt><dd>definition</dd><dd>definition</dd>"
514 d = microdom.parseString(s, beExtremelyLenient=1)
515 actual = d.documentElement.toxml()
516 self.assertEqual(expected, actual)
518 def testLaterCloserDL2(self):
520 "<dt>word<dd>definition<p>more definition"
524 "<dt>word</dt><dd>definition<p>more definition</p></dd>"
527 d = microdom.parseString(s, beExtremelyLenient=1)
528 actual = d.documentElement.toxml()
529 self.assertEqual(expected, actual)
531 testLaterCloserDL2.todo = "unclosed <p> messes it up."
533 def testUnicodeTolerance(self):
535 s = '<foo><bar><baz /></bar></foo>'
536 j =(u'<?xml version="1.0" encoding="UCS-2" ?>\r\n<JAPANESE>\r\n'
537 u'<TITLE>\u5c02\u9580\u5bb6\u30ea\u30b9\u30c8 </TITLE></JAPANESE>')
538 j2=('\xff\xfe<\x00?\x00x\x00m\x00l\x00 \x00v\x00e\x00r\x00s\x00i\x00o'
539 '\x00n\x00=\x00"\x001\x00.\x000\x00"\x00 \x00e\x00n\x00c\x00o\x00d'
540 '\x00i\x00n\x00g\x00=\x00"\x00U\x00C\x00S\x00-\x002\x00"\x00 \x00?'
541 '\x00>\x00\r\x00\n\x00<\x00J\x00A\x00P\x00A\x00N\x00E\x00S\x00E'
542 '\x00>\x00\r\x00\n\x00<\x00T\x00I\x00T\x00L\x00E\x00>\x00\x02\\'
543 '\x80\x95\xb6[\xea0\xb90\xc80 \x00<\x00/\x00T\x00I\x00T\x00L\x00E'
544 '\x00>\x00<\x00/\x00J\x00A\x00P\x00A\x00N\x00E\x00S\x00E\x00>\x00')
546 fmt = str(len(s) / 2) + 'H'
547 return struct.pack('<' + fmt, *struct.unpack('>' + fmt, s))
548 urd = microdom.parseString(reverseBytes(s.encode('UTF-16')))
549 ud = microdom.parseString(s.encode('UTF-16'))
550 sd = microdom.parseString(s)
551 self.assert_(ud.isEqualToDocument(sd))
552 self.assert_(ud.isEqualToDocument(urd))
553 ud = microdom.parseString(j)
554 urd = microdom.parseString(reverseBytes(j2))
555 sd = microdom.parseString(j2)
556 self.assert_(ud.isEqualToDocument(sd))
557 self.assert_(ud.isEqualToDocument(urd))
559 # test that raw text still gets encoded
560 # test that comments get encoded
561 j3=microdom.parseString(u'<foo/>')
562 hdr='<?xml version="1.0"?>'
563 div=microdom.lmx().text(u'\u221a', raw=1).node
564 de=j3.documentElement
566 de.appendChild(j3.createComment(u'\u221a'))
567 self.assertEqual(j3.toxml(), hdr+
568 u'<foo><div>\u221a</div><!--\u221a--></foo>'.encode('utf8'))
570 def testNamedChildren(self):
571 tests = {"<foo><bar /><bar unf='1' /><bar>asdfadsf</bar>"
573 '<foo>asdf</foo>' : 0,
574 '<foo><bar><bar></bar></bar></foo>' : 1,
576 for t in tests.keys():
577 node = microdom.parseString(t).documentElement
578 result = domhelpers.namedChildren(node, 'bar')
579 self.assertEqual(len(result), tests[t])
581 self.assert_(hasattr(result[0], 'tagName'))
def testCloneNode(self):
    """
    A deep C{cloneNode} of an element produces distinct node objects at
    every level checked, with the same child counts, the same serialized
    form and the same C{namespace}.
    """
    s = '<foo a="b"><bax>x</bax></foo>'
    node = microdom.parseString(s).documentElement
    clone = node.cloneNode(deep=1)
    self.assertNotEqual(node, clone)
    self.assertEqual(len(node.childNodes), len(clone.childNodes))
    c1, c2 = node.firstChild(), clone.firstChild()
    self.assertNotEqual(c1, c2)
    self.assertEqual(len(c1.childNodes), len(c2.childNodes))
    self.assertNotEqual(c1.firstChild(), c2.firstChild())
    self.assertEqual(s, clone.toxml())
    self.assertEqual(node.namespace, clone.namespace)
def testCloneDocument(self):
    """
    A deep C{cloneNode} of a whole document (including its XML declaration
    and doctype) is a distinct object that serializes to the original
    markup and compares equal to the original in both directions.
    """
    s = ('<?xml version="1.0"?>'
         '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'
         '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><foo></foo>')

    node = microdom.parseString(s)
    clone = node.cloneNode(deep=1)
    self.assertNotEqual(node, clone)
    self.assertEqual(len(node.childNodes), len(clone.childNodes))
    self.assertEqual(s, clone.toxml())

    self.assertTrue(clone.isEqualToDocument(node))
    self.assertTrue(node.isEqualToDocument(clone))
612 n = microdom.Element("p")
613 lmx = microdom.lmx(n)
620 s = '<p>foo<b a="c"><foo z="foo"></foo><foo></foo><bar c="y"></bar></b></p>'
621 self.assertEqual(s, n.toxml())
624 n = microdom.Element("p")
625 d = {n : 1} # will fail if Element is unhashable
def testEscaping(self):
    """
    C{microdom.escape} turns the raw string into its entity-escaped form
    and C{microdom.unescape} turns that back into the raw string.
    """
    raw = "&'some \"stuff\"', <what up?>"
    # NOTE(review): expected form assumes escape() rewrites &, ", < and >
    # as entity references and leaves single quotes alone -- confirm
    # against microdom.escape.
    cooked = "&amp;'some &quot;stuff&quot;', &lt;what up?&gt;"
    esc1 = microdom.escape(raw)
    self.assertEqual(esc1, cooked)
    self.assertEqual(microdom.unescape(esc1), raw)
635 def testNamespaces(self):
639 <y q="1" x:q="2" y:q="3" />
640 <y:y xml:space="1">here is some space </y:y>
645 d = microdom.parseString(s)
646 # at least make sure it doesn't traceback
648 self.assertEqual(d.documentElement.namespace,
650 self.assertEqual(d.documentElement.getElementsByTagName("y")[0].namespace,
653 d.documentElement.getElementsByTagName("y")[1].getAttributeNS('base','q'),
656 d2 = microdom.parseString(s2)
657 self.assertEqual(d2.documentElement.namespace,
659 self.assertEqual(d2.documentElement.getElementsByTagName("y")[0].namespace,
662 d2.documentElement.getElementsByTagName("y")[1].getAttributeNS('base','q'),
def testNamespaceDelete(self):
    """
    A document in which a child element resets the default namespace with
    C{xmlns=""} comes back unchanged from a parse followed by C{toxml}.
    """
    original = ('<?xml version="1.0"?><html xmlns="http://www.w3.org/TR/REC-html40">'
                '<body xmlns=""></body></html>')
    document = microdom.parseString(original)
    self.assertEqual(original, document.toxml())
def testNamespaceInheritance(self):
    """
    A child element created without a namespace, placed under a parent
    that has one, is serialized without an C{xmlns} attribute of its own.
    An unspecified namespace is thus distinct from an undefined one.
    """
    xhtml = 'http://www.w3.org/1999/xhtml'
    inner = microdom.Element('ol')
    outer = microdom.Element('div', namespace=xhtml)
    # will only work if childNodes is mutated. not sure why.
    outer.childNodes = [inner]
    serialized = outer.toxml()
    self.assertEqual(
        serialized,
        '<div xmlns="http://www.w3.org/1999/xhtml"><ol></ol></div>')
686 def test_prefixedTags(self):
688 XML elements with a prefixed name as per upper level tag definition
689 have a start-tag of C{"<prefix:tag>"} and an end-tag of
692 Refer to U{http://www.w3.org/TR/xml-names/#ns-using} for details.
694 outerNamespace = "http://example.com/outer"
695 innerNamespace = "http://example.com/inner"
697 document = microdom.Document()
698 # Create the root in one namespace. Microdom will probably make this
699 # the default namespace.
700 root = document.createElement("root", namespace=outerNamespace)
702 # Give the root some prefixes to use.
703 root.addPrefixes({innerNamespace: "inner"})
705 # Append a child to the root from the namespace that prefix is bound
707 tag = document.createElement("tag", namespace=innerNamespace)
709 # Give that tag a child too. This way we test rendering of tags with
710 # children and without children.
711 child = document.createElement("child", namespace=innerNamespace)
713 tag.appendChild(child)
714 root.appendChild(tag)
715 document.appendChild(root)
717 # ok, the xml should appear like this
719 '<?xml version="1.0"?>'
720 '<root xmlns="http://example.com/outer" '
721 'xmlns:inner="http://example.com/inner">'
722 '<inner:tag><inner:child></inner:child></inner:tag>'
725 xmlOut = document.toxml()
726 self.assertEqual(xmlOut, xmlOk)
729 def test_prefixPropagation(self):
731 Children of prefixed tags respect the default namespace at the point
732 where they are rendered. Specifically, they are not influenced by the
733 prefix of their parent as that prefix has no bearing on them.
735 See U{http://www.w3.org/TR/xml-names/#scoping} for details.
737 To further clarify the matter, the following::
739 <root xmlns="http://example.com/ns/test">
740 <mytag xmlns="http://example.com/ns/mytags">
741 <mysubtag xmlns="http://example.com/ns/mytags">
742 <element xmlns="http://example.com/ns/test"></element>
747 Should become this after all the namespace declarations have been
750 <root xmlns="http://example.com/ns/test"
751 xmlns:mytags="http://example.com/ns/mytags">
759 outerNamespace = "http://example.com/outer"
760 innerNamespace = "http://example.com/inner"
762 document = microdom.Document()
763 # creates a root element
764 root = document.createElement("root", namespace=outerNamespace)
765 document.appendChild(root)
767 # Create a child with a specific namespace with a prefix bound to it.
768 root.addPrefixes({innerNamespace: "inner"})
769 mytag = document.createElement("mytag",namespace=innerNamespace)
770 root.appendChild(mytag)
772 # Create a child of that which has the outer namespace.
773 mysubtag = document.createElement("mysubtag", namespace=outerNamespace)
774 mytag.appendChild(mysubtag)
777 '<?xml version="1.0"?>'
778 '<root xmlns="http://example.com/outer" '
779 'xmlns:inner="http://example.com/inner">'
781 '<mysubtag></mysubtag>'
785 xmlOut = document.toxml()
786 self.assertEqual(xmlOut, xmlOk)
class TestBrokenHTML(TestCase):
    """
    Tests for microdom's handling of very bad HTML when
    C{beExtremelyLenient} is enabled.  They are inspired by HTML generated
    by a mailer which breaks up very long lines by splitting them with
    '!\\n '.  The expected behaviour is loosely modelled on the way Firefox
    treats very bad HTML.
    """

    def checkParsed(self, input, expected, beExtremelyLenient=1):
        """
        Parse C{input} and assert that the XML of the resulting document
        element equals C{expected}.
        """
        document = microdom.parseString(
            input, beExtremelyLenient=beExtremelyLenient)
        rendered = document.documentElement.toxml()
        self.assertEqual(rendered, expected)


    def test_brokenAttributeName(self):
        """
        A split attribute name is handled on a best-effort basis; what
        matters is that no exception is raised.
        """
        self.checkParsed(
            '<body><h1><div al!\n ign="center">Foo</div></h1></body>',
            '<body><h1><div ign="center" al="True">Foo</div></h1></body>')


    def test_brokenAttributeValue(self):
        """
        A split attribute value is kept as written, break included.
        """
        markup = '<body><h1><div align="cen!\n ter">Foo</div></h1></body>'
        self.checkParsed(
            markup,
            '<body><h1><div align="cen!\n ter">Foo</div></h1></body>')


    def test_brokenOpeningTag(self):
        """
        A split opening tag is handled on a best-effort basis; what
        matters is that no exception is raised.
        """
        self.checkParsed(
            '<body><h1><sp!\n an>Hello World!</span></h1></body>',
            '<body><h1><sp an="True">Hello World!</sp></h1></body>')


    def test_brokenSelfClosingTag(self):
        """
        A split self-closing tag is handled on a best-effort basis; what
        matters is that no exception is raised.
        """
        cases = [
            ('<body><span /!\n></body>', '<body><span></span></body>'),
            ('<span!\n />', '<span></span>'),
        ]
        for markup, wanted in cases:
            self.checkParsed(markup, wanted)


    def test_brokenClosingTag(self):
        """
        A split closing tag is handled on a best-effort basis; what
        matters is that no exception is raised.
        """
        # The first three placements of the break all yield the same output.
        wanted = '<body><h1><span>Hello World!</span></h1></body>'
        sameResult = (
            '<body><h1><span>Hello World!</sp!\nan></h1></body>',
            '<body><h1><span>Hello World!</!\nspan></h1></body>',
            '<body><h1><span>Hello World!</span!\n></h1></body>',
        )
        for markup in sameResult:
            self.checkParsed(markup, wanted)
        # A break right after "<" produces a spurious "!" element instead.
        self.checkParsed(
            '<body><h1><span>Hello World!<!\n/span></h1></body>',
            '<body><h1><span>Hello World!<!></!></span></h1></body>')
class NodeTests(TestCase):
    """
    Tests for L{microdom.Node}.
    """

    def test_isNodeEqualTo(self):
        """
        L{Node.isEqualToNode} returns C{True} if and only if passed a
        L{Node} with the same children.
        """
        def freshNode():
            return microdom.Node(object())

        # A node is equal to itself
        left = freshNode()
        self.assertTrue(left.isEqualToNode(left))
        right = freshNode()
        # Two nodes with no children are equal
        self.assertTrue(left.isEqualToNode(right))
        left.appendChild(freshNode())
        # A node with no children is not equal to a node with a child
        self.assertFalse(left.isEqualToNode(right))
        right.appendChild(freshNode())
        # A node with a child and no grandchildren is equal to another node
        # with a child and no grandchildren.
        self.assertTrue(left.isEqualToNode(right))
        # A node with a child and a grandchild is not equal to another node
        # with a child and no grandchildren.
        left.firstChild().appendChild(freshNode())
        self.assertFalse(left.isEqualToNode(right))
        # A node with a child and a grandchild is equal to another node with
        # a child and a grandchild.
        right.firstChild().appendChild(freshNode())
        self.assertTrue(left.isEqualToNode(right))

    def test_validChildInstance(self):
        """
        Children of L{Node} instances must also be L{Node} instances.
        """
        parent = microdom.Node()
        kid = microdom.Node()
        # Every (new, reference) pairing that includes a non-Node argument.
        invalidPairs = ((kid, None), (None, kid), (None, None))

        # Node.appendChild() only accepts Node instances.
        parent.appendChild(kid)
        self.assertRaises(TypeError, parent.appendChild, None)
        # Node.insertBefore() only accepts Node instances.
        for first, second in invalidPairs:
            self.assertRaises(TypeError, parent.insertBefore, first, second)
        # Node.removeChild() only accepts Node instances.
        parent.removeChild(kid)
        self.assertRaises(TypeError, parent.removeChild, None)
        # Node.replaceChild() only accepts Node instances.
        for first, second in invalidPairs:
            self.assertRaises(TypeError, parent.replaceChild, first, second)
class DocumentTests(TestCase):
    """
    Tests for L{Document}.
    """
    doctype = 'foo PUBLIC "baz" "http://www.example.com/example.dtd"'

    def test_isEqualToNode(self):
        """
        L{Document.isEqualToNode} returns C{True} if and only if passed a
        L{Document} with the same C{doctype} and C{documentElement}.
        """
        # A document is equal to itself
        left = microdom.Document()
        self.assertTrue(left.isEqualToNode(left))
        # Two documents with neither a doctype nor a documentElement are
        # equal.
        right = microdom.Document()
        self.assertTrue(left.isEqualToNode(right))
        # Setting a doctype on only one of them makes them unequal.
        left.doctype = self.doctype
        self.assertFalse(left.isEqualToNode(right))
        # Two documents with the same doctype are equal
        right.doctype = self.doctype
        self.assertTrue(left.isEqualToNode(right))
        # Giving only one of them a documentElement makes them unequal.
        left.appendChild(microdom.Node(object()))
        self.assertFalse(left.isEqualToNode(right))
        # Two documents with equal documentElements are equal.
        right.appendChild(microdom.Node(object()))
        self.assertTrue(left.isEqualToNode(right))
        # Documents whose documentElements differ are not equal.
        left.documentElement.appendChild(microdom.Node(object()))
        self.assertFalse(left.isEqualToNode(right))


    def test_childRestriction(self):
        """
        L{Document.appendChild} raises L{ValueError} when the document
        already has a child.
        """
        holder = microdom.Document()
        firstChild = microdom.Node()
        secondChild = microdom.Node()
        holder.appendChild(firstChild)
        self.assertRaises(ValueError, holder.appendChild, secondChild)
971 class EntityReferenceTests(TestCase):
973 Tests for L{EntityReference}.
975 def test_isEqualToNode(self):
977 L{EntityReference.isEqualToNode} returns C{True} if and only if passed
978 a L{EntityReference} with the same C{eref}.
981 microdom.EntityReference('quot').isEqualToNode(
982 microdom.EntityReference('quot')))
984 microdom.EntityReference('quot').isEqualToNode(
985 microdom.EntityReference('apos')))
989 class CharacterDataTests(TestCase):
991 Tests for L{CharacterData}.
993 def test_isEqualToNode(self):
995 L{CharacterData.isEqualToNode} returns C{True} if and only if passed a
996 L{CharacterData} with the same value.
999 microdom.CharacterData('foo').isEqualToNode(
1000 microdom.CharacterData('foo')))
1002 microdom.CharacterData('foo').isEqualToNode(
1003 microdom.CharacterData('bar')))
1007 class CommentTests(TestCase):
1009 Tests for L{Comment}.
1011 def test_isEqualToNode(self):
1013 L{Comment.isEqualToNode} returns C{True} if and only if passed a
1014 L{Comment} with the same value.
1017 microdom.Comment('foo').isEqualToNode(
1018 microdom.Comment('foo')))
1020 microdom.Comment('foo').isEqualToNode(
1021 microdom.Comment('bar')))
1025 class TextTests(TestCase):
1029 def test_isEqualToNode(self):
1031 L{Text.isEqualToNode} returns C{True} if and only if passed a L{Text}
1032 which represents the same data.
1035 microdom.Text('foo', raw=True).isEqualToNode(
1036 microdom.Text('foo', raw=True)))
1038 microdom.Text('foo', raw=True).isEqualToNode(
1039 microdom.Text('foo', raw=False)))
1041 microdom.Text('foo', raw=True).isEqualToNode(
1042 microdom.Text('bar', raw=True)))
1046 class CDATASectionTests(TestCase):
1048 Tests for L{CDATASection}.
1050 def test_isEqualToNode(self):
1052 L{CDATASection.isEqualToNode} returns C{True} if and only if passed a
1053 L{CDATASection} which represents the same data.
1056 microdom.CDATASection('foo').isEqualToNode(
1057 microdom.CDATASection('foo')))
1059 microdom.CDATASection('foo').isEqualToNode(
1060 microdom.CDATASection('bar')))
1064 class ElementTests(TestCase):
1066 Tests for L{Element}.
1068 def test_isEqualToNode(self):
1070 L{Element.isEqualToNode} returns C{True} if and only if passed a
1071 L{Element} with the same C{nodeName}, C{namespace}, C{childNodes}, and
1076 'foo', {'a': 'b'}, object(), namespace='bar').isEqualToNode(
1078 'foo', {'a': 'b'}, object(), namespace='bar')))
1080 # Elements with different nodeName values do not compare equal.
1083 'foo', {'a': 'b'}, object(), namespace='bar').isEqualToNode(
1085 'bar', {'a': 'b'}, object(), namespace='bar')))
1087 # Elements with different namespaces do not compare equal.
1090 'foo', {'a': 'b'}, object(), namespace='bar').isEqualToNode(
1092 'foo', {'a': 'b'}, object(), namespace='baz')))
1094 # Elements with different childNodes do not compare equal.
1095 one = microdom.Element('foo', {'a': 'b'}, object(), namespace='bar')
1096 two = microdom.Element('foo', {'a': 'b'}, object(), namespace='bar')
1097 two.appendChild(microdom.Node(object()))
1098 self.assertFalse(one.isEqualToNode(two))
1100 # Elements with different attributes do not compare equal.
1103 'foo', {'a': 'b'}, object(), namespace='bar').isEqualToNode(
1105 'foo', {'a': 'c'}, object(), namespace='bar')))