1 # -*- coding: utf-8 -*-
4 Tests specific to the extended etree API
6 Tests that apply to the general ElementTree API should go into
10 from __future__ import absolute_import
12 from collections import OrderedDict
24 from .common_imports import etree, StringIO, BytesIO, HelperTestCase
25 from .common_imports import fileInTestDir, fileUrlInTestDir, read_file, path2url, tmpfile
26 from .common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest
27 from .common_imports import canonicalize, _str, _bytes
30 TESTED VERSION: %s""" % etree.__version__ + """
31 Python: %r""" % (sys.version_info,) + """
32 lxml.etree: %r""" % (etree.LXML_VERSION,) + """
33 libxml used: %r""" % (etree.LIBXML_VERSION,) + """
34 libxml compiled: %r""" % (etree.LIBXML_COMPILED_VERSION,) + """
35 libxslt used: %r""" % (etree.LIBXSLT_VERSION,) + """
36 libxslt compiled: %r""" % (etree.LIBXSLT_COMPILED_VERSION,) + """
37 FS encoding: %s""" % (sys.getfilesystemencoding(),) + """
38 Default encoding: %s""" % (sys.getdefaultencoding(),) + """
39 Max Unicode: %s""" % (sys.maxunicode,) + """
49 class ETreeOnlyTestCase(HelperTestCase):
50 """Tests only for etree, not ElementTree"""
def test_version(self):
    # The version string must be text, and LXML_VERSION a 4-tuple of
    # ints whose first component prefixes the version string.
    self.assertIsInstance(etree.__version__, _unicode)
    self.assertIsInstance(etree.LXML_VERSION, tuple)
    self.assertEqual(len(etree.LXML_VERSION), 4)
    for part in etree.LXML_VERSION:
        self.assertIsInstance(part, int)
    self.assertTrue(etree.__version__.startswith(
        str(etree.LXML_VERSION[0])))
65 if hasattr(self.etree, '__pyx_capi__'):
66 # newer Pyrex compatible C-API
67 self.assertTrue(isinstance(self.etree.__pyx_capi__, dict))
68 self.assertTrue(len(self.etree.__pyx_capi__) > 0)
70 # older C-API mechanism
71 self.assertTrue(hasattr(self.etree, '_import_c_api'))
def test_include_paths(self):
    # lxml.get_include() must report at least two entries, one of which
    # is the "includes" directory next to the lxml package itself.
    import lxml  # imported locally; only this test needs the package
    includes = lxml.get_include()
    self.assertTrue(includes)
    self.assertTrue(len(includes) >= 2)
    self.assertTrue(os.path.join(os.path.dirname(lxml.__file__), 'includes') in includes,
                    includes)
def test_element_names(self):
    # A tag with an empty namespace ('{}name') is the same as the plain tag.
    Element = self.etree.Element
    el = Element('name')
    self.assertEqual(el.tag, 'name')
    el = Element('{}name')
    self.assertEqual(el.tag, 'name')
def test_element_name_empty(self):
    # Tags without a local name are rejected on creation and on
    # assignment to .tag of an existing element.
    Element = self.etree.Element
    el = Element('name')
    self.assertRaises(ValueError, Element, '{}')
    self.assertRaises(ValueError, setattr, el, 'tag', '{}')

    self.assertRaises(ValueError, Element, '{test}')
    self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
def test_element_name_colon(self):
    # A colon inside a tag name is rejected on creation and on .tag assignment.
    Element = self.etree.Element
    self.assertRaises(ValueError, Element, 'p:name')
    self.assertRaises(ValueError, Element, '{test}p:name')

    el = Element('name')
    self.assertRaises(ValueError, setattr, el, 'tag', 'p:name')
def test_element_name_quote(self):
    # Single or double quotes inside a tag name are rejected, with or
    # without a namespace, on creation and on .tag assignment.
    Element = self.etree.Element
    self.assertRaises(ValueError, Element, "p'name")
    self.assertRaises(ValueError, Element, 'p"name')

    self.assertRaises(ValueError, Element, "{test}p'name")
    self.assertRaises(ValueError, Element, '{test}p"name')

    el = Element('name')
    self.assertRaises(ValueError, setattr, el, 'tag', "p'name")
    self.assertRaises(ValueError, setattr, el, 'tag', 'p"name')
def test_element_name_space(self):
    # Whitespace anywhere in a tag name is rejected.
    Element = self.etree.Element
    self.assertRaises(ValueError, Element, ' name ')
    self.assertRaises(ValueError, Element, 'na me')
    self.assertRaises(ValueError, Element, '{test} name')

    el = Element('name')
    self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
def test_subelement_name_empty(self):
    # SubElement() rejects tags that have no local name.
    Element = self.etree.Element
    SubElement = self.etree.SubElement

    el = Element('name')
    self.assertRaises(ValueError, SubElement, el, '{}')
    self.assertRaises(ValueError, SubElement, el, '{test}')
def test_subelement_name_colon(self):
    # SubElement() rejects tag names containing a colon.
    Element = self.etree.Element
    SubElement = self.etree.SubElement

    el = Element('name')
    self.assertRaises(ValueError, SubElement, el, 'p:name')
    self.assertRaises(ValueError, SubElement, el, '{test}p:name')
def test_subelement_name_quote(self):
    # SubElement() rejects tag names containing single or double quotes.
    Element = self.etree.Element
    SubElement = self.etree.SubElement

    el = Element('name')
    self.assertRaises(ValueError, SubElement, el, "p'name")
    self.assertRaises(ValueError, SubElement, el, "{test}p'name")

    self.assertRaises(ValueError, SubElement, el, 'p"name')
    self.assertRaises(ValueError, SubElement, el, '{test}p"name')
def test_subelement_name_space(self):
    # SubElement() rejects tag names containing whitespace.
    Element = self.etree.Element
    SubElement = self.etree.SubElement

    el = Element('name')
    self.assertRaises(ValueError, SubElement, el, ' name ')
    self.assertRaises(ValueError, SubElement, el, 'na me')
    self.assertRaises(ValueError, SubElement, el, '{test} name')
def test_subelement_attribute_invalid(self):
    # Invalid attribute names/values make SubElement() fail, and the
    # failed calls must not leave a child behind on the parent.
    Element = self.etree.Element
    SubElement = self.etree.SubElement

    el = Element('name')
    self.assertRaises(ValueError, SubElement, el, 'name', {'a b c' : 'abc'})
    self.assertRaises(ValueError, SubElement, el, 'name', {'a' : 'a\0\n'})
    self.assertEqual(0, len(el))
def test_qname_empty(self):
    # QName() rejects empty or missing names.
    QName = self.etree.QName
    for args in (('',), (None,), (None, None), ('test', '')):
        self.assertRaises(ValueError, QName, *args)
def test_qname_none(self):
    # A None namespace yields an un-namespaced QName.
    QName = self.etree.QName
    q = QName(None, 'TAG')
    self.assertEqual('TAG', q)
    self.assertEqual('TAG', q.localname)
    self.assertEqual(None, q.namespace)
def test_qname_colon(self):
    # QName() rejects names containing a colon.
    QName = self.etree.QName
    self.assertRaises(ValueError, QName, 'p:name')
    self.assertRaises(ValueError, QName, 'test', 'p:name')
def test_qname_space(self):
    # QName() rejects names containing whitespace.
    QName = self.etree.QName
    self.assertRaises(ValueError, QName, ' name ')
    self.assertRaises(ValueError, QName, 'na me')
    self.assertRaises(ValueError, QName, 'test', ' name')
def test_qname_namespace_localname(self):
    # ET doesn't have namespace/localname properties on QNames
    QName = self.etree.QName
    namespace, localname = 'http://myns', 'a'
    qname = QName(namespace, localname)
    self.assertEqual(namespace, qname.namespace)
    self.assertEqual(localname, qname.localname)
def test_qname_element(self):
    # ET doesn't have namespace/localname properties on QNames
    # A QName built from an element must compare equal to the QName
    # (and its text) that the element was created from.
    QName = self.etree.QName
    qname1 = QName('http://myns', 'a')
    a = self.etree.Element(qname1, nsmap={'p' : 'http://myns'})

    qname2 = QName(a)
    self.assertEqual(a.tag, qname1.text)
    self.assertEqual(a.tag, qname1)
    self.assertEqual(qname1.text, qname2.text)
    self.assertEqual(qname1, qname2.text)
    self.assertEqual(qname1.text, qname2)
    self.assertEqual(qname1, qname2)
def test_qname_text_resolve(self):
    # ET doesn't resolve QNames as text values
    etree = self.etree
    qname = etree.QName('http://myns', 'a')
    a = etree.Element(qname, nsmap={'p' : 'http://myns'})
    # Assigning a QName as text stores it as a prefixed name.
    a.text = qname

    self.assertEqual("p:a", a.text)
def test_nsmap_prefix_invalid(self):
    # Namespace prefixes containing quotes, '&' or ':' are rejected.
    etree = self.etree
    for prefix in ('"', '&', 'a:b'):
        self.assertRaises(ValueError,
                          etree.Element, "root", nsmap={prefix : 'testns'})
def test_clear_keep_tail(self):
    # clear(keep_tail=True) drops attributes, text and children of the
    # element but preserves its tail text.
    XML = self.etree.XML
    tostring = self.etree.tostring
    a = XML('<a aa="A"><b ba="B">B1</b>B2<c ca="C">C1</c>C2</a>')
    a[0].clear(keep_tail=True)
    self.assertEqual(_bytes('<a aa="A"><b/>B2<c ca="C">C1</c>C2</a>'), tostring(a))
def test_attribute_has_key(self):
    # ET in Py 3.x has no "attrib.has_key()" method
    XML = self.etree.XML

    root = XML(_bytes('<foo bar="Bar" xmlns:ns="http://ns.codespeak.net/test" ns:baz="Baz" />'))
    self.assertEqual(
        True, root.attrib.has_key('bar'))
    # 'baz' only exists in the namespace, not as a plain attribute name
    self.assertEqual(
        False, root.attrib.has_key('baz'))
    self.assertEqual(
        False, root.attrib.has_key('hah'))
    self.assertEqual(
        True,
        root.attrib.has_key('{http://ns.codespeak.net/test}baz'))
def test_attribute_set(self):
    # A value stored with set() is returned unchanged by get().
    root = self.etree.Element("root")
    root.set("attr", "TEST")
    self.assertEqual("TEST", root.get("attr"))
def test_attribute_set_nonstring(self):
    # ElementTree accepts arbitrary attribute values
    # lxml.etree allows only strings
    Element = self.etree.Element

    root = Element("root")
    root.set("attr", "TEST")
    self.assertEqual("TEST", root.get("attr"))
    self.assertRaises(TypeError, root.set, "newattr", 5)
def test_attrib_and_keywords(self):
    # Element() merges an attrib mapping with keyword attributes and
    # must not modify the mapping it was given.
    Element = self.etree.Element

    root = Element("root")
    root.set("attr", "TEST")
    self.assertEqual("TEST", root.attrib["attr"])

    root2 = Element("root2", root.attrib, attr2='TOAST')
    self.assertEqual("TEST", root2.attrib["attr"])
    self.assertEqual("TOAST", root2.attrib["attr2"])
    # the source element must not have picked up the keyword attribute
    self.assertEqual(None, root.attrib.get("attr2"))
286 def test_attrib_order(self):
287 Element = self.etree.Element
289 keys = ["attr%d" % i for i in range(12, 4, -1)]
290 values = ["TEST-%d" % i for i in range(12, 4, -1)]
291 items = list(zip(keys, values))
293 root = Element("root")
294 for key, value in items:
296 self.assertEqual(keys, root.attrib.keys())
297 self.assertEqual(values, root.attrib.values())
300 ('attr_99', 'TOAST-1'),
301 ('attr_98', 'TOAST-2'),
303 ordered_dict_types = [OrderedDict, lambda x:x]
304 if sys.version_info >= (3, 6):
305 ordered_dict_types.append(dict)
307 # Keyword arguments are not ordered in Py<3.6, and thus get sorted.
310 expected_keys = [attr[0] for attr in attr_order]
311 expected_values = [attr[1] for attr in attr_order]
312 expected_items = list(zip(expected_keys, expected_values))
314 for dict_type in ordered_dict_types:
315 root2 = Element("root2", dict_type(root.attrib),
316 attr_99='TOAST-1', attr_98='TOAST-2')
319 self.assertSequenceEqual(expected_keys, root2.attrib.keys())
320 self.assertSequenceEqual(expected_values, root2.attrib.values())
321 self.assertSequenceEqual(expected_items, root2.attrib.items())
322 except AssertionError as exc:
323 exc.args = ("Order of '%s': %s" % (dict_type.__name__, exc.args[0]),) + exc.args[1:]
326 self.assertEqual(keys, root.attrib.keys())
327 self.assertEqual(values, root.attrib.values())
def test_attribute_set_invalid(self):
    # ElementTree accepts arbitrary attribute values
    # lxml.etree allows only strings, or None for (html5) boolean attributes
    Element = self.etree.Element
    root = Element("root")
    for bad_value in (5, object, None):
        self.assertRaises(TypeError, root.set, "newattr", bad_value)
    # a missing value argument is a TypeError as well
    self.assertRaises(TypeError, root.set, "newattr")
def test_strip_attributes(self):
    # strip_attributes() removes the named attributes from the whole
    # subtree; each case starts from a freshly parsed tree.
    XML = self.etree.XML
    xml = _bytes('<test a="5" b="10" c="20"><x a="4" b="2"/></test>')

    root = XML(xml)
    self.etree.strip_attributes(root, 'a')
    self.assertEqual(_bytes('<test b="10" c="20"><x b="2"></x></test>'),
                     self._writeElement(root))

    root = XML(xml)
    self.etree.strip_attributes(root, 'b', 'c')
    self.assertEqual(_bytes('<test a="5"><x a="4"></x></test>'),
                     self._writeElement(root))
def test_strip_attributes_ns(self):
    # Plain names only match un-namespaced attributes; '{ns}name' and
    # '{ns}*' select namespaced ones.  Each case re-parses the input.
    XML = self.etree.XML
    xml = _bytes('<test xmlns:n="http://test/ns" a="6" b="10" c="20" n:a="5"><x a="4" n:b="2"/></test>')

    root = XML(xml)
    self.etree.strip_attributes(root, 'a')
    self.assertEqual(
        _bytes('<test xmlns:n="http://test/ns" b="10" c="20" n:a="5"><x n:b="2"></x></test>'),
        self._writeElement(root))

    root = XML(xml)
    self.etree.strip_attributes(root, '{http://test/ns}a', 'c')
    self.assertEqual(
        _bytes('<test xmlns:n="http://test/ns" a="6" b="10"><x a="4" n:b="2"></x></test>'),
        self._writeElement(root))

    root = XML(xml)
    self.etree.strip_attributes(root, '{http://test/ns}*')
    self.assertEqual(
        _bytes('<test xmlns:n="http://test/ns" a="6" b="10" c="20"><x a="4"></x></test>'),
        self._writeElement(root))
375 def test_strip_elements(self):
377 xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
380 self.etree.strip_elements(root, 'a')
381 self.assertEqual(_bytes('<test><x></x></test>'),
382 self._writeElement(root))
385 self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
386 self.assertEqual(_bytes('<test><a></a><x><a></a></x></test>'),
387 self._writeElement(root))
390 self.etree.strip_elements(root, 'c')
391 self.assertEqual(_bytes('<test><a><b></b></a><x><a><b></b></a></x></test>'),
392 self._writeElement(root))
394 def test_strip_elements_ns(self):
396 xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
399 self.etree.strip_elements(root, 'a')
400 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
401 self._writeElement(root))
404 self.etree.strip_elements(root, '{urn:a}b', 'c')
405 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
406 self._writeElement(root))
409 self.etree.strip_elements(root, '{urn:a}*', 'c')
410 self.assertEqual(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
411 self._writeElement(root))
414 self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
415 self.assertEqual(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
416 self._writeElement(root))
418 def test_strip_tags(self):
420 xml = _bytes('<test>TEST<a>A<b>B<c/>CT</b>BT</a>AT<x>X<a>A<b/>BT<c/>CT</a>AT</x>XT</test>')
423 self.etree.strip_tags(root, 'a')
424 self.assertEqual(_bytes('<test>TESTA<b>B<c></c>CT</b>BTAT<x>XA<b></b>BT<c></c>CTAT</x>XT</test>'),
425 self._writeElement(root))
428 self.etree.strip_tags(root, 'b', 'c', 'X', 'Y', 'Z')
429 self.assertEqual(_bytes('<test>TEST<a>ABCTBT</a>AT<x>X<a>ABTCT</a>AT</x>XT</test>'),
430 self._writeElement(root))
433 self.etree.strip_tags(root, 'c')
434 self.assertEqual(_bytes('<test>TEST<a>A<b>BCT</b>BT</a>AT<x>X<a>A<b></b>BTCT</a>AT</x>XT</test>'),
435 self._writeElement(root))
437 def test_strip_tags_pi_comment(self):
439 PI = self.etree.ProcessingInstruction
440 Comment = self.etree.Comment
441 xml = _bytes('<!--comment1-->\n<?PI1?>\n<test>TEST<!--comment2-->XT<?PI2?></test>\n<!--comment3-->\n<?PI1?>')
444 self.etree.strip_tags(root, PI)
445 self.assertEqual(_bytes('<!--comment1-->\n<?PI1?>\n<test>TEST<!--comment2-->XT</test>\n<!--comment3-->\n<?PI1?>'),
446 self._writeElement(root))
449 self.etree.strip_tags(root, Comment)
450 self.assertEqual(_bytes('<!--comment1-->\n<?PI1?>\n<test>TESTXT<?PI2?></test>\n<!--comment3-->\n<?PI1?>'),
451 self._writeElement(root))
454 self.etree.strip_tags(root, PI, Comment)
455 self.assertEqual(_bytes('<!--comment1-->\n<?PI1?>\n<test>TESTXT</test>\n<!--comment3-->\n<?PI1?>'),
456 self._writeElement(root))
459 self.etree.strip_tags(root, Comment, PI)
460 self.assertEqual(_bytes('<!--comment1-->\n<?PI1?>\n<test>TESTXT</test>\n<!--comment3-->\n<?PI1?>'),
461 self._writeElement(root))
463 def test_strip_tags_pi_comment_all(self):
465 ElementTree = self.etree.ElementTree
466 PI = self.etree.ProcessingInstruction
467 Comment = self.etree.Comment
468 xml = _bytes('<!--comment1-->\n<?PI1?>\n<test>TEST<!--comment2-->XT<?PI2?></test>\n<!--comment3-->\n<?PI1?>')
471 self.etree.strip_tags(ElementTree(root), PI)
472 self.assertEqual(_bytes('<!--comment1-->\n<test>TEST<!--comment2-->XT</test>\n<!--comment3-->'),
473 self._writeElement(root))
476 self.etree.strip_tags(ElementTree(root), Comment)
477 self.assertEqual(_bytes('<?PI1?>\n<test>TESTXT<?PI2?></test>\n<?PI1?>'),
478 self._writeElement(root))
481 self.etree.strip_tags(ElementTree(root), PI, Comment)
482 self.assertEqual(_bytes('<test>TESTXT</test>'),
483 self._writeElement(root))
486 self.etree.strip_tags(ElementTree(root), Comment, PI)
487 self.assertEqual(_bytes('<test>TESTXT</test>'),
488 self._writeElement(root))
490 def test_strip_tags_doc_style(self):
495 I like <strong>sheep</strong>.
497 I like lots of <strong>sheep</strong>.
499 Click <a href="http://www.sheep.com">here</a>
500 for <a href="http://www.sheep.com">those</a> sheep.
507 self.etree.strip_tags(root, 'a')
508 self.assertEqual(re.sub(_bytes('</?a[^>]*>'), _bytes(''), xml).replace(_bytes('<br/>'), _bytes('<br></br>')),
509 self._writeElement(root))
512 self.etree.strip_tags(root, 'a', 'br')
513 self.assertEqual(re.sub(_bytes('</?a[^>]*>'), _bytes(''),
514 re.sub(_bytes('<br[^>]*>'), _bytes(''), xml)),
515 self._writeElement(root))
517 def test_strip_tags_ns(self):
519 xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>CT</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
522 self.etree.strip_tags(root, 'a')
523 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>CT</b>BT</n:a>AT<x>XA<b xmlns="urn:a"></b>BT<c xmlns="urn:x"></c>CTAT</x>XT</test>'),
524 self._writeElement(root))
527 self.etree.strip_tags(root, '{urn:a}b', 'c')
528 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>CT</b>BT</n:a>AT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
529 self._writeElement(root))
532 self.etree.strip_tags(root, '{urn:a}*', 'c')
533 self.assertEqual(_bytes('<test>TESTA<b>B<c xmlns="urn:c"></c>CT</b>BTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
534 self._writeElement(root))
def test_strip_tags_and_remove(self):
    # Strip a tag from parsed HTML, then remove the first child; the
    # removed <h1> takes its merged tail text (" foo ") with it.
    HTML = self.etree.HTML
    root = HTML(_bytes('<div><h1>title</h1> <b>foo</b> <p>boo</p></div>'))[0][0]
    self.assertEqual(_bytes('<div><h1>title</h1> <b>foo</b> <p>boo</p></div>'),
                     self.etree.tostring(root))
    self.etree.strip_tags(root, 'b')
    self.assertEqual(_bytes('<div><h1>title</h1> foo <p>boo</p></div>'),
                     self.etree.tostring(root))
    root.remove(root[0])
    self.assertEqual(_bytes('<div><p>boo</p></div>'),
                     self.etree.tostring(root))
550 # lxml.etree separates target and text
551 Element = self.etree.Element
552 SubElement = self.etree.SubElement
553 ProcessingInstruction = self.etree.ProcessingInstruction
556 a.append(ProcessingInstruction('foo', 'some more text'))
557 self.assertEqual(a[0].target, 'foo')
558 self.assertEqual(a[0].text, 'some more text')
560 def test_pi_parse(self):
562 root = XML(_bytes("<test><?mypi my test ?></test>"))
563 self.assertEqual(root[0].target, "mypi")
564 self.assertEqual(root[0].text, "my test ")
566 def test_pi_pseudo_attributes_get(self):
568 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
569 self.assertEqual(root[0].target, "mypi")
570 self.assertEqual(root[0].get('my'), "1")
571 self.assertEqual(root[0].get('test'), " abc ")
572 self.assertEqual(root[0].get('quotes'), "' '")
573 self.assertEqual(root[0].get('only'), None)
574 self.assertEqual(root[0].get('names'), None)
575 self.assertEqual(root[0].get('nope'), None)
577 def test_pi_pseudo_attributes_attrib(self):
579 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
580 self.assertEqual(root[0].target, "mypi")
581 self.assertEqual(root[0].attrib['my'], "1")
582 self.assertEqual(root[0].attrib['test'], " abc ")
583 self.assertEqual(root[0].attrib['quotes'], "' '")
584 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
585 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
586 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
def test_deepcopy_pi(self):
    # previously caused a crash
    ProcessingInstruction = self.etree.ProcessingInstruction

    a = ProcessingInstruction("PI", "ONE")
    b = copy.deepcopy(a)
    # changing the copy must not affect the original
    b.text = 'ANOTHER'

    self.assertEqual('ONE', a.text)
    self.assertEqual('ANOTHER', b.text)
def test_deepcopy_elementtree_pi(self):
    # Deep-copying a tree keeps the PI/comment siblings of the root;
    # deep-copying only the root element does not.
    XML = self.etree.XML
    tostring = self.etree.tostring
    root = XML(_bytes("<?mypi my test ?><test/><!--comment -->"))
    tree1 = self.etree.ElementTree(root)
    self.assertEqual(_bytes("<?mypi my test ?><test/><!--comment -->"),
                     tostring(tree1))

    tree2 = copy.deepcopy(tree1)
    self.assertEqual(_bytes("<?mypi my test ?><test/><!--comment -->"),
                     tostring(tree2))

    root2 = copy.deepcopy(tree1.getroot())
    self.assertEqual(_bytes("<test/>"),
                     tostring(root2))
def test_deepcopy_elementtree_dtd(self):
    # Deep-copying a tree keeps its internal DTD subset; deep-copying
    # only the root element serialises without it.
    XML = self.etree.XML
    tostring = self.etree.tostring
    xml = _bytes('<!DOCTYPE test [\n<!ENTITY entity "tasty">\n]>\n<test/>')
    root = XML(xml)
    tree1 = self.etree.ElementTree(root)
    self.assertEqual(xml, tostring(tree1))

    tree2 = copy.deepcopy(tree1)
    self.assertEqual(xml, tostring(tree2))

    root2 = copy.deepcopy(tree1.getroot())
    self.assertEqual(_bytes("<test/>"),
                     tostring(root2))
def test_deepcopy_pi_dtd(self):
    # A leading comment and the DTD both survive a deep copy of the tree.
    XML = self.etree.XML
    tostring = self.etree.tostring
    xml = _bytes('<!-- comment --><!DOCTYPE test [\n<!ENTITY entity "tasty">\n]>\n<test/>')
    root = XML(xml)
    tree1 = self.etree.ElementTree(root)
    self.assertEqual(xml, tostring(tree1))

    tree2 = copy.deepcopy(tree1)
    self.assertEqual(xml, tostring(tree2))
def test_parse_remove_comments(self):
    # A parser created with remove_comments=True drops all comments.
    fromstring = self.etree.fromstring
    tostring = self.etree.tostring
    XMLParser = self.etree.XMLParser

    xml = _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
    parser = XMLParser(remove_comments=True)
    root = fromstring(xml, parser)
    self.assertEqual(
        _bytes('<a><b><c/></b></a>'),
        tostring(root))
def test_parse_remove_pis(self):
    # A parser created with remove_pis=True drops all processing
    # instructions, including those outside the root element.
    parse = self.etree.parse
    tostring = self.etree.tostring
    XMLParser = self.etree.XMLParser

    xml = _bytes('<?test?><a><?A?><b><?B?><c/></b><?C?></a><?tail?>')

    f = BytesIO(xml)
    parser = XMLParser(remove_pis=True)
    tree = parse(f, parser)
    self.assertEqual(
        _bytes('<a><b><c/></b></a>'),
        tostring(tree))
def test_parse_parser_type_error(self):
    # ET raises IOError only
    # An object that is not a parser is rejected with TypeError.
    parse = self.etree.parse
    self.assertRaises(TypeError, parse, 'notthere.xml', object())
def test_iterparse_getiterator(self):
    # At each 'end' event, getiterator() sees the element's complete
    # subtree: <d> (1), <b> with <d> (2), <c> (1), <a> with all (4).
    iterparse = self.etree.iterparse
    f = BytesIO('<a><b><d/></b><c/></a>')

    counts = []
    for event, elem in iterparse(f):
        counts.append(len(list(elem.getiterator())))
    self.assertEqual(
        [1, 2, 1, 4],
        counts)
def test_iterparse_tree_comments(self):
    # ET removes comments
    # With default events only the three element 'end' events are
    # reported, but the comments remain in the resulting tree.
    iterparse = self.etree.iterparse
    tostring = self.etree.tostring

    f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
    events = list(iterparse(f))
    root = events[-1][1]  # the last 'end' event belongs to the root
    self.assertEqual(3, len(events))
    self.assertEqual(
        _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
        tostring(root))
def test_iterparse_comments(self):
    # ET removes comments
    # Requesting 'comment' events interleaves comments with the
    # element 'end' events in document order.
    iterparse = self.etree.iterparse
    tostring = self.etree.tostring

    def name(event, el):
        # comments are identified by their text, elements by their tag
        if event == 'comment':
            return el.text
        else:
            return el.tag

    f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
    events = list(iterparse(f, events=('end', 'comment')))
    root = events[-1][1]  # the last event is the 'end' of the root
    self.assertEqual(6, len(events))
    self.assertEqual(['A', ' B ', 'c', 'b', 'C', 'a'],
                     [ name(*item) for item in events ])
    self.assertEqual(
        _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
        tostring(root))
def test_iterparse_pis(self):
    # Requesting 'pi' events reports processing instructions in
    # document order, including those before and after the root.
    iterparse = self.etree.iterparse
    tostring = self.etree.tostring
    ElementTree = self.etree.ElementTree

    def name(event, el):
        # PIs are identified by (target, text), elements by their tag
        if event == 'pi':
            return el.target, el.text
        else:
            return el.tag

    f = BytesIO('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
    events = list(iterparse(f, events=('end', 'pi')))
    # the final event is the trailing PI, so the root's 'end' is second to last
    root = events[-2][1]
    self.assertEqual(8, len(events))
    self.assertEqual([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
                      ('pid','d'), 'a', ('pie','e')],
                     [ name(*item) for item in events ])
    self.assertEqual(
        _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>'),
        tostring(ElementTree(root)))
def test_iterparse_remove_comments(self):
    # remove_comments=True suppresses comments even when 'comment'
    # events are requested, and they are absent from the tree.
    iterparse = self.etree.iterparse
    tostring = self.etree.tostring

    f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
    events = list(iterparse(f, remove_comments=True,
                            events=('end', 'comment')))
    root = events[-1][1]
    self.assertEqual(3, len(events))
    self.assertEqual(['c', 'b', 'a'],
                     [ el.tag for (event, el) in events ])
    self.assertEqual(
        _bytes('<a><b><c/></b></a>'),
        tostring(root))
def test_iterparse_broken(self):
    # ET raises ExpatError, lxml raises XMLSyntaxError
    broken = BytesIO('<a><b><c/></a>')  # <b> is never closed
    events = self.etree.iterparse(broken)
    # the error only surfaces once the iterator is consumed
    self.assertRaises(self.etree.XMLSyntaxError, list, events)
def test_iterparse_broken_recover(self):
    # In recover mode, a missing end tag still yields exactly one
    # start and one end event per element.
    iterparse = self.etree.iterparse
    f = BytesIO('<a><b><c/></a>')
    it = iterparse(f, events=('start', 'end'), recover=True)
    events = [(ev, el.tag) for ev, el in it]
    root = it.root
    self.assertTrue(root is not None)

    for tag in ('a', 'b', 'c'):
        self.assertEqual(1, events.count(('start', tag)))
        self.assertEqual(1, events.count(('end', tag)))
def test_iterparse_broken_multi_recover(self):
    # Recover mode with several structural errors: start/end events
    # must stay balanced for every element that was opened.
    iterparse = self.etree.iterparse
    f = BytesIO('<a><b><c/></d><b><c/></a></b>')
    it = iterparse(f, events=('start', 'end'), recover=True)
    events = [(ev, el.tag) for ev, el in it]
    root = it.root
    self.assertTrue(root is not None)

    self.assertEqual(1, events.count(('start', 'a')))
    self.assertEqual(1, events.count(('end', 'a')))

    self.assertEqual(2, events.count(('start', 'b')))
    self.assertEqual(2, events.count(('end', 'b')))

    self.assertEqual(2, events.count(('start', 'c')))
    self.assertEqual(2, events.count(('end', 'c')))
800 def test_iterparse_strip(self):
801 iterparse = self.etree.iterparse
803 <a> \n \n <b> b test </b> \n
805 \n\t <c> \n </c> </a> \n """)
806 iterator = iterparse(f, remove_blank_text=True)
807 text = [ (element.text, element.tail)
808 for event, element in iterator ]
810 [(" b test ", None), (" \n ", None), (None, None)],
def test_iterparse_tag(self):
    # The tag= filter restricts events to elements with that tag.
    iterparse = self.etree.iterparse
    f = BytesIO('<a><b><d/></b><c/></a>')

    iterator = iterparse(f, tag="b", events=('start', 'end'))
    events = list(iterator)
    root = iterator.root
    self.assertEqual(
        [('start', root[0]), ('end', root[0])],
        events)
def test_iterparse_tag_all(self):
    # tag="*" matches every element: 4 elements x (start, end) = 8 events.
    iterparse = self.etree.iterparse
    f = BytesIO('<a><b><d/></b><c/></a>')

    iterator = iterparse(f, tag="*", events=('start', 'end'))
    events = list(iterator)
    self.assertEqual(
        8,
        len(events))
def test_iterparse_tag_ns(self):
    # A '{namespace}tag' filter matches elements in the default namespace.
    iterparse = self.etree.iterparse
    f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')

    iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end'))
    events = list(iterator)
    root = iterator.root
    self.assertEqual(
        [('start', root[0]), ('end', root[0])],
        events)
def test_iterparse_tag_ns_empty(self):
    # '{}b' matches only <b> elements that have no namespace.
    iterparse = self.etree.iterparse
    f = BytesIO('<a><b><d/></b><c/></a>')
    iterator = iterparse(f, tag="{}b", events=('start', 'end'))
    events = list(iterator)
    root = iterator.root
    self.assertEqual(
        [('start', root[0]), ('end', root[0])],
        events)

    # with a default namespace in effect, nothing is un-namespaced
    f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
    iterator = iterparse(f, tag="{}b", events=('start', 'end'))
    events = list(iterator)
    root = iterator.root
    self.assertEqual([], events)
def test_iterparse_tag_ns_all(self):
    # '{namespace}*' matches every element in that namespace.
    source = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
    iterator = self.etree.iterparse(
        source, tag="{urn:test:1}*", events=('start', 'end'))
    events = list(iterator)
    self.assertEqual(8, len(events))
def test_iterparse_tag_ns_empty_all(self):
    # '{}*' matches all un-namespaced elements, and nothing else.
    iterparse = self.etree.iterparse

    # every element is in the default namespace -> no match
    f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
    iterator = iterparse(f, tag="{}*", events=('start', 'end'))
    events = list(iterator)
    self.assertEqual([], events)

    # no namespaces at all -> every element matches
    f = BytesIO('<a><b><d/></b><c/></a>')
    iterator = iterparse(f, tag="{}*", events=('start', 'end'))
    events = list(iterator)
    self.assertEqual(8, len(events))
def test_iterparse_encoding_error(self):
    # Latin-1 bytes behind a UTF-8 declaration must fail to parse.
    text = _str('Søk på nettet')
    wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
    document = _str('%s<a>%s</a>') % (wrong_declaration, text)
    xml_latin1 = document.encode('iso-8859-1')

    self.assertRaises(self.etree.ParseError,
                      list, self.etree.iterparse(BytesIO(xml_latin1)))
def test_iterparse_encoding_8bit_override(self):
    # An explicit encoding= argument overrides the (wrong) encoding
    # declared in the document itself.
    text = _str('Søk på nettet', encoding="UTF-8")
    wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
    xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
                  ).encode('iso-8859-1')

    iterator = self.etree.iterparse(BytesIO(xml_latin1),
                                    encoding="iso-8859-1")
    self.assertEqual(1, len(list(iterator)))

    a = iterator.root
    self.assertEqual(a.text, text)
def test_iterparse_keep_cdata(self):
    # strip_cdata=False keeps the CDATA section in the serialised tree
    # while .text still yields the plain content.
    source = BytesIO('<root><![CDATA[test]]></root>')
    context = self.etree.iterparse(source, strip_cdata=False)
    content = [ el.text for event,el in context ]

    self.assertEqual(['test'], content)
    self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
                     self.etree.tostring(context.root))
def test_parser_encoding_unknown(self):
    # An unknown encoding name is rejected when the parser is created.
    self.assertRaises(
        LookupError, self.etree.XMLParser, encoding="hopefully unknown")
def test_parser_encoding(self):
    # Creating parsers for well-known encodings must not raise.
    for encoding in ("ascii", "utf-8", "iso-8859-1"):
        self.etree.XMLParser(encoding=encoding)
def test_feed_parser_recover(self):
    # Feed a document with an unclosed <a> in small chunks; in recover
    # mode the parser still builds root > a > othertag.
    parser = self.etree.XMLParser(recover=True)

    parser.feed('<?xml version=')
    parser.feed('"1.0"?><ro')
    parser.feed('ot><')
    parser.feed('a test="works"')
    parser.feed('><othertag/></root') # <a> not closed!
    parser.feed('>')

    root = parser.close()

    self.assertEqual(root.tag, "root")
    self.assertEqual(len(root), 1)
    self.assertEqual(root[0].tag, "a")
    self.assertEqual(root[0].get("test"), "works")
    self.assertEqual(len(root[0]), 1)
    self.assertEqual(root[0][0].tag, "othertag")
    # FIXME: would be nice to get some errors logged ...
    #self.assertTrue(len(parser.error_log) > 0, "error log is empty")
def test_feed_parser_recover_no_id_dict(self):
    # test that recover mode plays nicely with the no-id-dict setup
    parser = self.etree.XMLParser(recover=True, collect_ids=False)

    parser.feed('<?xml version=')
    parser.feed('"1.0"?><ro')
    parser.feed('ot xml:id="123"><')
    parser.feed('a test="works" xml:id=')
    parser.feed('"321"><othertag/></root') # <a> not closed!
    parser.feed('>')

    root = parser.close()

    self.assertEqual(root.tag, "root")
    self.assertEqual(len(root), 1)
    self.assertEqual(root[0].tag, "a")
    self.assertEqual(root[0].get("test"), "works")
    self.assertEqual(root[0].attrib, {
        'test': 'works',
        '{http://www.w3.org/XML/1998/namespace}id': '321'})
    self.assertEqual(len(root[0]), 1)
    self.assertEqual(root[0][0].tag, "othertag")
    # FIXME: would be nice to get some errors logged ...
    #self.assertTrue(len(parser.error_log) > 0, "error log is empty")
def test_elementtree_parser_target_type_error(self):
    # ElementTree.parse() needs the parser to produce an Element; a
    # target whose close() returns something else triggers TypeError,
    # but only after the target callbacks have run.
    assertEqual = self.assertEqual
    assertFalse = self.assertFalse

    events = []
    class Target(object):
        def start(self, tag, attrib):
            events.append("start")
            assertFalse(attrib)
            assertEqual("TAG", tag)
        def end(self, tag):
            events.append("end")
            assertEqual("TAG", tag)
        def close(self):
            return "DONE" # no Element!

    parser = self.etree.XMLParser(target=Target())
    tree = self.etree.ElementTree()

    self.assertRaises(TypeError,
                      tree.parse, BytesIO("<TAG/>"), parser=parser)
    self.assertEqual(["start", "end"], events)
def test_parser_target_feed_exception(self):
    # ET doesn't call .close() on errors
    events = []
    class Target(object):
        def start(self, tag, attrib):
            events.append("start-" + tag)
        def end(self, tag):
            events.append("end-" + tag)
            # abort parsing as soon as </a> is seen
            if tag == 'a':
                raise ValueError("dead and gone")
        def data(self, data):
            events.append("data-" + data)
        def close(self):
            events.append("close")
            return "DONE"

    parser = self.etree.XMLParser(target=Target())

    try:
        parser.feed(_bytes('<root>A<a>ca</a>B</root>'))
        done = parser.close()
        self.fail("error expected, but parsing succeeded")
    except ValueError:
        done = 'value error received as expected'

    # nothing after </a> is reported, but close() is still called
    self.assertEqual(["start-root", "data-A", "start-a",
                      "data-ca", "end-a", "close"],
                     events)
def test_parser_target_fromstring_exception(self):
    # ET doesn't call .close() on errors
    events = []
    class Target(object):
        def start(self, tag, attrib):
            events.append("start-" + tag)
        def end(self, tag):
            events.append("end-" + tag)
            # abort parsing as soon as </a> is seen
            if tag == 'a':
                raise ValueError("dead and gone")
        def data(self, data):
            events.append("data-" + data)
        def close(self):
            events.append("close")
            return "DONE"

    parser = self.etree.XMLParser(target=Target())

    try:
        done = self.etree.fromstring(_bytes('<root>A<a>ca</a>B</root>'),
                                     parser)
        self.fail("error expected, but parsing succeeded")
    except ValueError:
        done = 'value error received as expected'

    # nothing after </a> is reported, but close() is still called
    self.assertEqual(["start-root", "data-A", "start-a",
                      "data-ca", "end-a", "close"],
                     events)
def test_parser_target_feed_no_id_dict(self):
    # test that target parsing works nicely with the no-id-hash setup
    events = []
    class Target(object):
        def start(self, tag, attrib):
            events.append("start-" + tag)
        def end(self, tag):
            events.append("end-" + tag)
        def data(self, data):
            events.append("data-" + data)
        def comment(self, text):
            events.append("comment-" + text)
        def close(self):
            return "DONE"

    parser = self.etree.XMLParser(target=Target(), collect_ids=False)

    parser.feed(_bytes('<!--a--><root xml:id="123">A<!--b-->'))
    parser.feed(_bytes('<sub xml:id="321"/>B</root>'))
    done = parser.close()

    self.assertEqual("DONE", done)
    self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
                      "start-sub", "end-sub", "data-B", "end-root"],
                     events)
def test_parser_target_comment(self):
    # Comments before, inside and after the root element are all passed to
    # the target's comment() callback in document order.
    events = []

    class Target(object):
        def start(self, tag, attrib):
            events.append("start-" + tag)

        def end(self, tag):
            events.append("end-" + tag)

        def data(self, data):
            events.append("data-" + data)

        def comment(self, text):
            events.append("comment-" + text)

        def close(self):
            return "DONE"

    parser = self.etree.XMLParser(target=Target())

    parser.feed(_bytes('<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->'))
    done = parser.close()

    self.assertEqual("DONE", done)
    self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
                      "start-sub", "end-sub", "comment-c", "data-B",
                      "end-root", "comment-d"],
                     events)
def test_parser_target_pi(self):
    # Processing instructions before, inside and after the root element are
    # passed to the target's pi() callback in document order.
    events = []

    class Target(object):
        def start(self, tag, attrib):
            events.append("start-" + tag)

        def end(self, tag):
            events.append("end-" + tag)

        def data(self, data):
            events.append("data-" + data)

        def pi(self, target, data):
            events.append("pi-" + target + "-" + data)

        def close(self):
            return "DONE"

    parser = self.etree.XMLParser(target=Target())

    parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
    done = parser.close()

    self.assertEqual("DONE", done)
    self.assertEqual(["pi-test-a", "start-root", "data-A", "pi-test-b",
                      "data-B", "end-root", "pi-test-c"],
                     events)
def test_parser_target_cdata(self):
    # CDATA content reaches the target's data() callback as plain text.
    events = []

    class Target(object):
        def start(self, tag, attrib):
            events.append("start-" + tag)

        def end(self, tag):
            events.append("end-" + tag)

        def data(self, data):
            events.append("data-" + data)

        def close(self):
            return "DONE"

    # NOTE(review): the second keyword argument was lost from this copy of
    # the file; strip_cdata=False is assumed from the CDATA parser tests in
    # this file -- confirm against upstream.
    parser = self.etree.XMLParser(target=Target(),
                                  strip_cdata=False)

    parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
    done = parser.close()

    self.assertEqual("DONE", done)
    self.assertEqual(["start-root", "data-A", "start-a",
                      "data-ca", "end-a", "data-B", "end-root"],
                     events)
def test_parser_target_recover(self):
    # With a recovering parser, a mismatched closing tag still produces a
    # complete event stream and a regular close().
    events = []

    class Target(object):
        def start(self, tag, attrib):
            events.append("start-" + tag)

        def end(self, tag):
            events.append("end-" + tag)

        def data(self, data):
            events.append("data-" + data)

        def close(self):
            events.append("close")
            return "DONE"

    parser = self.etree.XMLParser(target=Target(),
                                  recover=True)

    # </not-root> does not match <root>
    parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
    done = parser.close()

    self.assertEqual("DONE", done)
    self.assertEqual(["start-root", "data-A", "start-a",
                      "data-ca", "end-a", "data-B",
                      "end-root", "close"],
                     events)
def test_iterwalk_tag(self):
    # Only events for elements matching tag="b" are reported.
    iterwalk = self.etree.iterwalk
    root = self.etree.XML(_bytes('<a><b><d/></b><c/></a>'))

    iterator = iterwalk(root, tag="b", events=('start', 'end'))
    events = list(iterator)
    self.assertEqual(
        [('start', root[0]), ('end', root[0])],
        events)
def test_iterwalk_tag_all(self):
    # tag="*" matches every element of the tree.
    iterwalk = self.etree.iterwalk
    root = self.etree.XML(_bytes('<a><b><d/></b><c/></a>'))

    iterator = iterwalk(root, tag="*", events=('start', 'end'))
    events = list(iterator)
    # NOTE(review): the assertion was lost from this copy of the file; it is
    # rebuilt from the tree shape (four elements, one 'start' and one 'end'
    # each) -- confirm against upstream.
    self.assertEqual(
        8,
        len(events))
def test_iterwalk(self):
    # Without an explicit events argument only 'end' events are produced,
    # children before their parent.
    iterwalk = self.etree.iterwalk
    root = self.etree.XML(_bytes('<a><b></b><c/></a>'))

    events = list(iterwalk(root))
    self.assertEqual(
        [('end', root[0]), ('end', root[1]), ('end', root)],
        events)
def test_iterwalk_comments_root_element(self):
    # Walking from the root *element* reports the comments inside it, but
    # not the comments that are siblings of the root.
    iterwalk = self.etree.iterwalk
    root = self.etree.XML(
        b'<!--C0--><a><!--Ca--><b><!--Cb--></b><!--Cc--><c/></a><!--C99-->')

    iterator = iterwalk(root, events=('start', 'end', 'comment'))
    events = list(iterator)
    self.assertEqual(
        [('start', root), ('comment', root[0]),
         ('start', root[1]), ('comment', root[1][0]), ('end', root[1]),
         ('comment', root[2]), ('start', root[3]), ('end', root[3]),
         ('end', root),
         ],
        events)
def test_iterwalk_comments_tree(self):
    # Walking an ElementTree also reports the comments that precede and
    # follow the root element.
    iterwalk = self.etree.iterwalk
    root = self.etree.XML(
        b'<!--C0--><a><!--Ca--><b><!--Cb--></b><!--Cc--><c/></a><!--C99-->')

    iterator = iterwalk(self.etree.ElementTree(root), events=('start', 'end', 'comment'))
    events = list(iterator)
    self.assertEqual(
        [('comment', root.getprevious()),
         ('start', root), ('comment', root[0]),  # <a>
         ('start', root[1]), ('comment', root[1][0]), ('end', root[1]),  # <b>
         ('comment', root[2]), ('start', root[3]), ('end', root[3]),  # <c>
         ('end', root), ('comment', root.getnext()),
         ],
        events)
def test_iterwalk_pis_root_element(self):
    # Walking from the root *element* reports the processing instructions
    # inside it, but not those that are siblings of the root.
    iterwalk = self.etree.iterwalk
    root = self.etree.XML(
        b'<?C0?><a><?Ca?><b><?Cb?></b><?Cc?><c/></a><?C99?>')

    iterator = iterwalk(root, events=('start', 'end', 'pi'))
    events = list(iterator)
    self.assertEqual(
        [('start', root), ('pi', root[0]),
         ('start', root[1]), ('pi', root[1][0]), ('end', root[1]),
         ('pi', root[2]), ('start', root[3]), ('end', root[3]),
         ('end', root),
         ],
        events)
def test_iterwalk_pis_tree(self):
    # Walking an ElementTree also reports the processing instructions that
    # precede and follow the root element.
    iterwalk = self.etree.iterwalk
    root = self.etree.XML(
        b'<?C0?><a><?Ca?><b><?Cb?></b><?Cc?><c/></a><?C99?>')

    iterator = iterwalk(self.etree.ElementTree(root), events=('start', 'end', 'pi'))
    events = list(iterator)
    self.assertEqual(
        [('pi', root.getprevious()),
         ('start', root), ('pi', root[0]),  # <a>
         ('start', root[1]), ('pi', root[1][0]), ('end', root[1]),  # <b>
         ('pi', root[2]), ('start', root[3]), ('end', root[3]),  # <c>
         ('end', root), ('pi', root.getnext()),
         ],
        events)
def test_iterwalk_pis_comments_tree(self):
    # Comments and processing instructions are reported together, in
    # document order, including those outside the root element.
    iterwalk = self.etree.iterwalk
    root = self.etree.XML(
        b'<!--C0--><?C0?><!--C1--><a><?Ca?><b><!--Cb--></b><?Cc?><c/></a><!--C99--><?C99?>')

    iterator = iterwalk(self.etree.ElementTree(root), events=('start', 'end', 'pi', 'comment'))
    events = list(iterator)
    self.assertEqual(
        [('comment', root.getprevious().getprevious().getprevious()),
         ('pi', root.getprevious().getprevious()),
         ('comment', root.getprevious()),
         ('start', root), ('pi', root[0]),  # <a>
         ('start', root[1]), ('comment', root[1][0]), ('end', root[1]),  # <b>
         ('pi', root[2]), ('start', root[3]), ('end', root[3]),  # <c>
         ('end', root), ('comment', root.getnext()), ('pi', root.getnext().getnext()),
         ],
        events)
def test_iterwalk_pis_comments_tree_no_events(self):
    # When only 'start'/'end' are requested, comments and processing
    # instructions in the tree produce no events at all.
    iterwalk = self.etree.iterwalk
    root = self.etree.XML(
        b'<!--C0--><?C0?><!--C1--><a><?Ca?><b><!--Cb--></b><?Cc?><c/></a><!--C99--><?C99?>')

    iterator = iterwalk(self.etree.ElementTree(root), events=('start', 'end'))
    events = list(iterator)
    self.assertEqual(
        [('start', root),  # <a>
         ('start', root[1]), ('end', root[1]),  # <b>
         ('start', root[3]), ('end', root[3]),  # <c>
         ('end', root),
         ],
        events)
def test_iterwalk_start(self):
    # 'start' events arrive in document order, parent first.
    iterwalk = self.etree.iterwalk
    root = self.etree.XML(_bytes('<a><b></b><c/></a>'))

    iterator = iterwalk(root, events=('start',))
    events = list(iterator)
    self.assertEqual(
        [('start', root), ('start', root[0]), ('start', root[1])],
        events)
def test_iterwalk_start_end(self):
    # 'start' and 'end' events are properly nested.
    iterwalk = self.etree.iterwalk
    root = self.etree.XML(_bytes('<a><b></b><c/></a>'))

    iterator = iterwalk(root, events=('start','end'))
    events = list(iterator)
    self.assertEqual(
        [('start', root), ('start', root[0]), ('end', root[0]),
         ('start', root[1]), ('end', root[1]), ('end', root)],
        events)
def test_iterwalk_start_tags(self):
    # The tag filter restricts 'start' events to the two <b> elements.
    iterwalk = self.etree.iterwalk
    root = self.etree.XML(_bytes('<a><b></b><c/><b><d/></b></a>'))

    iterator = iterwalk(root, events=('start',), tag='b')
    events = list(iterator)
    self.assertEqual(
        [('start', root[0]), ('start', root[2])],
        events)
def test_iterwalk_start_end_tags(self):
    # The tag filter applies to both 'start' and 'end' events.
    iterwalk = self.etree.iterwalk
    root = self.etree.XML(_bytes('<a><b></b><c/><b><d/></b></a>'))

    iterator = iterwalk(root, events=('start', 'end'), tag='b')
    events = list(iterator)
    self.assertEqual(
        [('start', root[0]), ('end', root[0]), ('start', root[2]), ('end', root[2])],
        events)
def test_iterwalk_start_end_tags_with_root(self):
    # A tuple of tags may include the root's own tag; the root events then
    # enclose those of the matching children.
    iterwalk = self.etree.iterwalk
    root = self.etree.XML(_bytes('<a><b></b><c/><b><d/></b></a>'))

    iterator = iterwalk(root, events=('start', 'end'), tag=('b', 'a'))
    events = list(iterator)
    self.assertEqual(
        [('start', root),
         ('start', root[0]), ('end', root[0]),
         ('start', root[2]), ('end', root[2]),
         ('end', root),
         ],
        events)
def test_iterwalk_clear(self):
    # Elements may be cleared while the tree is being walked.
    iterwalk = self.etree.iterwalk
    root = self.etree.XML(_bytes('<a><b></b><c/></a>'))

    iterator = iterwalk(root)
    # NOTE(review): the loop body and the final assertion were lost from
    # this copy of the file; they are rebuilt from the test name and the
    # size of the gap in the embedded numbering -- confirm against upstream.
    for event, elem in iterator:
        elem.clear()

    self.assertEqual(0,
                     len(root))
def test_iterwalk_attrib_ns(self):
    # Setting a namespaced attribute during the walk must not disturb the
    # sequence of namespace and element events.
    iterwalk = self.etree.iterwalk
    root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))

    attr_name = '{testns}bla'
    events = []
    iterator = iterwalk(root, events=('start','end','start-ns','end-ns'))
    for event, elem in iterator:
        events.append(event)
        if event == 'start':
            # every element except the root gets the attribute
            if elem.tag != '{ns1}a':
                elem.set(attr_name, 'value')

    self.assertEqual(
        ['start-ns', 'start', 'start', 'start-ns', 'start',
         'end', 'end-ns', 'end', 'end', 'end-ns'],
        events)

    self.assertEqual(
        None,
        root.get(attr_name))
    self.assertEqual(
        'value',
        root[0].get(attr_name))
def test_iterwalk_end_skip(self):
    iterwalk = self.etree.iterwalk
    root = self.etree.XML(_bytes('<a><b><c/></b><d><e/></d></a>'))

    iterator = iterwalk(root)
    tags = []
    for event, elem in iterator:
        tags.append(elem.tag)
        # requesting a skip after an 'end' event should never have an effect
        iterator.skip_subtree()

    self.assertEqual(['c', 'b', 'e', 'd', 'a'], tags)
def test_iterwalk_start_end_skip(self):
    iterwalk = self.etree.iterwalk
    root = self.etree.XML(_bytes('<a><b><c/></b><d><e/></d></a>'))

    iterator = iterwalk(root, events=('start', 'end'))
    tags = []
    for event, elem in iterator:
        tags.append((event, elem.tag))
        if elem.tag in ('b', 'e'):
            # skipping should only have an effect on 'start', not on 'end'
            iterator.skip_subtree()

    self.assertEqual(
        [('start', 'a'),
         ('start', 'b'), ('end', 'b'),  # ignored child 'c'
         ('start', 'd'),
         ('start', 'e'), ('end', 'e'),
         ('end', 'd'),
         ('end', 'a')],
        tags)
def test_iterwalk_ns_skip(self):
    # skip_subtree() is requested right after the 'start-ns' event of <b>.
    iterwalk = self.etree.iterwalk
    root = self.etree.XML(_bytes(
        '<a xmlns="ns1"><b xmlns="nsb"><c xmlns="ns2"/></b><d xmlns="ns2"><e/></d></a>'))

    events = []
    iterator = iterwalk(root, events=('start','start-ns','end-ns'))
    for event, elem in iterator:
        if event in ('start-ns', 'end-ns'):
            # namespace events carry the namespace data, not an element
            events.append((event, elem))
            if event == 'start-ns' and elem == ('', 'nsb'):
                events.append('skip')
                iterator.skip_subtree()
        else:
            events.append((event, elem.tag))

    # NOTE(review): the 'skip' marker and the ('end-ns', None) entries were
    # lost from this copy of the file; they are rebuilt from the loop above
    # and the gaps in the embedded numbering -- confirm against upstream.
    self.assertEqual(
        [('start-ns', ('', 'ns1')),
         ('start', '{ns1}a'),
         ('start-ns', ('', 'nsb')),
         'skip',
         ('start', '{nsb}b'),
         ('end-ns', None),
         ('start-ns', ('', 'ns2')),
         ('start', '{ns2}d'),
         ('start', '{ns2}e'),
         ('end-ns', None),
         ('end-ns', None)
         ],
        events)
def test_iterwalk_getiterator(self):
    # Calling getiterator() on each element delivered by iterwalk() must
    # see that element's complete subtree.
    iterwalk = self.etree.iterwalk
    root = self.etree.XML(_bytes('<a><b><d/></b><c/></a>'))

    counts = []
    for event, elem in iterwalk(root):
        counts.append(len(list(elem.getiterator())))
    # NOTE(review): the assertion was lost from this copy of the file; the
    # expected sizes follow from the 'end' order d, b, c, a -- confirm
    # against upstream.
    self.assertEqual(
        [1, 2, 1, 4],
        counts)
def test_itertext_comment_pi(self):
    # https://bugs.launchpad.net/lxml/+bug/1844674
    # itertext() yields the tails of comments and processing instructions
    # but not their own content.
    XML = self.etree.XML
    root = XML(_bytes(
        "<root>RTEXT<a></a>ATAIL<b/><!-- COMMENT -->CTAIL<?PI PITEXT?> PITAIL </root>"
    ))

    text = list(root.itertext())
    self.assertEqual(["RTEXT", "ATAIL", "CTAIL", " PITAIL "],
                     text)
def test_resolve_string_dtd(self):
    # A custom resolver supplies the DTD as a text string; the entity it
    # declares must expand to the URL that was requested.
    dtd_url = _str("__nosuch.dtd")
    check_equal = self.assertEqual

    class MyResolver(self.etree.Resolver):
        def resolve(self, url, id, context):
            check_equal(url, dtd_url)
            dtd_text = _str('''<!ENTITY myentity "%s">
                        <!ELEMENT doc ANY>''') % url
            return self.resolve_string(dtd_text, context)

    validating_parser = self.etree.XMLParser(dtd_validation=True)
    validating_parser.resolvers.add(MyResolver())

    document = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % dtd_url
    parsed = self.etree.parse(StringIO(document), validating_parser)
    self.assertEqual(parsed.getroot().text, dtd_url)
def test_resolve_bytes_dtd(self):
    # Same as the string variant, but the DTD is handed to resolve_string()
    # as UTF-8 encoded bytes.
    parse = self.etree.parse
    parser = self.etree.XMLParser(dtd_validation=True)
    assertEqual = self.assertEqual
    test_url = _str("__nosuch.dtd")

    class MyResolver(self.etree.Resolver):
        def resolve(self, url, id, context):
            assertEqual(url, test_url)
            return self.resolve_string(
                (_str('''<!ENTITY myentity "%s">
                    <!ELEMENT doc ANY>''') % url).encode('utf-8'),
                context)

    parser.resolvers.add(MyResolver())

    xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
    tree = parse(StringIO(xml), parser)
    root = tree.getroot()
    self.assertEqual(root.text, test_url)
def test_resolve_filelike_dtd(self):
    # The DTD is served through resolve_file() from a file-like object.
    parse = self.etree.parse
    parser = self.etree.XMLParser(dtd_validation=True)
    assertEqual = self.assertEqual
    test_url = _str("__nosuch.dtd")

    class MyResolver(self.etree.Resolver):
        def resolve(self, url, id, context):
            assertEqual(url, test_url)
            # NOTE(review): the name of the file-like wrapper was lost from
            # this copy of the file; SillyFileLike (imported at the top of
            # the module) is assumed -- confirm against upstream.
            return self.resolve_file(
                SillyFileLike(
                    _str('''<!ENTITY myentity "%s">
                    <!ELEMENT doc ANY>''') % url), context)

    parser.resolvers.add(MyResolver())

    xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
    tree = parse(StringIO(xml), parser)
    root = tree.getroot()
    self.assertEqual(root.text, test_url)
def test_resolve_filename_dtd(self):
    # The resolver redirects the unknown DTD URL to test.dtd in the test
    # directory; its attribute defaults must show up on the parsed elements.
    parse = self.etree.parse
    parser = self.etree.XMLParser(attribute_defaults=True)
    assertEqual = self.assertEqual
    test_url = _str("__nosuch.dtd")

    class MyResolver(self.etree.Resolver):
        def resolve(self, url, id, context):
            assertEqual(url, test_url)
            return self.resolve_filename(
                fileInTestDir('test.dtd'), context)

    parser.resolvers.add(MyResolver())

    xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
    tree = parse(StringIO(xml), parser)
    root = tree.getroot()
    self.assertEqual(
        root.attrib, {'default': 'valueA'})
    self.assertEqual(
        root[0].attrib, {'default': 'valueB'})
def test_resolve_filename_dtd_relative(self):
    # With a base_url, the relative DTD reference reaches the resolver as a
    # URL inside the test directory.
    parse = self.etree.parse
    parser = self.etree.XMLParser(attribute_defaults=True)
    assertEqual = self.assertEqual
    test_url = _str("__nosuch.dtd")

    class MyResolver(self.etree.Resolver):
        def resolve(self, url, id, context):
            expected = fileUrlInTestDir(test_url)
            url = url.replace('file://', 'file:')  # depends on libxml2 version
            expected = expected.replace('file://', 'file:')
            assertEqual(url, expected)
            return self.resolve_filename(
                fileUrlInTestDir('test.dtd'), context)

    parser.resolvers.add(MyResolver())

    xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
    tree = parse(StringIO(xml), parser,
                 base_url=fileUrlInTestDir('__test.xml'))
    root = tree.getroot()
    self.assertEqual(
        root.attrib, {'default': 'valueA'})
    self.assertEqual(
        root[0].attrib, {'default': 'valueB'})
def test_resolve_file_dtd(self):
    # The resolver serves test.dtd through resolve_file() from an open
    # binary file object.
    parse = self.etree.parse
    parser = self.etree.XMLParser(attribute_defaults=True)
    assertEqual = self.assertEqual
    test_url = _str("__nosuch.dtd")

    class MyResolver(self.etree.Resolver):
        def resolve(self, url, id, context):
            assertEqual(url, test_url)
            return self.resolve_file(
                open(fileInTestDir('test.dtd'), 'rb'), context)

    parser.resolvers.add(MyResolver())

    xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
    tree = parse(StringIO(xml), parser)
    root = tree.getroot()
    self.assertEqual(
        root.attrib, {'default': 'valueA'})
    self.assertEqual(
        root[0].attrib, {'default': 'valueB'})
def test_resolve_empty(self):
    # Resolving the DTD to an empty document leaves &myentity; undefined,
    # so parsing must fail -- but only after the resolver was consulted.
    parse = self.etree.parse
    parser = self.etree.XMLParser(load_dtd=True)
    assertEqual = self.assertEqual
    test_url = _str("__nosuch.dtd")

    class check(object):
        # flag flipped by the resolver so the test can tell it was called
        resolved = False

    class MyResolver(self.etree.Resolver):
        def resolve(self, url, id, context):
            assertEqual(url, test_url)
            check.resolved = True
            return self.resolve_empty(context)

    parser.resolvers.add(MyResolver())

    xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
    self.assertRaises(etree.XMLSyntaxError, parse, StringIO(xml), parser)
    self.assertTrue(check.resolved)
def test_resolve_error(self):
    # An exception raised inside a resolver must propagate out of parse().
    parse = self.etree.parse
    parser = self.etree.XMLParser(dtd_validation=True)

    class _LocalException(Exception):
        pass

    class MyResolver(self.etree.Resolver):
        def resolve(self, url, id, context):
            raise _LocalException

    parser.resolvers.add(MyResolver())

    xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
    self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
def test_entity_parse(self):
    # With resolve_entities=False the reference is kept as an Entity node
    # and serialised back unchanged.
    parse = self.etree.parse
    tostring = self.etree.tostring
    parser = self.etree.XMLParser(resolve_entities=False)
    Entity = self.etree.Entity

    xml = _bytes('<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>')
    tree = parse(BytesIO(xml), parser)
    root = tree.getroot()
    self.assertEqual(root[0].tag, Entity)
    self.assertEqual(root[0].text, "&myentity;")
    self.assertEqual(root[0].tail, None)
    self.assertEqual(root[0].name, "myentity")

    self.assertEqual(_bytes('<doc>&myentity;</doc>'),
                     tostring(root))
def test_entity_restructure(self):
    # Moving an element that contains an unresolved entity reference must
    # keep the Entity node intact.
    # NOTE(review): this copy of the file had lost the <root>/<child1/>/
    # <child2/> lines and the restructuring statement, and had decoded the
    # entity references into literal characters; they are restored from the
    # assertions below -- confirm against upstream.
    xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp "&#160;"> ]>
        <root>
          <child1/>
          <child2/>
          <child3>&nbsp;</child3>
        </root>''')

    parser = self.etree.XMLParser(resolve_entities=False)
    root = etree.fromstring(xml, parser)
    self.assertEqual([ el.tag for el in root ],
                     ['child1', 'child2', 'child3'])

    # replace the first child with the last one
    root[0] = root[-1]
    self.assertEqual([ el.tag for el in root ],
                     ['child3', 'child2'])
    self.assertEqual(root[0][0].text, '&nbsp;')
    self.assertEqual(root[0][0].name, 'nbsp')
def test_entity_append(self):
    # An Entity created in Python can be appended like an element and is
    # serialised as an entity reference.
    Entity = self.etree.Entity
    Element = self.etree.Element
    tostring = self.etree.tostring

    root = Element("root")
    root.append( Entity("test") )

    self.assertEqual(root[0].tag, Entity)
    self.assertEqual(root[0].text, "&test;")
    self.assertEqual(root[0].tail, None)
    self.assertEqual(root[0].name, "test")

    self.assertEqual(_bytes('<root>&test;</root>'),
                     tostring(root))
# Checks that a <data> element holding an Entity node named 'b' ends up as
# the first child of a freshly created <test> element.
# NOTE(review): this copy of the file has lost its short source lines
# (embedded numbers 1699-1704 and 1706 are absent): the rest of the DOCTYPE
# string and, presumably, the statement that appends `entity` to `el`.
# Restore them from the upstream file before relying on this test.
1694 def test_entity_append_parsed(self):
1695 Entity = self.etree.Entity
1696 Element = self.etree.Element
1697 parser = self.etree.XMLParser(resolve_entities=False)
1698 entity = self.etree.XML('''<!DOCTYPE data [
1705 el = Element('test')
1707 self.assertEqual(el.tag, 'test')
1708 self.assertEqual(el[0].tag, 'data')
1709 self.assertEqual(el[0][0].tag, Entity)
1710 self.assertEqual(el[0][0].name, 'b')
def test_entity_values(self):
    # The .text of an Entity is its reference syntax, "&" + name + ";".
    # The numeric cases had been entity-decoded into single characters in
    # this copy of the file, which contradicts the named case; the
    # reference form is restored here.
    Entity = self.etree.Entity
    self.assertEqual(Entity("test").text, '&test;')
    self.assertEqual(Entity("#17683").text, '&#17683;')
    self.assertEqual(Entity("#x1768").text, '&#x1768;')
    self.assertEqual(Entity("#x98AF").text, '&#x98AF;')
def test_entity_error(self):
    # Each of these names must be rejected by Entity() with a ValueError.
    make_entity = self.etree.Entity
    for bad_name in ('a b c', 'a,b', 'a\0b', '#abc', '#xxyz'):
        self.assertRaises(ValueError, make_entity, bad_name)
def test_cdata(self):
    # Text assigned as CDATA reads back as plain text but is serialised as
    # a CDATA section.
    CDATA = self.etree.CDATA
    Element = self.etree.Element
    tostring = self.etree.tostring

    root = Element("root")
    root.text = CDATA('test')

    self.assertEqual('test',
                     root.text)
    self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
                     tostring(root))
def test_cdata_tail(self):
    # CDATA may also be assigned as tail text, for a child and for a root.
    CDATA = self.etree.CDATA
    Element = self.etree.Element
    SubElement = self.etree.SubElement
    tostring = self.etree.tostring

    root = Element("root")
    child = SubElement(root, 'child')
    child.tail = CDATA('test')

    self.assertEqual('test', child.tail)
    self.assertEqual(_bytes('<root><child/><![CDATA[test]]></root>'),
                     tostring(root))

    root = Element("root")
    root.tail = CDATA('test')

    self.assertEqual('test', root.tail)
    self.assertEqual(_bytes('<root/><![CDATA[test]]>'),
                     tostring(root))
def test_cdata_type(self):
    # CDATA() accepts a plain literal and a _str() value, but not an int.
    wrap_cdata = self.etree.CDATA
    node = self.etree.Element("root")

    for content in ("test", _str("test")):
        node.text = wrap_cdata(content)
        self.assertEqual('test', node.text)

    self.assertRaises(TypeError, wrap_cdata, 1)
def test_cdata_errors(self):
    # A CDATA object is rejected as an attribute value, both through set()
    # and through item assignment on .attrib.
    node = self.etree.Element("root")
    wrapped = self.etree.CDATA('test')

    with self.assertRaises(TypeError):
        node.set('attr', wrapped)
    with self.assertRaises(TypeError):
        operator.setitem(node.attrib, 'attr', wrapped)
def test_cdata_parser(self):
    # With strip_cdata=False a parsed CDATA section survives serialisation.
    tostring = self.etree.tostring
    parser = self.etree.XMLParser(strip_cdata=False)
    root = self.etree.XML(_bytes('<root><![CDATA[test]]></root>'), parser)

    self.assertEqual('test', root.text)
    self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
                     tostring(root))
def test_cdata_xpath(self):
    # XPath text() sees the content of a preserved CDATA section.
    tostring = self.etree.tostring
    parser = self.etree.XMLParser(strip_cdata=False)
    root = self.etree.XML(_bytes('<root><![CDATA[test]]></root>'), parser)
    self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
                     tostring(root))

    self.assertEqual(['test'], root.xpath('//text()'))
1804 # TypeError in etree, AssertionError in ElementTree;
def test_setitem_assert(self):
    # Assigning a non-element to a child slot raises TypeError.
    Element = self.etree.Element
    SubElement = self.etree.SubElement

    a = Element('a')
    b = SubElement(a, 'b')

    self.assertRaises(TypeError,
                      a.__setitem__, 0, 'foo')
def test_append_error(self):
    make_element = self.etree.Element
    parent = make_element('root')
    # raises AssertionError in ElementTree
    with self.assertRaises(TypeError):
        parent.append(None)
    with self.assertRaises(TypeError):
        parent.extend([None])
    with self.assertRaises(TypeError):
        parent.extend([make_element('one'), None])
    # the valid element that preceded the None is the first child
    self.assertEqual('one', parent[0].tag)
def test_append_recursive_error(self):
    # An element can be appended neither to itself nor to a descendant.
    add_child = self.etree.SubElement
    top = self.etree.Element('root')
    self.assertRaises(ValueError, top.append, top)
    middle = add_child(top, 'child')
    self.assertRaises(ValueError, middle.append, top)
    leaf = add_child(middle, 'child2')
    for ancestor in (top, middle):
        self.assertRaises(ValueError, leaf.append, ancestor)
    self.assertEqual('child2', top[0][0].tag)
def test_addnext(self):
    # addnext() on the last child with the first child as argument swaps
    # the two siblings.
    parent = self.etree.Element('root')
    for name in ('a', 'b'):
        self.etree.SubElement(parent, name)

    self.assertEqual(['a', 'b'], [child.tag for child in parent])
    first, last = parent[0], parent[1]
    last.addnext(first)
    self.assertEqual(['b', 'a'], [child.tag for child in parent])
def test_addprevious(self):
    # addprevious() on the first child with the last child as argument
    # swaps the two siblings.
    parent = self.etree.Element('root')
    for name in ('a', 'b'):
        self.etree.SubElement(parent, name)

    self.assertEqual(['a', 'b'], [child.tag for child in parent])
    first, last = parent[0], parent[1]
    first.addprevious(last)
    self.assertEqual(['b', 'a'], [child.tag for child in parent])
def test_addnext_cycle(self):
    add_child = self.etree.SubElement
    top = self.etree.Element('root')
    parent = add_child(top, 'a')
    child = add_child(parent, 'b')
    # appending parent as sibling is forbidden
    with self.assertRaises(ValueError):
        child.addnext(parent)
    # the tree still has its original shape
    self.assertEqual(['a'], [el.tag for el in top])
    self.assertEqual(['b'], [el.tag for el in parent])
def test_addprevious_cycle(self):
    add_child = self.etree.SubElement
    top = self.etree.Element('root')
    parent = add_child(top, 'a')
    child = add_child(parent, 'b')
    # appending parent as sibling is forbidden
    with self.assertRaises(ValueError):
        child.addprevious(parent)
    # the tree still has its original shape
    self.assertEqual(['a'], [el.tag for el in top])
    self.assertEqual(['b'], [el.tag for el in parent])
def test_addnext_cycle_long(self):
    add_child = self.etree.SubElement
    top = self.etree.Element('root')
    ancestor = add_child(top, 'a')
    middle = add_child(ancestor, 'b')
    descendant = add_child(middle, 'c')
    # appending parent as sibling is forbidden
    with self.assertRaises(ValueError):
        descendant.addnext(ancestor)
def test_addprevious_cycle_long(self):
    add_child = self.etree.SubElement
    top = self.etree.Element('root')
    ancestor = add_child(top, 'a')
    middle = add_child(ancestor, 'b')
    descendant = add_child(middle, 'c')
    # appending parent as sibling is forbidden
    with self.assertRaises(ValueError):
        descendant.addprevious(ancestor)
def test_addprevious_noops(self):
    # addprevious() calls that cannot change the sibling order must leave
    # the children exactly as they were.
    # NOTE(review): the three calls were lost from this copy of the file;
    # they are rebuilt as the three addprevious() calls on two siblings
    # that keep the order a, b -- confirm against upstream.
    Element = self.etree.Element
    SubElement = self.etree.SubElement
    root = Element('root')
    a = SubElement(root, 'a')
    b = SubElement(root, 'b')
    a.addprevious(a)
    self.assertEqual('a', root[0].tag)
    self.assertEqual('b', root[1].tag)
    b.addprevious(b)
    self.assertEqual('a', root[0].tag)
    self.assertEqual('b', root[1].tag)
    b.addprevious(a)
    self.assertEqual('a', root[0].tag)
    self.assertEqual('b', root[1].tag)
def test_addnext_noops(self):
    # addnext() calls that cannot change the sibling order must leave the
    # children exactly as they were.
    # NOTE(review): the three calls were lost from this copy of the file;
    # they are rebuilt as the three addnext() calls on two siblings that
    # keep the order a, b -- confirm against upstream.
    Element = self.etree.Element
    SubElement = self.etree.SubElement
    root = Element('root')
    a = SubElement(root, 'a')
    b = SubElement(root, 'b')
    a.addnext(a)
    self.assertEqual('a', root[0].tag)
    self.assertEqual('b', root[1].tag)
    b.addnext(b)
    self.assertEqual('a', root[0].tag)
    self.assertEqual('b', root[1].tag)
    a.addnext(b)
    self.assertEqual('a', root[0].tag)
    self.assertEqual('b', root[1].tag)
def test_addnext_root(self):
    # Adding an element as sibling of a parentless element raises TypeError.
    Element = self.etree.Element
    a = Element('a')
    b = Element('b')
    self.assertRaises(TypeError, a.addnext, b)
def test_addprevious_pi(self):
    # A processing instruction added before a child keeps its tail text.
    Element = self.etree.Element
    SubElement = self.etree.SubElement
    PI = self.etree.PI
    root = Element('root')
    SubElement(root, 'a')
    pi = PI('TARGET', 'TEXT')
    pi.tail = "TAIL"

    self.assertEqual(_bytes('<root><a></a></root>'),
                     self._writeElement(root))
    root[0].addprevious(pi)
    self.assertEqual(_bytes('<root><?TARGET TEXT?>TAIL<a></a></root>'),
                     self._writeElement(root))
def test_addprevious_root_pi(self):
    # A processing instruction added before the root element is written in
    # front of it; the expected output contains no tail text.
    Element = self.etree.Element
    PI = self.etree.PI
    root = Element('root')
    pi = PI('TARGET', 'TEXT')
    # NOTE(review): this assignment was lost from this copy of the file and
    # is assumed from the root-level comment tests in this file -- confirm.
    pi.tail = "TAIL"

    self.assertEqual(_bytes('<root></root>'),
                     self._writeElement(root))
    root.addprevious(pi)
    self.assertEqual(_bytes('<?TARGET TEXT?>\n<root></root>'),
                     self._writeElement(root))
def test_addnext_pi(self):
    # A processing instruction added after a child keeps its tail text.
    Element = self.etree.Element
    SubElement = self.etree.SubElement
    PI = self.etree.PI
    root = Element('root')
    SubElement(root, 'a')
    pi = PI('TARGET', 'TEXT')
    pi.tail = "TAIL"

    self.assertEqual(_bytes('<root><a></a></root>'),
                     self._writeElement(root))
    root[0].addnext(pi)
    self.assertEqual(_bytes('<root><a></a><?TARGET TEXT?>TAIL</root>'),
                     self._writeElement(root))
def test_addnext_root_pi(self):
    # A processing instruction added after the root element is written
    # behind it; the expected output contains no tail text.
    Element = self.etree.Element
    PI = self.etree.PI
    root = Element('root')
    pi = PI('TARGET', 'TEXT')
    # NOTE(review): this assignment was lost from this copy of the file and
    # is assumed from the root-level comment tests in this file -- confirm.
    pi.tail = "TAIL"

    self.assertEqual(_bytes('<root></root>'),
                     self._writeElement(root))
    root.addnext(pi)
    self.assertEqual(_bytes('<root></root>\n<?TARGET TEXT?>'),
                     self._writeElement(root))
def test_addnext_comment(self):
    # A comment added after a child element is serialised with its tail.
    parent = self.etree.Element('root')
    self.etree.SubElement(parent, 'a')
    note = self.etree.Comment('TEXT ')
    note.tail = "TAIL"

    before = self._writeElement(parent)
    self.assertEqual(_bytes('<root><a></a></root>'), before)

    parent[0].addnext(note)
    after = self._writeElement(parent)
    self.assertEqual(_bytes('<root><a></a><!--TEXT -->TAIL</root>'), after)
def test_addnext_root_comment(self):
    # A comment added after the root is serialised behind it; the expected
    # output does not contain the comment's tail.
    top = self.etree.Element('root')
    note = self.etree.Comment('TEXT ')
    note.tail = "TAIL"

    before = self._writeElement(top)
    self.assertEqual(_bytes('<root></root>'), before)

    top.addnext(note)
    after = self._writeElement(top)
    self.assertEqual(_bytes('<root></root>\n<!--TEXT -->'), after)
def test_addprevious_comment(self):
    # A comment added before a child element is serialised with its tail.
    parent = self.etree.Element('root')
    self.etree.SubElement(parent, 'a')
    note = self.etree.Comment('TEXT ')
    note.tail = "TAIL"

    before = self._writeElement(parent)
    self.assertEqual(_bytes('<root><a></a></root>'), before)

    parent[0].addprevious(note)
    after = self._writeElement(parent)
    self.assertEqual(_bytes('<root><!--TEXT -->TAIL<a></a></root>'), after)
def test_addprevious_root_comment(self):
    # A comment added before the root is serialised in front of it; the
    # expected output does not contain the comment's tail.
    top = self.etree.Element('root')
    note = self.etree.Comment('TEXT ')
    note.tail = "TAIL"

    before = self._writeElement(top)
    self.assertEqual(_bytes('<root></root>'), before)

    top.addprevious(note)
    after = self._writeElement(top)
    self.assertEqual(_bytes('<!--TEXT -->\n<root></root>'), after)
2054 # ET's Elements have items() and key(), but not values()
def test_attribute_values(self):
    # Element.values() returns the attribute values.
    XML = self.etree.XML

    root = XML(_bytes('<doc alpha="Alpha" beta="Beta" gamma="Gamma"/>'))
    # Compare sorted values so the check does not depend on the order in
    # which the attributes are reported.
    # NOTE(review): the embedded numbering shows a line missing between
    # fetching and asserting the values, possibly a sort -- confirm.
    values = sorted(root.values())
    self.assertEqual(['Alpha', 'Beta', 'Gamma'], values)
2063 # gives error in ElementTree
def test_comment_empty(self):
    # A comment created without text serialises as an empty comment.
    Element = self.etree.Element
    Comment = self.etree.Comment

    a = Element('a')
    a.append(Comment())
    self.assertEqual(
        _bytes('<a><!----></a>'),
        self._writeElement(a))
2074 # ElementTree ignores comments
# Builds an ElementTree from a document that contains an empty comment
# between two elements.
# NOTE(review): this copy of the file has lost its short source lines
# (embedded numbers 2080 and 2082-2089 are absent): the definition of `f`
# and all assertions. Restore them from the upstream file before relying on
# this test.
2075 def test_comment_parse_empty(self):
2076 ElementTree = self.etree.ElementTree
2077 tostring = self.etree.tostring
2079 xml = _bytes('<a><b/><!----><c/></a>')
2081 doc = ElementTree(file=f)
2090 # ElementTree ignores comments
# Builds an ElementTree from a byte stream whose root contains a comment
# between two child elements.
# NOTE(review): this copy of the file has lost its short source lines
# (embedded numbers 2096-2100 are absent), so the assertions of this test
# are not visible here. Restore them from the upstream file.
2091 def test_comment_no_proxy_yet(self):
2092 ElementTree = self.etree.ElementTree
2094 f = BytesIO('<a><b></b><!-- hoi --><c></c></a>')
2095 doc = ElementTree(file=f)
2101 # does not raise an exception in ElementTree
def test_comment_immutable(self):
    # A comment accepts neither child elements nor attributes.
    Element = self.etree.Element
    Comment = self.etree.Comment

    c = Comment()
    el = Element('myel')

    self.assertRaises(TypeError, c.append, el)
    self.assertRaises(TypeError, c.insert, 0, el)
    self.assertRaises(TypeError, c.set, "myattr", "test")
def test_comment_immutable_attrib(self):
    # The attrib of a comment behaves like an empty mapping whose item
    # access, assignment and deletion all raise KeyError.
    comment = self.etree.Comment()
    self.assertEqual(0, len(comment.attrib))

    # membership tests
    self.assertFalse(comment.attrib.__contains__('nope'))
    self.assertFalse('nope' in comment.attrib)
    for view_name in ('keys', 'values'):
        self.assertFalse('nope' in getattr(comment.attrib, view_name)())
    self.assertFalse(('nope', 'huhu') in comment.attrib.items())

    # every way of iterating yields nothing
    self.assertEqual([], list(comment.attrib))
    for view_name in ('keys', 'items', 'values',
                      'iterkeys', 'iteritems', 'itervalues'):
        self.assertEqual([], list(getattr(comment.attrib, view_name)()))

    self.assertEqual('HUHU', comment.attrib.pop('nope', 'HUHU'))
    self.assertRaises(KeyError, comment.attrib.pop, 'nope')

    for key in ('only', 'names', 'nope'):
        self.assertRaises(KeyError, comment.attrib.__getitem__, key)
    self.assertRaises(KeyError, comment.attrib.__setitem__, 'nope', 'yep')
    self.assertRaises(KeyError, comment.attrib.__delitem__, 'nope')
2140 # test passing 'None' to dump()
def test_dump_none(self):
    # dump() rejects None with a TypeError.
    with self.assertRaises(TypeError):
        self.etree.dump(None)
# Builds an ElementTree from a byte stream whose child element uses the
# namespace prefix "foo".
# NOTE(review): this copy of the file has lost its short source lines
# (embedded numbers 2149-2156 are absent), so the assertions of this test
# are not visible here. Restore them from the upstream file.
2144 def test_prefix(self):
2145 ElementTree = self.etree.ElementTree
2147 f = BytesIO('<a xmlns:foo="http://www.infrae.com/ns/1"><foo:b/></a>')
2148 doc = ElementTree(file=f)
# Builds an ElementTree from a byte stream whose root declares a default
# namespace.
# NOTE(review): this copy of the file has lost its short source lines
# (embedded numbers 2162-2169 are absent), so the assertions of this test
# are not visible here. Restore them from the upstream file.
2157 def test_prefix_default_ns(self):
2158 ElementTree = self.etree.ElementTree
2160 f = BytesIO('<a xmlns="http://www.infrae.com/ns/1"><b/></a>')
2161 doc = ElementTree(file=f)
# Builds a small tree (b and c below a, d below b); presumably exercises
# getparent(), going by the test name.
# NOTE(review): this copy of the file has lost its short source lines
# (embedded numbers 2174 and 2178-2190 are absent): the definition of `a`
# and all assertions. Restore them from the upstream file.
2170 def test_getparent(self):
2171 Element = self.etree.Element
2172 SubElement = self.etree.SubElement
2175 b = SubElement(a, 'b')
2176 c = SubElement(a, 'c')
2177 d = SubElement(b, 'd')
def test_iterchildren(self):
    # iterchildren() yields the direct children in document order.
    XML = self.etree.XML

    root = XML(_bytes('<doc><one/><two>Two</two>Hm<three/></doc>'))
    # built in one expression; the original accumulator was never initialised
    result = [el.tag for el in root.iterchildren()]
    self.assertEqual(['one', 'two', 'three'], result)
def test_iterchildren_reversed(self):
    # reversed=True yields the children last to first.
    XML = self.etree.XML

    root = XML(_bytes('<doc><one/><two>Two</two>Hm<three/></doc>'))
    # built in one expression; the original accumulator was never initialised
    result = [el.tag for el in root.iterchildren(reversed=True)]
    self.assertEqual(['three', 'two', 'one'], result)
def test_iterchildren_tag(self):
    # The tag keyword restricts iteration to children with that tag;
    # both <two> elements are reported, identified here by their text.
    XML = self.etree.XML

    root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two></doc>'))
    # Build the list in one go so that no separately initialised
    # accumulator is needed.
    result = [el.text for el in root.iterchildren(tag='two')]
    self.assertEqual(['Two', 'Bla'], result)
def test_iterchildren_tag_posarg(self):
    # The tag filter may also be given as a positional argument.
    XML = self.etree.XML

    root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two></doc>'))
    # Build the list in one go so that no separately initialised
    # accumulator is needed.
    result = [el.text for el in root.iterchildren('two')]
    self.assertEqual(['Two', 'Bla'], result)
def test_iterchildren_tag_reversed(self):
    # Tag filtering combined with reversed=True: the matching <two>
    # children are reported last-first.
    XML = self.etree.XML

    root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two></doc>'))
    # Build the list in one go so that no separately initialised
    # accumulator is needed.
    result = [el.text for el in root.iterchildren(reversed=True, tag='two')]
    self.assertEqual(['Bla', 'Two'], result)
def test_iterchildren_tag_multiple(self):
    # tag accepts a sequence of names; children matching any of them are
    # yielded in document order.  <three/> has no text, hence the None.
    XML = self.etree.XML

    root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
    # Build the list in one go so that no separately initialised
    # accumulator is needed.
    result = [el.text for el in root.iterchildren(tag=['two', 'three'])]
    self.assertEqual(['Two', 'Bla', None], result)
def test_iterchildren_tag_multiple_posarg(self):
    # Several tag names may be passed as separate positional arguments;
    # children matching any of them are yielded in document order.
    # <three/> has no text, hence the None.
    XML = self.etree.XML

    root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
    # Build the list in one go so that no separately initialised
    # accumulator is needed.
    result = [el.text for el in root.iterchildren('two', 'three')]
    self.assertEqual(['Two', 'Bla', None], result)
def test_iterchildren_tag_multiple_reversed(self):
    # A sequence of tag names combined with reversed=True: matching
    # children are yielded last-first.  <three/> has no text, hence the
    # leading None.
    XML = self.etree.XML

    root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
    # Build the list in one go so that no separately initialised
    # accumulator is needed.
    result = [el.text
              for el in root.iterchildren(reversed=True, tag=['two', 'three'])]
    self.assertEqual([None, 'Bla', 'Two'], result)
2263 def test_iterancestors(self):
2264 Element = self.etree.Element
2265 SubElement = self.etree.SubElement
2268 b = SubElement(a, 'b')
2269 c = SubElement(a, 'c')
2270 d = SubElement(b, 'd')
2273 list(a.iterancestors()))
2276 list(b.iterancestors()))
2279 list(c.iterancestors()))
2282 list(d.iterancestors()))
2284 def test_iterancestors_tag(self):
2285 Element = self.etree.Element
2286 SubElement = self.etree.SubElement
2289 b = SubElement(a, 'b')
2290 c = SubElement(a, 'c')
2291 d = SubElement(b, 'd')
2294 list(d.iterancestors('a')))
2297 list(d.iterancestors(tag='a')))
2301 list(d.iterancestors('*')))
2304 list(d.iterancestors(tag='*')))
2306 def test_iterancestors_tag_multiple(self):
2307 Element = self.etree.Element
2308 SubElement = self.etree.SubElement
2311 b = SubElement(a, 'b')
2312 c = SubElement(a, 'c')
2313 d = SubElement(b, 'd')
2316 list(d.iterancestors(tag=('a', 'b'))))
2319 list(d.iterancestors('a', 'b')))
2323 list(d.iterancestors(tag=('w', 'x', 'y', 'z'))))
2326 list(d.iterancestors('w', 'x', 'y', 'z')))
2330 list(d.iterancestors(tag=('d', 'x'))))
2333 list(d.iterancestors('d', 'x')))
2337 list(d.iterancestors(tag=('b', '*'))))
2340 list(d.iterancestors('b', '*')))
2344 list(d.iterancestors(tag=('b', 'c'))))
2347 list(d.iterancestors('b', 'c')))
2349 def test_iterdescendants(self):
2350 Element = self.etree.Element
2351 SubElement = self.etree.SubElement
2354 b = SubElement(a, 'b')
2355 c = SubElement(a, 'c')
2356 d = SubElement(b, 'd')
2357 e = SubElement(c, 'e')
2361 list(a.iterdescendants()))
2364 list(d.iterdescendants()))
2366 def test_iterdescendants_tag(self):
2367 Element = self.etree.Element
2368 SubElement = self.etree.SubElement
2371 b = SubElement(a, 'b')
2372 c = SubElement(a, 'c')
2373 d = SubElement(b, 'd')
2374 e = SubElement(c, 'e')
2378 list(a.iterdescendants('a')))
2381 list(a.iterdescendants(tag='a')))
2383 a2 = SubElement(e, 'a')
2386 list(a.iterdescendants('a')))
2390 list(c.iterdescendants('a')))
2393 list(c.iterdescendants(tag='a')))
2395 def test_iterdescendants_tag_multiple(self):
2396 Element = self.etree.Element
2397 SubElement = self.etree.SubElement
2400 b = SubElement(a, 'b')
2401 c = SubElement(a, 'c')
2402 d = SubElement(b, 'd')
2403 e = SubElement(c, 'e')
2407 list(a.iterdescendants(tag=('a', 'b', 'e'))))
2410 list(a.iterdescendants('a', 'b', 'e')))
2412 a2 = SubElement(e, 'a')
2415 list(a.iterdescendants(tag=('a', 'b'))))
2418 list(a.iterdescendants('a', 'b')))
2422 list(c.iterdescendants(tag=('x', 'y', 'z'))))
2425 list(c.iterdescendants('x', 'y', 'z')))
2429 list(a.iterdescendants(tag=('x', 'y', 'z', '*'))))
2432 list(a.iterdescendants('x', 'y', 'z', '*')))
2434 def test_getroottree(self):
2435 Element = self.etree.Element
2436 SubElement = self.etree.SubElement
2439 b = SubElement(a, 'b')
2440 c = SubElement(a, 'c')
2441 d = SubElement(b, 'd')
2444 a.getroottree().getroot())
2447 b.getroottree().getroot())
2450 d.getroottree().getroot())
2452 def test_getnext(self):
2453 Element = self.etree.Element
2454 SubElement = self.etree.SubElement
2457 b = SubElement(a, 'b')
2458 c = SubElement(a, 'c')
2469 def test_getprevious(self):
2470 Element = self.etree.Element
2471 SubElement = self.etree.SubElement
2474 b = SubElement(a, 'b')
2475 c = SubElement(a, 'c')
2476 d = SubElement(b, 'd')
2487 def test_itersiblings(self):
2488 Element = self.etree.Element
2489 SubElement = self.etree.SubElement
2492 b = SubElement(a, 'b')
2493 c = SubElement(a, 'c')
2494 d = SubElement(b, 'd')
2497 list(a.itersiblings()))
2500 list(b.itersiblings()))
2503 list(c.itersiblings()))
2506 list(c.itersiblings(preceding=True)))
2509 list(b.itersiblings(preceding=True)))
2511 def test_itersiblings_tag(self):
2512 Element = self.etree.Element
2513 SubElement = self.etree.SubElement
2516 b = SubElement(a, 'b')
2517 c = SubElement(a, 'c')
2518 d = SubElement(b, 'd')
2521 list(a.itersiblings(tag='XXX')))
2524 list(b.itersiblings(tag='c')))
2527 list(b.itersiblings(tag='*')))
2530 list(c.itersiblings(preceding=True, tag='b')))
2533 list(c.itersiblings(preceding=True, tag='c')))
2535 def test_itersiblings_tag_multiple(self):
2536 Element = self.etree.Element
2537 SubElement = self.etree.SubElement
2540 b = SubElement(a, 'b')
2541 c = SubElement(a, 'c')
2542 d = SubElement(b, 'd')
2543 e = SubElement(a, 'e')
2546 list(a.itersiblings(tag=('XXX', 'YYY'))))
2549 list(b.itersiblings(tag=('c', 'd', 'e'))))
2552 list(c.itersiblings(preceding=True, tag=('b', 'b', 'c', 'd'))))
2555 list(e.itersiblings(preceding=True, tag=('c', '*'))))
2557 def test_parseid(self):
2558 parseid = self.etree.parseid
2559 XML = self.etree.XML
2560 xml_text = _bytes('''
2561 <!DOCTYPE document [
2562 <!ELEMENT document (h1,p)*>
2563 <!ELEMENT h1 (#PCDATA)>
2564 <!ATTLIST h1 myid ID #REQUIRED>
2565 <!ELEMENT p (#PCDATA)>
2566 <!ATTLIST p someid ID #REQUIRED>
2569 <h1 myid="chapter1">...</h1>
2570 <p id="note1" class="note">...</p>
2571 <p>Regular paragraph.</p>
2572 <p xml:id="xmlid">XML:ID paragraph.</p>
2573 <p someid="warn1" class="warning">...</p>
2577 tree, dic = parseid(BytesIO(xml_text))
2578 root = tree.getroot()
2579 root2 = XML(xml_text)
2580 self.assertEqual(self._writeElement(root),
2581 self._writeElement(root2))
2583 "chapter1" : root[0],
2587 self.assertTrue("chapter1" in dic)
2588 self.assertTrue("warn1" in dic)
2589 self.assertTrue("xmlid" in dic)
2590 self._checkIDDict(dic, expected)
2592 def test_XMLDTDID(self):
2593 XMLDTDID = self.etree.XMLDTDID
2594 XML = self.etree.XML
2595 xml_text = _bytes('''
2596 <!DOCTYPE document [
2597 <!ELEMENT document (h1,p)*>
2598 <!ELEMENT h1 (#PCDATA)>
2599 <!ATTLIST h1 myid ID #REQUIRED>
2600 <!ELEMENT p (#PCDATA)>
2601 <!ATTLIST p someid ID #REQUIRED>
2604 <h1 myid="chapter1">...</h1>
2605 <p id="note1" class="note">...</p>
2606 <p>Regular paragraph.</p>
2607 <p xml:id="xmlid">XML:ID paragraph.</p>
2608 <p someid="warn1" class="warning">...</p>
2612 root, dic = XMLDTDID(xml_text)
2613 root2 = XML(xml_text)
2614 self.assertEqual(self._writeElement(root),
2615 self._writeElement(root2))
2617 "chapter1" : root[0],
2621 self.assertTrue("chapter1" in dic)
2622 self.assertTrue("warn1" in dic)
2623 self.assertTrue("xmlid" in dic)
2624 self._checkIDDict(dic, expected)
2626 def test_XMLDTDID_empty(self):
2627 XMLDTDID = self.etree.XMLDTDID
2628 XML = self.etree.XML
2629 xml_text = _bytes('''
2631 <h1 myid="chapter1">...</h1>
2632 <p id="note1" class="note">...</p>
2633 <p>Regular paragraph.</p>
2634 <p someid="warn1" class="warning">...</p>
2638 root, dic = XMLDTDID(xml_text)
2639 root2 = XML(xml_text)
2640 self.assertEqual(self._writeElement(root),
2641 self._writeElement(root2))
2643 self._checkIDDict(dic, expected)
2645 def test_XMLDTDID_no_id_dict(self):
2646 XMLDTDID = self.etree.XMLDTDID
2647 XML = self.etree.XML
2648 xml_text = _bytes('''
2649 <!DOCTYPE document [
2650 <!ELEMENT document (h1,p)*>
2651 <!ELEMENT h1 (#PCDATA)>
2652 <!ATTLIST h1 myid ID #REQUIRED>
2653 <!ELEMENT p (#PCDATA)>
2654 <!ATTLIST p someid ID #REQUIRED>
2657 <h1 myid="chapter1">...</h1>
2658 <p id="note1" class="note">...</p>
2659 <p>Regular paragraph.</p>
2660 <p xml:id="xmlid">XML:ID paragraph.</p>
2661 <p someid="warn1" class="warning">...</p>
2665 parser = etree.XMLParser(collect_ids=False)
2666 root, dic = XMLDTDID(xml_text, parser=parser)
2667 root2 = XML(xml_text)
2668 self.assertEqual(self._writeElement(root),
2669 self._writeElement(root2))
2670 self.assertFalse(dic)
2671 self._checkIDDict(dic, {})
2673 def _checkIDDict(self, dic, expected):
2674 self.assertEqual(len(dic),
2676 self.assertEqual(sorted(dic.items()),
2677 sorted(expected.items()))
2678 if sys.version_info < (3,):
2679 self.assertEqual(sorted(dic.iteritems()),
2680 sorted(expected.iteritems()))
2681 self.assertEqual(sorted(dic.keys()),
2682 sorted(expected.keys()))
2683 if sys.version_info < (3,):
2684 self.assertEqual(sorted(dic.iterkeys()),
2685 sorted(expected.iterkeys()))
2686 if sys.version_info < (3,):
2687 self.assertEqual(sorted(dic.values()),
2688 sorted(expected.values()))
2689 self.assertEqual(sorted(dic.itervalues()),
2690 sorted(expected.itervalues()))
def test_register_namespace_xml(self):
    # Registering the reserved XML namespace: an upper-case "XML" prefix
    # for the XML namespace URI and the "xml" prefix for a different URI
    # are both rejected with ValueError, while the exact standard pair
    # is accepted.
    register = self.etree.register_namespace

    rejected_pairs = (
        ("XML", "http://www.w3.org/XML/1998/namespace"),
        ("xml", "http://www.w3.org/XML/2345"),
    )
    for prefix, uri in rejected_pairs:
        with self.assertRaises(ValueError):
            register(prefix, uri)

    register("xml", "http://www.w3.org/XML/1998/namespace")  # ok
2699 def test_namespaces(self):
2702 r = {'foo': 'http://ns.infrae.com/foo'}
2703 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2708 _bytes('<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>'),
2709 self._writeElement(e))
2711 def test_namespaces_default(self):
2714 r = {None: 'http://ns.infrae.com/foo'}
2715 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2720 '{http://ns.infrae.com/foo}bar',
2723 _bytes('<bar xmlns="http://ns.infrae.com/foo"></bar>'),
2724 self._writeElement(e))
2726 def test_namespaces_default_and_other(self):
2729 r = {None: 'http://ns.infrae.com/foo', 'p': 'http://test/'}
2730 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2731 self.assertEqual(None, e.prefix)
2732 self.assertEqual('{http://ns.infrae.com/foo}bar', e.tag)
2734 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:p="http://test/"></bar>'),
2735 self._writeElement(e))
2737 def test_namespaces_default_and_attr(self):
2740 r = {None: 'http://ns.infrae.com/foo',
2741 'hoi': 'http://ns.infrae.com/hoi'}
2742 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2743 e.set('{http://ns.infrae.com/hoi}test', 'value')
2745 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>'),
2746 self._writeElement(e))
2748 def test_attribute_keeps_namespace_prefix_on_merge(self):
2751 root = etree.Element('{http://test/ns}root',
2752 nsmap={None: 'http://test/ns'})
2753 sub = etree.Element('{http://test/ns}sub',
2754 nsmap={'test': 'http://test/ns'})
2756 sub.attrib['{http://test/ns}attr'] = 'value'
2757 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2759 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2760 etree.tostring(sub))
2764 _bytes('<root xmlns="http://test/ns">'
2765 '<sub xmlns:test="http://test/ns" test:attr="value"/>'
2767 etree.tostring(root))
2769 def test_attribute_keeps_namespace_prefix_on_merge_with_nons(self):
2772 root = etree.Element('root')
2773 sub = etree.Element('{http://test/ns}sub',
2774 nsmap={'test': 'http://test/ns'})
2776 sub.attrib['{http://test/ns}attr'] = 'value'
2777 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2779 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2780 etree.tostring(sub))
2785 '<test:sub xmlns:test="http://test/ns" test:attr="value"/>'
2787 etree.tostring(root))
2789 def test_attribute_gets_namespace_prefix_on_merge_with_nons(self):
2792 root = etree.Element('root')
2793 sub = etree.Element('{http://test/ns}sub',
2794 nsmap={None: 'http://test/ns'})
2796 sub.attrib['{http://test/ns}attr'] = 'value'
2797 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2799 _bytes('<sub xmlns="http://test/ns" '
2800 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2801 etree.tostring(sub))
2806 '<sub xmlns="http://test/ns"'
2807 ' xmlns:ns0="http://test/ns" ns0:attr="value"/>'
2809 etree.tostring(root))
2811 def test_attribute_gets_namespace_prefix_on_merge(self):
2814 root = etree.Element('{http://test/ns}root',
2815 nsmap={'test': 'http://test/ns',
2816 None: 'http://test/ns'})
2817 sub = etree.Element('{http://test/ns}sub',
2818 nsmap={None: 'http://test/ns'})
2820 sub.attrib['{http://test/ns}attr'] = 'value'
2821 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2823 _bytes('<sub xmlns="http://test/ns" '
2824 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2825 etree.tostring(sub))
2829 _bytes('<test:root xmlns:test="http://test/ns" xmlns="http://test/ns">'
2830 '<test:sub test:attr="value"/>'
2832 etree.tostring(root))
2834 def test_namespaces_elementtree(self):
2836 r = {None: 'http://ns.infrae.com/foo',
2837 'hoi': 'http://ns.infrae.com/hoi'}
2838 e = etree.Element('{http://ns.infrae.com/foo}z', nsmap=r)
2839 tree = etree.ElementTree(element=e)
2840 etree.SubElement(e, '{http://ns.infrae.com/hoi}x')
2842 _bytes('<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>'),
2843 self._writeElement(e))
2845 def test_namespaces_default_copy_element(self):
2848 r = {None: 'http://ns.infrae.com/foo'}
2849 e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2850 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2861 '{http://ns.infrae.com/foo}bar',
2864 '{http://ns.infrae.com/foo}bar',
2867 def test_namespaces_copy_element(self):
2870 r = {None: 'http://ns.infrae.com/BAR'}
2871 e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
2872 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2879 self.assertNotEqual(
2883 '{http://ns.infrae.com/BAR}bar',
2886 '{http://ns.infrae.com/foo}bar',
2889 def test_namespaces_reuse_after_move(self):
2890 ns_href = "http://a.b.c"
2891 one = self.etree.fromstring(
2892 _bytes('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
2895 two = self.etree.fromstring(
2896 _bytes('<root xmlns:ns="%s"/>' % ns_href))
2898 del one # make sure the source document is deallocated
2900 self.assertEqual('{%s}baz' % ns_href, baz.tag)
2902 _bytes('<root xmlns:ns="%s"><ns:baz/></root>' % ns_href),
2903 self.etree.tostring(two))
2905 def test_namespace_cleanup(self):
2907 '<foo xmlns="F" xmlns:x="x">'
2908 '<bar xmlns:ns="NS" xmlns:b="b" xmlns="B">'
2912 root = self.etree.fromstring(xml)
2913 self.assertEqual(xml, self.etree.tostring(root))
2914 self.etree.cleanup_namespaces(root)
2916 _bytes('<foo xmlns="F"><bar xmlns:ns="NS" xmlns="B"><ns:baz/></bar></foo>'),
2917 self.etree.tostring(root))
2919 def test_namespace_cleanup_attributes(self):
2921 '<foo xmlns="F" xmlns:x="X" xmlns:a="A">'
2922 '<bar xmlns:ns="NS" xmlns:b="b" xmlns="B">'
2923 '<ns:baz a:test="attr"/>'
2926 root = self.etree.fromstring(xml)
2927 self.assertEqual(xml, self.etree.tostring(root))
2928 self.etree.cleanup_namespaces(root)
2930 _bytes('<foo xmlns="F" xmlns:a="A">'
2931 '<bar xmlns:ns="NS" xmlns="B">'
2932 '<ns:baz a:test="attr"/>'
2934 self.etree.tostring(root))
2936 def test_namespace_cleanup_many(self):
2937 xml = ('<n12:foo ' +
2938 ' '.join('xmlns:n{n}="NS{n}"'.format(n=i) for i in range(100)) +
2939 '><n68:a/></n12:foo>').encode('utf8')
2940 root = self.etree.fromstring(xml)
2941 self.assertEqual(xml, self.etree.tostring(root))
2942 self.etree.cleanup_namespaces(root)
2944 b'<n12:foo xmlns:n12="NS12" xmlns:n68="NS68"><n68:a/></n12:foo>',
2945 self.etree.tostring(root))
2947 def test_namespace_cleanup_deep(self):
2949 ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2950 '<n64:x/>' + '</a>'*100 + '</root>').encode('utf8')
2951 root = self.etree.fromstring(xml)
2952 self.assertEqual(xml, self.etree.tostring(root))
2953 self.etree.cleanup_namespaces(root)
2955 b'<root>' + b'<a>'*64 + b'<a xmlns:n64="NS64">' + b'<a>'*35 +
2956 b'<n64:x/>' + b'</a>'*100 + b'</root>',
2957 self.etree.tostring(root))
2959 def test_namespace_cleanup_deep_to_top(self):
2961 ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2962 '<n64:x xmlns:a="A" a:attr="X"/>' +
2964 '</root>').encode('utf8')
2965 root = self.etree.fromstring(xml)
2966 self.assertEqual(xml, self.etree.tostring(root))
2967 self.etree.cleanup_namespaces(root, top_nsmap={'n64': 'NS64'})
2969 b'<root xmlns:n64="NS64">' + b'<a>'*100 +
2970 b'<n64:x xmlns:a="A" a:attr="X"/>' + b'</a>'*100 + b'</root>',
2971 self.etree.tostring(root))
2973 def test_namespace_cleanup_keep_prefixes(self):
2974 xml = ('<root xmlns:n64="NS64" xmlns:foo="FOO" xmlns:unused1="UNUSED" xmlns:no="NO">'
2975 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2976 '<foo>foo:bar</foo>'
2977 '</root>').encode('utf8')
2978 root = self.etree.fromstring(xml)
2979 self.assertEqual(xml, self.etree.tostring(root))
2980 self.etree.cleanup_namespaces(root, keep_ns_prefixes=['foo'])
2982 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2983 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2984 b'<foo>foo:bar</foo>'
2986 self.etree.tostring(root))
2988 def test_namespace_cleanup_keep_prefixes_top(self):
2989 xml = ('<root xmlns:n64="NS64" xmlns:unused1="UNUSED" xmlns:no="NO">'
2990 '<sub xmlns:foo="FOO">'
2991 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2992 '<foo>foo:bar</foo>'
2994 '</root>').encode('utf8')
2995 root = self.etree.fromstring(xml)
2996 self.assertEqual(xml, self.etree.tostring(root))
2997 self.etree.cleanup_namespaces(
2999 top_nsmap={'foo': 'FOO', 'unused1': 'UNUSED'},
3000 keep_ns_prefixes=['foo'])
3002 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
3004 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
3005 b'<foo>foo:bar</foo>'
3008 self.etree.tostring(root))
3010 def test_element_nsmap(self):
3013 r = {None: 'http://ns.infrae.com/foo',
3014 'hoi': 'http://ns.infrae.com/hoi'}
3015 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
3020 def test_subelement_nsmap(self):
3023 re = {None: 'http://ns.infrae.com/foo',
3024 'hoi': 'http://ns.infrae.com/hoi'}
3025 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=re)
3027 rs = {None: 'http://ns.infrae.com/honk',
3028 'top': 'http://ns.infrae.com/top'}
3029 s = etree.SubElement(e, '{http://ns.infrae.com/honk}bar', nsmap=rs)
3033 self.assertEqual(re, e.nsmap)
3034 self.assertEqual(r, s.nsmap)
3036 def test_html_prefix_nsmap(self):
3038 el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
3039 self.assertEqual({'hha': None}, el.nsmap)
3041 def test_getchildren(self):
3042 Element = self.etree.Element
3043 SubElement = self.etree.SubElement
3046 b = SubElement(a, 'b')
3047 c = SubElement(a, 'c')
3048 d = SubElement(b, 'd')
3049 e = SubElement(c, 'e')
3051 _bytes('<a><b><d></d></b><c><e></e></c></a>'),
3052 self.etree.tostring(a, method="c14n"))
3063 def test_getiterator(self):
3064 Element = self.etree.Element
3065 SubElement = self.etree.SubElement
3068 b = SubElement(a, 'b')
3069 c = SubElement(a, 'c')
3070 d = SubElement(b, 'd')
3071 e = SubElement(c, 'e')
3075 list(a.getiterator()))
3078 list(d.getiterator()))
3080 def test_getiterator_empty(self):
3081 Element = self.etree.Element
3082 SubElement = self.etree.SubElement
3085 b = SubElement(a, 'b')
3086 c = SubElement(a, 'c')
3087 d = SubElement(b, 'd')
3088 e = SubElement(c, 'e')
3092 list(a.getiterator('none')))
3095 list(e.getiterator('none')))
3098 list(e.getiterator()))
3100 def test_getiterator_filter(self):
3101 Element = self.etree.Element
3102 SubElement = self.etree.SubElement
3105 b = SubElement(a, 'b')
3106 c = SubElement(a, 'c')
3107 d = SubElement(b, 'd')
3108 e = SubElement(c, 'e')
3112 list(a.getiterator('a')))
3113 a2 = SubElement(e, 'a')
3116 list(a.getiterator('a')))
3119 list(c.getiterator('a')))
3121 def test_getiterator_filter_all(self):
3122 Element = self.etree.Element
3123 SubElement = self.etree.SubElement
3126 b = SubElement(a, 'b')
3127 c = SubElement(a, 'c')
3128 d = SubElement(b, 'd')
3129 e = SubElement(c, 'e')
3133 list(a.getiterator('*')))
3135 def test_getiterator_filter_comment(self):
3136 Element = self.etree.Element
3137 Comment = self.etree.Comment
3138 SubElement = self.etree.SubElement
3141 b = SubElement(a, 'b')
3142 comment_b = Comment("TEST-b")
3147 list(a.getiterator(Comment)))
3149 comment_a = Comment("TEST-a")
3153 [comment_b, comment_a],
3154 list(a.getiterator(Comment)))
3158 list(b.getiterator(Comment)))
3160 def test_getiterator_filter_pi(self):
3161 Element = self.etree.Element
3162 PI = self.etree.ProcessingInstruction
3163 SubElement = self.etree.SubElement
3166 b = SubElement(a, 'b')
3172 list(a.getiterator(PI)))
3179 list(a.getiterator(PI)))
3183 list(b.getiterator(PI)))
3185 def test_getiterator_with_text(self):
3186 Element = self.etree.Element
3187 SubElement = self.etree.SubElement
3191 b = SubElement(a, 'b')
3194 c = SubElement(a, 'c')
3197 d = SubElement(b, 'd')
3200 e = SubElement(c, 'e')
3206 list(a.getiterator()))
3209 # list(d.getiterator()))
3211 def test_getiterator_filter_with_text(self):
3212 Element = self.etree.Element
3213 SubElement = self.etree.SubElement
3217 b = SubElement(a, 'b')
3220 c = SubElement(a, 'c')
3223 d = SubElement(b, 'd')
3226 e = SubElement(c, 'e')
3232 list(a.getiterator('a')))
3233 a2 = SubElement(e, 'a')
3236 list(a.getiterator('a')))
3239 list(e.getiterator('a')))
3241 def test_getiterator_filter_multiple(self):
3242 Element = self.etree.Element
3243 SubElement = self.etree.SubElement
3246 b = SubElement(a, 'b')
3247 c = SubElement(a, 'c')
3248 d = SubElement(b, 'd')
3249 e = SubElement(c, 'e')
3250 f = SubElement(c, 'f')
3254 list(a.getiterator('a', 'b')))
3257 list(a.getiterator('x', 'y')))
3260 list(a.getiterator('f', 'a')))
3263 list(c.getiterator('c', '*', 'a')))
3266 list(a.getiterator( (), () )))
3268 def test_getiterator_filter_multiple_tuple(self):
3269 Element = self.etree.Element
3270 SubElement = self.etree.SubElement
3273 b = SubElement(a, 'b')
3274 c = SubElement(a, 'c')
3275 d = SubElement(b, 'd')
3276 e = SubElement(c, 'e')
3277 f = SubElement(c, 'f')
3281 list(a.getiterator( ('a', 'b') )))
3284 list(a.getiterator( ('x', 'y') )))
3287 list(a.getiterator( ('f', 'a') )))
3290 list(c.getiterator( ('c', '*', 'a') )))
3293 list(a.getiterator( () )))
3295 def test_getiterator_filter_namespace(self):
3296 Element = self.etree.Element
3297 SubElement = self.etree.SubElement
3300 b = SubElement(a, '{a}b')
3301 c = SubElement(a, '{a}c')
3302 d = SubElement(b, '{b}d')
3303 e = SubElement(c, '{a}e')
3304 f = SubElement(c, '{b}f')
3305 g = SubElement(c, 'g')
3309 list(a.getiterator('{a}a')))
3312 list(a.getiterator('{b}a')))
3315 list(a.getiterator('a')))
3318 list(a.getiterator('*')))
3321 list(c.getiterator('{b}*')))
3324 list(a.getiterator('{b}*')))
3327 list(a.getiterator('g')))
3330 list(a.getiterator('{}g')))
3333 list(a.getiterator('{}*')))
3335 def test_getiterator_filter_local_name(self):
3336 Element = self.etree.Element
3337 Comment = self.etree.Comment
3338 SubElement = self.etree.SubElement
3341 b = SubElement(a, '{nsA}b')
3342 c = SubElement(b, '{nsB}b')
3343 d = SubElement(a, 'b')
3344 e = SubElement(a, '{nsA}e')
3345 f = SubElement(e, '{nsB}e')
3346 g = SubElement(e, 'e')
3347 a.append(Comment('test'))
3351 list(a.getiterator('{*}b')))
3354 list(a.getiterator('{*}e')))
3356 [a, b, c, d, e, f, g],
3357 list(a.getiterator('{*}*')))
3359 def test_getiterator_filter_entities(self):
3360 Element = self.etree.Element
3361 Entity = self.etree.Entity
3362 SubElement = self.etree.SubElement
3365 b = SubElement(a, 'b')
3366 entity_b = Entity("TEST-b")
3371 list(a.getiterator(Entity)))
3373 entity_a = Entity("TEST-a")
3377 [entity_b, entity_a],
3378 list(a.getiterator(Entity)))
3382 list(b.getiterator(Entity)))
3384 def test_getiterator_filter_element(self):
3385 Element = self.etree.Element
3386 Comment = self.etree.Comment
3388 SubElement = self.etree.SubElement
3391 b = SubElement(a, 'b')
3392 a.append(Comment("test"))
3393 a.append(PI("pi", "content"))
3394 c = SubElement(a, 'c')
3398 list(a.getiterator(Element)))
3400 def test_getiterator_filter_all_comment_pi(self):
3401 # ElementTree iterates over everything here
3402 Element = self.etree.Element
3403 Comment = self.etree.Comment
3405 SubElement = self.etree.SubElement
3408 b = SubElement(a, 'b')
3409 a.append(Comment("test"))
3410 a.append(PI("pi", "content"))
3411 c = SubElement(a, 'c')
3415 list(a.getiterator('*')))
3417 def test_elementtree_getiterator(self):
3418 Element = self.etree.Element
3419 SubElement = self.etree.SubElement
3420 ElementTree = self.etree.ElementTree
3423 b = SubElement(a, 'b')
3424 c = SubElement(a, 'c')
3425 d = SubElement(b, 'd')
3426 e = SubElement(c, 'e')
3427 t = ElementTree(element=a)
3431 list(t.getiterator()))
3433 def test_elementtree_getiterator_filter(self):
3434 Element = self.etree.Element
3435 SubElement = self.etree.SubElement
3436 ElementTree = self.etree.ElementTree
3438 b = SubElement(a, 'b')
3439 c = SubElement(a, 'c')
3440 d = SubElement(b, 'd')
3441 e = SubElement(c, 'e')
3442 t = ElementTree(element=a)
3446 list(t.getiterator('a')))
3447 a2 = SubElement(e, 'a')
3450 list(t.getiterator('a')))
def test_elementtree_getelementpath(self):
    # getelementpath() returns a path relative to the tree's root that
    # find() resolves back to the same element.  Tree under test:
    #
    #   a
    #   +-- b
    #   +-- c
    #       +-- d   (first)
    #       +-- d   (second)
    root = etree.Element("a")
    b = etree.SubElement(root, "b")
    c = etree.SubElement(root, "c")
    first_d = etree.SubElement(c, "d")
    second_d = etree.SubElement(c, "d")
    c.text = first_d.text = 'TEXT'

    # Paths as seen from the top-level element.
    full_tree = etree.ElementTree(root)
    self.assertEqual('.', full_tree.getelementpath(root))
    self.assertEqual('c/d[1]', full_tree.getelementpath(first_d))
    self.assertEqual('c/d[2]', full_tree.getelementpath(second_d))

    for node in (first_d, second_d):
        self.assertEqual(node, full_tree.find(full_tree.getelementpath(node)))

    # Paths as seen from a tree rooted at <c>.
    c_tree = etree.ElementTree(c)
    self.assertEqual('.', c_tree.getelementpath(c))
    self.assertEqual('d[2]', c_tree.getelementpath(second_d))
    self.assertEqual(second_d, c_tree.find(c_tree.getelementpath(second_d)))

    # A tree rooted at <b> is not a parent of a/c/d1/d2, so asking for
    # their paths has to fail.
    b_tree = etree.ElementTree(b)
    self.assertEqual('.', b_tree.getelementpath(b))
    for outsider in (root, c, second_d):
        with self.assertRaises(ValueError):
            b_tree.getelementpath(outsider)
def test_elementtree_getelementpath_ns(self):
    # getelementpath() on a namespaced tree: path steps are written as
    # {uri}name, and a positional index appears only where siblings
    # share the same qualified tag (the two ns1 <d> elements), not for
    # the lone ns2 <d> between them.
    root = etree.Element("{http://ns1/}a")
    b = etree.SubElement(root, "{http://ns1/}b")
    c = etree.SubElement(root, "{http://ns1/}c")
    d_ns1_first = etree.SubElement(c, "{http://ns1/}d")
    d_ns2 = etree.SubElement(c, "{http://ns2/}d")
    d_ns1_second = etree.SubElement(c, "{http://ns1/}d")

    # Paths relative to the top-level element.
    full_tree = etree.ElementTree(root)
    self.assertEqual('.', full_tree.getelementpath(root))
    paths_from_root = (
        ('{http://ns1/}c/{http://ns1/}d[1]', d_ns1_first),
        ('{http://ns1/}c/{http://ns2/}d', d_ns2),
        ('{http://ns1/}c/{http://ns1/}d[2]', d_ns1_second),
    )
    for path, node in paths_from_root:
        self.assertEqual(path, full_tree.getelementpath(node))

    # Every element must be found again through its own path.
    for node in (root, b, c, d_ns1_first, d_ns2, d_ns1_second):
        self.assertEqual(node, full_tree.find(full_tree.getelementpath(node)))

    # Paths relative to a tree rooted at <c>.
    c_tree = etree.ElementTree(c)
    paths_from_c = (
        ('{http://ns1/}d[1]', d_ns1_first),
        ('{http://ns2/}d', d_ns2),
        ('{http://ns1/}d[2]', d_ns1_second),
    )
    for path, node in paths_from_c:
        self.assertEqual(path, c_tree.getelementpath(node))
    for _, node in paths_from_c:
        self.assertEqual(node, c_tree.find(c_tree.getelementpath(node)))

    # A tree rooted at <b> is not a parent of d1/d2.
    b_tree = etree.ElementTree(b)
    for outsider in (d_ns1_first, d_ns2):
        with self.assertRaises(ValueError):
            b_tree.getelementpath(outsider)
3515 def test_elementtree_iter_qname(self):
3516 XML = self.etree.XML
3517 ElementTree = self.etree.ElementTree
3518 QName = self.etree.QName
3519 tree = ElementTree(XML(
3520 _bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>')))
3522 list(tree.iter(QName("b"))),
3523 list(tree.iter("b")),
3526 list(tree.iter(QName("X", "b"))),
3527 list(tree.iter("{X}b")),
3531 [e.tag for e in tree.iter(QName("X", "b"), QName("b"))],
3532 ['{X}b', 'b', '{X}b', 'b', 'b']
3535 list(tree.iter(QName("X", "b"), QName("b"))),
3536 list(tree.iter("{X}b", "b"))
def test_elementtree_find_qname(self):
    # ElementTree.find() accepts a QName instead of a tag string; for
    # QName("c") it has to return the root's third child, the top-level
    # <c>.
    etree = self.etree
    doc = etree.ElementTree(
        etree.XML(_bytes('<a><b><c/></b><b/><c><b/></c></a>')))
    found = doc.find(etree.QName("c"))
    third_child = doc.getroot()[2]
    self.assertEqual(found, third_child)
def test_elementtree_findall_qname(self):
    # ElementTree.findall() accepts a QName instead of a tag string;
    # QName("c") matches exactly one element in this document.
    etree = self.etree
    doc = etree.ElementTree(
        etree.XML(_bytes('<a><b><c/></b><b/><c><b/></c></a>')))
    matches = list(doc.findall(etree.QName("c")))
    self.assertEqual(len(matches), 1)
def test_elementtree_findall_ns_qname(self):
    # findall() with QName arguments on a document that mixes plain and
    # namespaced <b> elements: the un-namespaced QName("b") matches two
    # elements, the namespaced QName("X", "b") matches one.
    etree = self.etree
    doc = etree.ElementTree(etree.XML(
        _bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>')))

    plain_matches = list(doc.findall(etree.QName("b")))
    self.assertEqual(len(plain_matches), 2)

    namespaced_matches = list(doc.findall(etree.QName("X", "b")))
    self.assertEqual(len(namespaced_matches), 1)
def test_findall_ns(self):
    # Descendant searches with {namespace}tag patterns: each path is
    # paired with the number of elements it is expected to match.
    root = self.etree.XML(_bytes(
        '<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>'))
    expected_counts = (
        (".//{X}b", 2),
        (".//{X}*", 2),
        (".//b", 3),
    )
    for path, count in expected_counts:
        self.assertEqual(len(root.findall(path)), count)
3569 def test_findall_different_nsmaps(self):
3570 XML = self.etree.XML
3571 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
3573 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3574 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 2)
3575 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3577 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
3578 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 1)
3579 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3581 def test_findall_empty_prefix(self):
3582 XML = self.etree.XML
3583 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
3585 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3586 nsmap = {'xx': 'X', None: 'Y'}
3587 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)
3588 nsmap = {'xx': 'X', '': 'Y'}
3589 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)
def test_findall_syntax_error(self):
    # Malformed path expressions must make Element.findall() raise
    # SyntaxError.
    root = self.etree.XML(_bytes('<a><b><c/></b><b/><c><b/><b/></c><b/></a>'))
    invalid_paths = (
        '',
        '//',  # absolute path on Element
        './//',
    )
    for path in invalid_paths:
        with self.assertRaises(SyntaxError):
            root.findall(path)
3598 def test_index(self):
3600 e = etree.Element('foo')
3602 etree.SubElement(e, 'a%s' % i)
3608 3, e.index(e[3], 3))
3610 ValueError, e.index, e[3], 4)
3612 ValueError, e.index, e[3], 0, 2)
3614 ValueError, e.index, e[8], 0, -3)
3616 ValueError, e.index, e[8], -5, -3)
3618 8, e.index(e[8], 0, -1))
3620 8, e.index(e[8], -12, -1))
3622 0, e.index(e[0], -12, -1))
3624 def test_replace(self):
3626 e = etree.Element('foo')
3628 el = etree.SubElement(e, 'a%s' % i)
3629 el.text = "text%d" % i
3630 el.tail = "tail%d" % i
3636 e.replace(e[0], e[1])
3642 child1.text, "text1")
3644 child1.tail, "tail1")
3646 child0.tail, "tail0")
3650 e.replace(e[-1], e[0])
3654 child1.text, "text1")
3656 child1.tail, "tail1")
3660 def test_replace_new(self):
3662 e = etree.Element('foo')
3664 etree.SubElement(e, 'a%s' % i)
3666 new_element = etree.Element("test")
3667 new_element.text = "TESTTEXT"
3668 new_element.tail = "TESTTAIL"
3670 e.replace(e[0], new_element)
3682 def test_setslice_all_reversed(self):
3683 Element = self.etree.Element
3684 SubElement = self.etree.SubElement
3702 def test_setslice_step(self):
3703 Element = self.etree.Element
3704 SubElement = self.etree.SubElement
3707 b = SubElement(a, 'b')
3708 c = SubElement(a, 'c')
3709 d = SubElement(a, 'd')
3710 e = SubElement(a, 'e')
3720 def test_setslice_step_negative(self):
3721 Element = self.etree.Element
3722 SubElement = self.etree.SubElement
3725 b = SubElement(a, 'b')
3726 c = SubElement(a, 'c')
3727 d = SubElement(a, 'd')
3728 e = SubElement(a, 'e')
3738 def test_setslice_step_negative2(self):
3739 Element = self.etree.Element
3740 SubElement = self.etree.SubElement
3743 b = SubElement(a, 'b')
3744 c = SubElement(a, 'c')
3745 d = SubElement(a, 'd')
3746 e = SubElement(a, 'e')
3756 def test_setslice_step_overrun(self):
3757 Element = self.etree.Element
3758 SubElement = self.etree.SubElement
3762 print("slice() not found")
3766 b = SubElement(a, 'b')
3767 c = SubElement(a, 'c')
3768 d = SubElement(a, 'd')
3769 e = SubElement(a, 'e')
3777 operator.setitem, a, slice(1,None,2), [x, y, z])
3783 def test_sourceline_XML(self):
3784 XML = self.etree.XML
3785 root = XML(_bytes('''<?xml version="1.0"?>
3794 [ el.sourceline for el in root.getiterator() ])
3796 def test_large_sourceline_XML(self):
3797 XML = self.etree.XML
3799 '<?xml version="1.0"?>\n'
3800 '<root>' + '\n' * 65536 +
3801 '<p>' + '\n' * 65536 + '</p>\n' +
3805 if self.etree.LIBXML_VERSION >= (2, 9):
3806 expected = [2, 131074, 131076]
3808 expected = [2, 65535, 65535]
3810 self.assertEqual(expected, [el.sourceline for el in root.iter()])
3812 def test_sourceline_parse(self):
3813 parse = self.etree.parse
3814 tree = parse(fileInTestDir('include/test_xinclude.xml'))
3818 [ el.sourceline for el in tree.getiterator() ])
3820 def test_sourceline_iterparse_end(self):
3821 iterparse = self.etree.iterparse
3822 lines = [ el.sourceline for (event, el) in
3823 iterparse(fileInTestDir('include/test_xinclude.xml')) ]
3829 def test_sourceline_iterparse_start(self):
3830 iterparse = self.etree.iterparse
3831 lines = [ el.sourceline for (event, el) in
3832 iterparse(fileInTestDir('include/test_xinclude.xml'),
3833 events=("start",)) ]
def test_sourceline_element(self):
    # Elements created through the API (rather than parsed) are
    # expected to report None as their source line.
    make_element = self.etree.Element
    make_child = self.etree.SubElement

    parent = make_element("test")
    self.assertEqual(None, parent.sourceline)

    # Appending a child must not give either element a source line.
    child = make_child(parent, "test")
    self.assertEqual(None, parent.sourceline)
    self.assertEqual(None, child.sourceline)
def test_XML_base_url_docinfo(self):
    # The base_url handed to XML() is expected to show up as docinfo.URL.
    root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
    self.assertEqual(root.getroottree().docinfo.URL, "http://no/such/url")
def test_XML_set_base_url_docinfo(self):
    # docinfo.URL starts out as the parse-time base_url and can be
    # reassigned afterwards.
    root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
    info = root.getroottree().docinfo
    self.assertEqual(info.URL, "http://no/such/url")

    info.URL = "https://secret/url"
    self.assertEqual(info.URL, "https://secret/url")
def test_parse_stringio_base_url(self):
    # parse() from a file-like object must honour an explicit base_url.
    # The payload goes through _bytes() like every other BytesIO input
    # in these tests, so the test does not depend on BytesIO accepting
    # a text string.
    tree = etree.parse(BytesIO(_bytes("<root/>")), base_url="http://no/such/url")
    docinfo = tree.docinfo
    self.assertEqual(docinfo.URL, "http://no/such/url")
def test_parse_base_url_docinfo(self):
    # An explicit base_url is expected to be reported as docinfo.URL
    # even though the document is parsed from a file path.
    source_path = fileInTestDir('include/test_xinclude.xml')
    parsed = etree.parse(source_path, base_url="http://no/such/url")
    self.assertEqual(parsed.docinfo.URL, "http://no/such/url")
def test_HTML_base_url_docinfo(self):
    # Same check as for XML(): HTML() exposes its base_url as docinfo.URL.
    root = etree.HTML(_bytes("<html/>"), base_url="http://no/such/url")
    self.assertEqual(root.getroottree().docinfo.URL, "http://no/such/url")
def test_docinfo_public(self):
    # docinfo of a document that carries an XML declaration and a
    # PUBLIC doctype.
    declaration = '<?xml version="1.0" encoding="ascii"?>'
    public_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
    system_url = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
    doctype = '<!DOCTYPE html PUBLIC "%s" "%s">' % (public_id, system_url)

    document = _bytes(declaration + doctype + '<html><body></body></html>')

    info = etree.parse(BytesIO(document)).docinfo
    expected = (
        ("encoding", "ascii"),
        ("xml_version", "1.0"),
        ("public_id", public_id),
        ("system_url", system_url),
        ("root_name", 'html'),
        ("doctype", doctype),
    )
    for attribute, value in expected:
        self.assertEqual(getattr(info, attribute), value)
def test_docinfo_system(self):
    # docinfo of a document that carries an XML declaration and a
    # SYSTEM doctype (no public identifier).
    xml_header = '<?xml version="1.0" encoding="UTF-8"?>'
    # System identifier embedded into the doctype below and compared
    # against docinfo.system_url; the concrete value is arbitrary.
    sys_id = "some.dtd"
    doctype_string = '<!DOCTYPE html SYSTEM "%s">' % sys_id
    xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')

    tree = etree.parse(BytesIO(xml))
    docinfo = tree.docinfo
    self.assertEqual(docinfo.encoding, "UTF-8")
    self.assertEqual(docinfo.xml_version, "1.0")
    self.assertEqual(docinfo.public_id, None)
    self.assertEqual(docinfo.system_url, sys_id)
    self.assertEqual(docinfo.root_name, 'html')
    self.assertEqual(docinfo.doctype, doctype_string)
def test_docinfo_empty(self):
    # docinfo values expected for a document that has neither an XML
    # declaration nor a doctype.
    document = _bytes('<html><body></body></html>')
    info = etree.parse(BytesIO(document)).docinfo
    expected = (
        ("encoding", "UTF-8"),
        ("xml_version", "1.0"),
        ("public_id", None),
        ("system_url", None),
        ("root_name", 'html'),
        ("doctype", ''),
    )
    for attribute, value in expected:
        self.assertEqual(getattr(info, attribute), value)
def test_docinfo_name_only(self):
    # docinfo values expected for a doctype that only names the root
    # element, without public or system identifiers.
    document = _bytes('<!DOCTYPE root><root></root>')
    info = etree.parse(BytesIO(document)).docinfo
    expected = (
        ("encoding", "UTF-8"),
        ("xml_version", "1.0"),
        ("public_id", None),
        ("system_url", None),
        ("root_name", 'root'),
        ("doctype", '<!DOCTYPE root>'),
    )
    for attribute, value in expected:
        self.assertEqual(getattr(info, attribute), value)
def test_doctype_name_only_roundtrip(self):
    # A name-only doctype must serialise back to exactly the input bytes.
    original = _bytes('<!DOCTYPE root>\n<root/>')
    parsed = etree.parse(BytesIO(original))
    serialised = etree.tostring(parsed)
    self.assertEqual(original, serialised)
def test_doctype_output_override(self):
    # tostring(doctype=...) is expected to emit the given doctype in
    # place of the one that was parsed from the document.
    public_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
    system_url = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
    replacement = _bytes('<!DOCTYPE html PUBLIC "%s" "%s">' % (public_id, system_url))

    source = _bytes('<!DOCTYPE root>\n<root/>')
    parsed = etree.parse(BytesIO(source))

    expected = source.replace(_bytes('<!DOCTYPE root>'), replacement)
    self.assertEqual(expected, etree.tostring(parsed, doctype=replacement))
3957 def test_xml_base(self):
3959 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3960 self.assertEqual(root.base, "http://no/such/url")
3962 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3963 root.base = "https://secret/url"
3964 self.assertEqual(root.base, "https://secret/url")
3966 root.get('{http://www.w3.org/XML/1998/namespace}base'),
3967 "https://secret/url")
3969 def test_xml_base_attribute(self):
3971 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3972 self.assertEqual(root.base, "http://no/such/url")
3974 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3975 root.set('{http://www.w3.org/XML/1998/namespace}base',
3976 "https://secret/url")
3977 self.assertEqual(root.base, "https://secret/url")
3979 root.get('{http://www.w3.org/XML/1998/namespace}base'),
3980 "https://secret/url")
def test_html_base(self):
    # The base_url given to HTML() is expected as the root's .base.
    markup = _bytes("<html><body></body></html>")
    root = etree.HTML(markup, base_url="http://no/such/url")
    self.assertEqual(root.base, "http://no/such/url")
def test_html_base_tag(self):
    # Without a base_url argument, the href of the document's <base>
    # tag is expected as the root's .base.
    markup = _bytes('<html><head><base href="http://no/such/url"></head></html>')
    root = etree.HTML(markup)
    self.assertEqual(root.base, "http://no/such/url")
3993 def test_indent(self):
3995 elem = ET.XML("<root></root>")
3997 self.assertEqual(ET.tostring(elem), b'<root/>')
3999 elem = ET.XML("<html><body>text</body></html>")
4001 self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>\n</html>')
4003 elem = ET.XML("<html> <body>text</body> </html>")
4005 self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>\n</html>')
4007 elem = ET.XML("<html> <body>text</body> </html>")
4009 self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>\n</html>')
4011 elem = ET.XML("<html><body>text</body>tail</html>")
4013 self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>tail</html>')
4015 elem = ET.XML("<html><body><p>par</p>\n<p>text</p>\t<p><br/></p></body></html>")
4030 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
4036 b' <p>pre<br/>post</p>\n'
4042 def test_indent_space(self):
4044 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
4045 ET.indent(elem, space='\t')
4050 b'\t\t<p>pre<br/>post</p>\n'
4051 b'\t\t<p>text</p>\n'
4056 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
4057 ET.indent(elem, space='')
4062 b'<p>pre<br/>post</p>\n'
4068 def test_indent_space_caching(self):
4070 elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>")
4073 {el.tail for el in elem.iter()},
4074 {None, "\n", "\n ", "\n "}
4077 {el.text for el in elem.iter()},
4078 {None, "\n ", "\n ", "\n ", "par", "text"}
4080 # NOTE: lxml does not reuse Python text strings across elements.
4082 # len({el.tail for el in elem.iter()}),
4083 # len({id(el.tail) for el in elem.iter()}),
4086 def test_indent_level(self):
4088 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
4090 ET.indent(elem, level=-1)
4094 self.assertTrue(False, "ValueError not raised")
4097 b"<html><body><p>pre<br/>post</p><p>text</p></body></html>"
4100 ET.indent(elem, level=2)
4105 b' <p>pre<br/>post</p>\n'
4111 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
4112 ET.indent(elem, level=1, space=' ')
4117 b' <p>pre<br/>post</p>\n'
def test_parse_fileobject_unicode(self):
    # parse from a file object that returns unicode strings
    source = LargeFileLikeUnicode()
    root_tag = self.etree.parse(source).getroot().tag
    self.assertTrue(root_tag.endswith('root'))
4130 def test_dtd_io(self):
4131 # check that DTDs that go in also go back out
4133 <!DOCTYPE test SYSTEM "test.dtd" [
4134 <!ENTITY entity "tasty">
4136 <!ELEMENT a (#PCDATA)>
4138 <test><a>test-test</a></test>\
4140 tree = self.etree.parse(BytesIO(xml))
4141 self.assertEqual(self.etree.tostring(tree).replace(_bytes(" "), _bytes("")),
4142 xml.replace(_bytes(" "), _bytes("")))
def test_byte_zero(self):
    # A NUL character must be rejected in text, tail and tag names.
    Element = self.etree.Element

    # Element that the setattr() calls below try to modify.
    a = Element('a')
    self.assertRaises(ValueError, setattr, a, "text", 'ha\0ho')
    self.assertRaises(ValueError, setattr, a, "tail", 'ha\0ho')

    self.assertRaises(ValueError, Element, 'ha\0ho')
4153 def test_unicode_byte_zero(self):
4154 Element = self.etree.Element
4157 self.assertRaises(ValueError, setattr, a, "text",
4159 self.assertRaises(ValueError, setattr, a, "tail",
4162 self.assertRaises(ValueError, Element,
def test_byte_invalid(self):
    # The control characters \x07 and \x02 must be rejected in text,
    # tail and tag names.
    Element = self.etree.Element

    # Element that the setattr() calls below try to modify.
    a = Element('a')
    self.assertRaises(ValueError, setattr, a, "text", 'ha\x07ho')
    self.assertRaises(ValueError, setattr, a, "text", 'ha\x02ho')

    self.assertRaises(ValueError, setattr, a, "tail", 'ha\x07ho')
    self.assertRaises(ValueError, setattr, a, "tail", 'ha\x02ho')

    self.assertRaises(ValueError, Element, 'ha\x07ho')
    self.assertRaises(ValueError, Element, 'ha\x02ho')
4178 def test_unicode_byte_invalid(self):
4179 Element = self.etree.Element
4182 self.assertRaises(ValueError, setattr, a, "text",
4184 self.assertRaises(ValueError, setattr, a, "text",
4187 self.assertRaises(ValueError, setattr, a, "tail",
4189 self.assertRaises(ValueError, setattr, a, "tail",
4192 self.assertRaises(ValueError, Element,
4194 self.assertRaises(ValueError, Element,
def test_unicode_byte_invalid_sequence(self):
    # The control characters \x07 and \x02 must still be rejected when
    # they follow a non-ASCII character in the string.
    Element = self.etree.Element

    # Element that the setattr() calls below try to modify.
    a = Element('a')
    self.assertRaises(ValueError, setattr, a, "text",
                      _str('ha\u1234\x07ho'))
    self.assertRaises(ValueError, setattr, a, "text",
                      _str('ha\u1234\x02ho'))

    self.assertRaises(ValueError, setattr, a, "tail",
                      _str('ha\u1234\x07ho'))
    self.assertRaises(ValueError, setattr, a, "tail",
                      _str('ha\u1234\x02ho'))

    self.assertRaises(ValueError, Element,
                      _str('ha\u1234\x07ho'))
    self.assertRaises(ValueError, Element,
                      _str('ha\u1234\x02ho'))
def test_encoding_tostring_utf16(self):
    # ElementTree fails to serialize this
    tostring = self.etree.tostring
    Element = self.etree.Element
    SubElement = self.etree.SubElement

    # Root element; the canonical form expected below is rooted at <a>.
    a = Element('a')
    b = SubElement(a, 'b')
    c = SubElement(a, 'c')

    result = tostring(a, encoding='UTF-16')
    self.assertEqual(_bytes('<a><b></b><c></c></a>'),
                     canonicalize(result))
def test_tostring_none(self):
    # ElementTree raises an AssertionError here
    serialize = self.etree.tostring
    with self.assertRaises(TypeError):
        serialize(None)
def test_tostring_pretty(self):
    # tostring() output with the default, with pretty_print=False and
    # with pretty_print=True.
    tostring = self.etree.tostring
    Element = self.etree.Element
    SubElement = self.etree.SubElement

    # Root element; all expected strings below are rooted at <a>.
    a = Element('a')
    b = SubElement(a, 'b')
    c = SubElement(a, 'c')

    result = tostring(a)
    self.assertEqual(result, _bytes("<a><b/><c/></a>"))

    result = tostring(a, pretty_print=False)
    self.assertEqual(result, _bytes("<a><b/><c/></a>"))

    result = tostring(a, pretty_print=True)
    self.assertEqual(result, _bytes("<a>\n  <b/>\n  <c/>\n</a>\n"))
def test_tostring_with_tail(self):
    # with_tail controls whether the tail text of the serialised
    # element itself is written; tails of its children always are.
    tostring = self.etree.tostring
    Element = self.etree.Element
    SubElement = self.etree.SubElement

    # Tree and tail texts as required by the expected strings below.
    a = Element('a')
    a.tail = "aTAIL"
    b = SubElement(a, 'b')
    b.tail = "bTAIL"
    c = SubElement(a, 'c')

    result = tostring(a)
    self.assertEqual(result, _bytes("<a><b/>bTAIL<c/></a>aTAIL"))

    result = tostring(a, with_tail=False)
    self.assertEqual(result, _bytes("<a><b/>bTAIL<c/></a>"))

    result = tostring(a, with_tail=True)
    self.assertEqual(result, _bytes("<a><b/>bTAIL<c/></a>aTAIL"))
4273 def test_tostring_method_html_with_tail(self):
4274 tostring = self.etree.tostring
4275 html = self.etree.fromstring(
4277 '<div><p>Some text<i>\r\n</i></p></div>\r\n'
4279 parser=self.etree.HTMLParser())
4280 self.assertEqual(html.tag, 'html')
4281 div = html.find('.//div')
4282 self.assertEqual(div.tail, '\r\n')
4283 result = tostring(div, method='html')
4286 _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
4287 result = tostring(div, method='html', with_tail=True)
4290 _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
4291 result = tostring(div, method='html', with_tail=False)
4294 _bytes("<div><p>Some text<i>\r\n</i></p></div>"))
4296 def test_standalone(self):
4297 tostring = self.etree.tostring
4298 XML = self.etree.XML
4299 ElementTree = self.etree.ElementTree
4300 Element = self.etree.Element
4302 tree = Element("root").getroottree()
4303 self.assertEqual(None, tree.docinfo.standalone)
4305 tree = XML(_bytes("<root/>")).getroottree()
4306 self.assertEqual(None, tree.docinfo.standalone)
4309 "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"
4311 self.assertEqual(True, tree.docinfo.standalone)
4314 "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"
4316 self.assertEqual(False, tree.docinfo.standalone)
4318 def test_tostring_standalone(self):
4319 tostring = self.etree.tostring
4320 XML = self.etree.XML
4321 ElementTree = self.etree.ElementTree
4323 root = XML(_bytes("<root/>"))
4325 tree = ElementTree(root)
4326 self.assertEqual(None, tree.docinfo.standalone)
4328 result = tostring(root, xml_declaration=True, encoding="ASCII")
4329 self.assertEqual(result, _bytes(
4330 "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
4332 result = tostring(root, xml_declaration=True, encoding="ASCII",
4334 self.assertEqual(result, _bytes(
4335 "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
4337 tree = ElementTree(XML(result))
4338 self.assertEqual(True, tree.docinfo.standalone)
4340 result = tostring(root, xml_declaration=True, encoding="ASCII",
4342 self.assertEqual(result, _bytes(
4343 "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"))
4345 tree = ElementTree(XML(result))
4346 self.assertEqual(False, tree.docinfo.standalone)
4348 def test_tostring_standalone_in_out(self):
4349 tostring = self.etree.tostring
4350 XML = self.etree.XML
4351 ElementTree = self.etree.ElementTree
4354 "<?xml version='1.0' encoding='UTF-8' standalone='yes'?>\n<root/>"))
4356 tree = ElementTree(root)
4357 self.assertEqual(True, tree.docinfo.standalone)
4359 result = tostring(root, xml_declaration=True, encoding="ASCII")
4360 self.assertEqual(result, _bytes(
4361 "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
4363 result = tostring(root, xml_declaration=True, encoding="ASCII",
4365 self.assertEqual(result, _bytes(
4366 "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
4368 def test_tostring_method_text_encoding(self):
4369 tostring = self.etree.tostring
4370 Element = self.etree.Element
4371 SubElement = self.etree.SubElement
4376 b = SubElement(a, 'b')
4378 b.tail = _str("Søk på nettet")
4379 c = SubElement(a, 'c')
4382 result = tostring(a, method="text", encoding="UTF-16")
4384 self.assertEqual(_str('ABSøk på nettetCtail').encode("UTF-16"),
4387 def test_tostring_method_text_unicode(self):
4388 tostring = self.etree.tostring
4389 Element = self.etree.Element
4390 SubElement = self.etree.SubElement
4393 a.text = _str('Søk på nettetA')
4395 b = SubElement(a, 'b')
4397 b.tail = _str('Søk på nettetB')
4398 c = SubElement(a, 'c')
4401 self.assertRaises(UnicodeEncodeError,
4402 tostring, a, method="text")
4405 _str('Søk på nettetABSøk på nettetBCtail').encode('utf-8'),
4406 tostring(a, encoding="UTF-8", method="text"))
def test_tounicode(self):
    # tounicode() must return a text string holding the serialised tree.
    tounicode = self.etree.tounicode
    Element = self.etree.Element
    SubElement = self.etree.SubElement

    # Root element; the canonical form expected below is rooted at <a>.
    a = Element('a')
    b = SubElement(a, 'b')
    c = SubElement(a, 'c')

    self.assertTrue(isinstance(tounicode(a), _unicode))
    self.assertEqual(_bytes('<a><b></b><c></c></a>'),
                     canonicalize(tounicode(a)))
def test_tounicode_element(self):
    # tounicode() on a non-root element serialises only that subtree.
    tounicode = self.etree.tounicode
    Element = self.etree.Element
    SubElement = self.etree.SubElement

    # Parent of the two subtrees that are serialised below.
    a = Element('a')
    b = SubElement(a, 'b')
    c = SubElement(a, 'c')
    d = SubElement(c, 'd')
    self.assertTrue(isinstance(tounicode(b), _unicode))
    self.assertTrue(isinstance(tounicode(c), _unicode))
    self.assertEqual(_bytes('<b></b>'),
                     canonicalize(tounicode(b)))
    self.assertEqual(_bytes('<c><d></d></c>'),
                     canonicalize(tounicode(c)))
def test_tounicode_none(self):
    # Passing None instead of an element must raise a TypeError.
    serialize = self.etree.tounicode
    with self.assertRaises(TypeError):
        serialize(None)
def test_tounicode_element_tail(self):
    # The tail text of the serialised element is part of the output.
    tounicode = self.etree.tounicode
    Element = self.etree.Element
    SubElement = self.etree.SubElement

    a = Element('a')
    b = SubElement(a, 'b')
    c = SubElement(a, 'c')
    d = SubElement(c, 'd')
    # Tail text that the expected strings below end with.
    b.tail = 'Foo'

    self.assertTrue(isinstance(tounicode(b), _unicode))
    self.assertTrue(tounicode(b) == '<b/>Foo' or
                    tounicode(b) == '<b />Foo')
def test_tounicode_pretty(self):
    # tounicode() output with the default, with pretty_print=False and
    # with pretty_print=True.
    tounicode = self.etree.tounicode
    Element = self.etree.Element
    SubElement = self.etree.SubElement

    # Root element; all expected strings below are rooted at <a>.
    a = Element('a')
    b = SubElement(a, 'b')
    c = SubElement(a, 'c')

    result = tounicode(a)
    self.assertEqual(result, "<a><b/><c/></a>")

    result = tounicode(a, pretty_print=False)
    self.assertEqual(result, "<a><b/><c/></a>")

    result = tounicode(a, pretty_print=True)
    self.assertEqual(result, "<a>\n  <b/>\n  <c/>\n</a>\n")
def test_tostring_unicode(self):
    # tostring(encoding=_unicode) must return a text string holding the
    # serialised tree.
    tostring = self.etree.tostring
    Element = self.etree.Element
    SubElement = self.etree.SubElement

    # Root element; the canonical form expected below is rooted at <a>.
    a = Element('a')
    b = SubElement(a, 'b')
    c = SubElement(a, 'c')

    self.assertTrue(isinstance(tostring(a, encoding=_unicode), _unicode))
    self.assertEqual(_bytes('<a><b></b><c></c></a>'),
                     canonicalize(tostring(a, encoding=_unicode)))
def test_tostring_unicode_element(self):
    # tostring(encoding=_unicode) on a non-root element serialises only
    # that subtree.
    tostring = self.etree.tostring
    Element = self.etree.Element
    SubElement = self.etree.SubElement

    # Parent of the two subtrees that are serialised below.
    a = Element('a')
    b = SubElement(a, 'b')
    c = SubElement(a, 'c')
    d = SubElement(c, 'd')
    self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
    self.assertTrue(isinstance(tostring(c, encoding=_unicode), _unicode))
    self.assertEqual(_bytes('<b></b>'),
                     canonicalize(tostring(b, encoding=_unicode)))
    self.assertEqual(_bytes('<c><d></d></c>'),
                     canonicalize(tostring(c, encoding=_unicode)))
def test_tostring_unicode_none(self):
    # Passing None instead of an element must raise a TypeError, also
    # when text output is requested.
    serialize = self.etree.tostring
    with self.assertRaises(TypeError):
        serialize(None, encoding=_unicode)
def test_tostring_unicode_element_tail(self):
    # The tail text of the serialised element is part of the output.
    tostring = self.etree.tostring
    Element = self.etree.Element
    SubElement = self.etree.SubElement

    a = Element('a')
    b = SubElement(a, 'b')
    c = SubElement(a, 'c')
    d = SubElement(c, 'd')
    # Tail text that the expected strings below end with.
    b.tail = 'Foo'

    self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
    self.assertTrue(tostring(b, encoding=_unicode) == '<b/>Foo' or
                    tostring(b, encoding=_unicode) == '<b />Foo')
def test_tostring_unicode_pretty(self):
    # tostring(encoding=_unicode) output with the default, with
    # pretty_print=False and with pretty_print=True.
    tostring = self.etree.tostring
    Element = self.etree.Element
    SubElement = self.etree.SubElement

    # Root element; all expected strings below are rooted at <a>.
    a = Element('a')
    b = SubElement(a, 'b')
    c = SubElement(a, 'c')

    result = tostring(a, encoding=_unicode)
    self.assertEqual(result, "<a><b/><c/></a>")

    result = tostring(a, encoding=_unicode, pretty_print=False)
    self.assertEqual(result, "<a><b/><c/></a>")

    result = tostring(a, encoding=_unicode, pretty_print=True)
    self.assertEqual(result, "<a>\n  <b/>\n  <c/>\n</a>\n")
def test_pypy_proxy_collect(self):
    # The child must stay reachable through its parent after a garbage
    # collection run, although no Python reference to it is held here.
    import gc  # local import: only this test needs it

    root = etree.Element('parent')
    etree.SubElement(root, 'child')

    self.assertEqual(len(root), 1)
    self.assertEqual(root[0].tag, 'child')

    # in PyPy, GC used to kill the Python proxy instance without cleanup
    gc.collect()
    self.assertEqual(len(root), 1)
    self.assertEqual(root[0].tag, 'child')
4553 def test_element_refcycle(self):
4554 class SubEl(etree.ElementBase):
4559 self.assertEqual('SubEl', el1.tag)
4560 self.assertEqual('SubEl', el2.tag)
4566 # not really testing anything here, but it shouldn't crash
4568 def test_proxy_collect_siblings(self):
4569 root = etree.Element('parent')
4570 c1 = etree.SubElement(root, 'child1')
4571 c2 = etree.SubElement(root, 'child2')
4577 # trigger deallocation attempt of c1
4579 # make sure it wasn't deallocated
4580 self.assertEqual('child1', c2.getprevious().tag)
4582 def test_proxy_collect_siblings_text(self):
4583 root = etree.Element('parent')
4584 c1 = etree.SubElement(root, 'child1')
4585 c2 = etree.SubElement(root, 'child2')
4593 # trigger deallocation attempt of c1
4595 # make sure it wasn't deallocated
4596 self.assertEqual('child1', c2.getprevious().tag)
4597 self.assertEqual('abc', c2.getprevious().tail)
4601 def _writeElement(self, element, encoding='us-ascii', compression=0):
4602 """Write out element for comparison.
4604 ElementTree = self.etree.ElementTree
4606 tree = ElementTree(element=element)
4607 tree.write(f, encoding=encoding, compression=compression)
4610 data = zlib.decompress(data)
4611 return canonicalize(data)
4614 class _XIncludeTestCase(HelperTestCase):
4615 def test_xinclude_text(self):
4616 filename = fileInTestDir('test_broken.xml')
4617 root = etree.XML(_bytes('''\
4618 <doc xmlns:xi="http://www.w3.org/2001/XInclude">
4619 <xi:include href="%s" parse="text"/>
4621 ''' % path2url(filename)))
4622 old_text = root.text
4623 content = read_file(filename)
4624 old_tail = root[0].tail
4626 self.include( etree.ElementTree(root) )
4627 self.assertEqual(old_text + content + old_tail,
4630 def test_xinclude(self):
4631 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'))
4632 self.assertNotEqual(
4634 tree.getroot()[1].tag)
4636 self.include( tree )
4637 # check whether we find it replaced with included data
4640 tree.getroot()[1].tag)
4642 def test_xinclude_resolver(self):
4643 class res(etree.Resolver):
4644 include_text = read_file(fileInTestDir('test.xml'))
4646 def resolve(self, url, id, context):
4647 if url.endswith(".dtd"):
4648 self.called["dtd"] = True
4649 return self.resolve_filename(
4650 fileInTestDir('test.dtd'), context)
4651 elif url.endswith("test_xinclude.xml"):
4652 self.called["input"] = True
4653 return None # delegate to default resolver
4655 self.called["include"] = True
4656 return self.resolve_string(self.include_text, context)
4658 res_instance = res()
4659 parser = etree.XMLParser(load_dtd = True)
4660 parser.resolvers.add(res_instance)
4662 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
4667 called = list(res_instance.called.items())
4670 [("dtd", True), ("include", True), ("input", True)],
4673 def test_xinclude_resolver_recursive(self):
4674 data = textwrap.dedent('''
4675 <doc xmlns:xi="http://www.w3.org/2001/XInclude">
4677 <xi:include href="./test.xml" />
4681 class Resolver(etree.Resolver):
4684 def resolve(self, url, id, context):
4685 if url.endswith("test_xinclude.xml"):
4686 assert not self.called.get("input")
4687 self.called["input"] = True
4688 return None # delegate to default resolver
4689 elif url.endswith('/test5.xml'):
4690 assert not self.called.get("DONE")
4691 self.called["DONE"] = True
4692 return self.resolve_string('<DONE/>', context)
4694 _, filename = url.rsplit('/', 1)
4695 assert not self.called.get(filename)
4696 self.called[filename] = True
4697 next_data = data.replace(
4698 'test.xml', 'test%d.xml' % len(self.called))
4699 return self.resolve_string(next_data, context)
4701 res_instance = Resolver()
4702 parser = etree.XMLParser(load_dtd=True)
4703 parser.resolvers.add(res_instance)
4705 tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
4710 called = list(res_instance.called.items())
4713 [("DONE", True), ("input", True), ("test.xml", True),
4714 ("test2.xml", True), ("test3.xml", True), ("test4.xml", True)],
4718 class ETreeXIncludeTestCase(_XIncludeTestCase):
4719 def include(self, tree):
4723 class ElementIncludeTestCase(_XIncludeTestCase):
4724 from lxml import ElementInclude
def include(self, tree, loader=None, max_depth=None):
    # Run ElementInclude.include() on the root element of the tree,
    # forwarding the loader and the depth limit unchanged.
    root = tree.getroot()
    self.ElementInclude.include(root, loader=loader, max_depth=max_depth)
4731 XINCLUDE["Recursive1.xml"] = """\
4732 <?xml version='1.0'?>
4733 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4734 <p>The following is the source code of Recursive2.xml:</p>
4735 <xi:include href="Recursive2.xml"/>
4739 XINCLUDE["Recursive2.xml"] = """\
4740 <?xml version='1.0'?>
4741 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4742 <p>The following is the source code of Recursive3.xml:</p>
4743 <xi:include href="Recursive3.xml"/>
4747 XINCLUDE["Recursive3.xml"] = """\
4748 <?xml version='1.0'?>
4749 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4750 <p>The following is the source code of Recursive1.xml:</p>
4751 <xi:include href="Recursive1.xml"/>
4755 XINCLUDE["NonRecursive1.xml"] = """\
4756 <?xml version='1.0'?>
4757 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4758 <p>The following is multiple times the source code of NonRecursive3.xml:</p>
4759 <xi:include href="NonRecursive3.xml"/>
4760 <xi:include href="NonRecursive3.xml"/>
4761 <p>The following is multiple times the source code of Leaf.xml:</p>
4762 <xi:include href="Leaf.xml"/>
4763 <xi:include href="Leaf.xml"/>
4764 <xi:include href="Leaf.xml"/>
4765 <p>One more time the source code of NonRecursive3.xml:</p>
4766 <xi:include href="NonRecursive3.xml"/>
4770 XINCLUDE["NonRecursive2.xml"] = """\
4771 <?xml version='1.0'?>
4772 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4773 <p>The following is multiple times the source code of NonRecursive3.xml:</p>
4774 <xi:include href="NonRecursive3.xml"/>
4775 <xi:include href="NonRecursive3.xml"/>
4779 XINCLUDE["NonRecursive3.xml"] = """\
4780 <?xml version='1.0'?>
4781 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4782 <p>The following is multiple times the source code of Leaf.xml:</p>
4783 <xi:include href="Leaf.xml"/>
4784 <xi:include href="Leaf.xml"/>
4788 XINCLUDE["Leaf.xml"] = """\
4789 <?xml version='1.0'?>
4790 <document xmlns:xi="http://www.w3.org/2001/XInclude">
4791 <p>No further includes</p>
4795 def xinclude_loader(self, href, parse="xml", encoding=None):
4797 data = textwrap.dedent(self.XINCLUDE[href])
4799 raise OSError("resource not found")
4801 data = etree.fromstring(data)
def test_xinclude_failures(self):
    # Every scenario loads a fresh copy of Recursive1.xml, runs the
    # include step with the given keyword arguments and then checks the
    # exception class as well as its message.
    scenarios = (
        # Test infinitely recursive includes.
        ({}, "FatalIncludeError",
         "recursive include of 'Recursive2.xml' detected"),
        # Test 'max_depth' limitation.
        ({"max_depth": None}, "FatalIncludeError",
         "recursive include of 'Recursive2.xml' detected"),
        ({"max_depth": 0}, "LimitedRecursiveIncludeError",
         "maximum xinclude depth reached when including file Recursive2.xml"),
        ({"max_depth": 1}, "LimitedRecursiveIncludeError",
         "maximum xinclude depth reached when including file Recursive3.xml"),
        ({"max_depth": 2}, "LimitedRecursiveIncludeError",
         "maximum xinclude depth reached when including file Recursive1.xml"),
        ({"max_depth": 3}, "FatalIncludeError",
         "recursive include of 'Recursive2.xml' detected"),
    )
    for options, error_name, message in scenarios:
        document = self.xinclude_loader("Recursive1.xml").getroottree()
        with self.assertRaises(getattr(self.ElementInclude, error_name)) as cm:
            self.include(document, self.xinclude_loader, **options)
        self.assertEqual(str(cm.exception), message)
def test_multiple_include_of_same_file(self):
    # Test that including the same file multiple times, but on the same level
    # is not detected as recursive include
    top_level_files = (
        "NonRecursive3.xml",
        # same but for more than one level
        "NonRecursive1.xml",
        # same but no Leaf.xml in top-level file
        "NonRecursive2.xml",
    )
    for name in top_level_files:
        document = self.xinclude_loader(name).getroottree()
        self.include(document, self.xinclude_loader)
class ETreeC14NTestCase(HelperTestCase):
    """Serialisation tests for the 'c14n' and 'c14n2' output methods."""

    @staticmethod
    def _c14n(tree, **options):
        # Run tree.write_c14n() against an in-memory buffer, return the bytes.
        sink = BytesIO()
        tree.write_c14n(sink, **options)
        return sink.getvalue()

    @staticmethod
    def _c14n2(tree, **options):
        # Run tree.write(method='c14n2') against an in-memory buffer.
        sink = BytesIO()
        tree.write(sink, method='c14n2', **options)
        return sink.getvalue()

    def test_c14n(self):
        doc = self.parse(_bytes('<a><b/></a>'))
        self.assertEqual(_bytes('<a><b></b></a>'),
                         self._c14n(doc))

    def test_c14n_gzip(self):
        doc = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
        packed = self._c14n(doc, compression=9)
        # the compressed output is read back through the gzip module
        with gzip.GzipFile(fileobj=BytesIO(packed)) as gzfile:
            unpacked = gzfile.read()
        self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
                         unpacked)

    def test_c14n_file(self):
        doc = self.parse(_bytes('<a><b/></a>'))
        with tmpfile() as path:
            doc.write_c14n(path)
            written = read_file(path, 'rb')
        self.assertEqual(_bytes('<a><b></b></a>'),
                         written)

    def test_c14n_file_gzip(self):
        doc = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
        with tmpfile() as path:
            doc.write_c14n(path, compression=9)
            with gzip.open(path, 'rb') as gzfile:
                written = gzfile.read()
        self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
                         written)

    def test_c14n2_file_gzip(self):
        doc = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
        with tmpfile() as path:
            doc.write(path, method='c14n2', compression=9)
            with gzip.open(path, 'rb') as gzfile:
                written = gzfile.read()
        self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
                         written)

    def test_c14n2_with_text(self):
        doc = self.parse(
            b'<?xml version="1.0"?> <a> abc \n <b> btext </b> btail <c/> ctail </a> ')

        # default: text and tail whitespace is written unchanged
        self.assertEqual(b'<a> abc \n <b> btext </b> btail <c></c> ctail </a>',
                         self._c14n2(doc))

        # strip_text=True: surrounding whitespace is removed
        self.assertEqual(b'<a>abc<b>btext</b>btail<c></c>ctail</a>',
                         self._c14n2(doc, strip_text=True))

    def test_c14n_with_comments(self):
        doc = self.parse(_bytes('<!--hi--><a><!--ho--><b/></a><!--hu-->'))
        # (write_c14n options, expected output) - comments are on by default
        cases = (
            ({}, '<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
            ({'with_comments': True}, '<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
            ({'with_comments': False}, '<a><b></b></a>'),
        )
        for options, expected in cases:
            self.assertEqual(_bytes(expected),
                             self._c14n(doc, **options))

    def test_c14n2_with_comments(self):
        doc = self.parse(b'<!--hi--> <a> <!-- ho --> <b/> </a> <!-- hu -->')
        cases = (
            ({}, b'<!--hi-->\n<a> <!-- ho --> <b></b> </a>\n<!-- hu -->'),
            ({'with_comments': True}, b'<!--hi-->\n<a> <!-- ho --> <b></b> </a>\n<!-- hu -->'),
            ({'with_comments': False}, b'<a> <b></b> </a>'),
        )
        for options, expected in cases:
            self.assertEqual(
                expected,
                etree.tostring(doc, method='c14n2', **options))

    def test_c14n2_with_comments_strip_text(self):
        doc = self.parse(b'<!--hi--> <a> <!-- ho --> <b/> </a> <!-- hu -->')
        cases = (
            (True, b'<!--hi-->\n<a><!-- ho --><b></b></a>\n<!-- hu -->'),
            (False, b'<a><b></b></a>'),
        )
        for keep_comments, expected in cases:
            self.assertEqual(
                expected,
                etree.tostring(doc, method='c14n2', with_comments=keep_comments, strip_text=True))

    def test_c14n_tostring_with_comments(self):
        doc = self.parse(_bytes('<!--hi--><a><!--ho--><b/></a><!--hu-->'))
        cases = (
            ({}, '<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
            ({'with_comments': True}, '<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
            ({'with_comments': False}, '<a><b></b></a>'),
        )
        for options, expected in cases:
            result = etree.tostring(doc, method='c14n', **options)
            self.assertEqual(_bytes(expected),
                             result)

    def test_c14n2_tostring_with_comments(self):
        doc = self.parse(b'<!--hi--><a><!--ho--><b/></a><!--hu-->')
        cases = (
            ({}, b'<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
            ({'with_comments': True}, b'<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
            ({'with_comments': False}, b'<a><b></b></a>'),
        )
        for options, expected in cases:
            result = etree.tostring(doc, method='c14n2', **options)
            self.assertEqual(expected,
                             result)

    def test_c14n_element_tostring_with_comments(self):
        doc = self.parse(_bytes('<!--hi--><a><!--ho--><b/></a><!--hu-->'))
        # serialising the root element: only the comment inside <a> is expected
        cases = (
            ({}, '<a><!--ho--><b></b></a>'),
            ({'with_comments': True}, '<a><!--ho--><b></b></a>'),
            ({'with_comments': False}, '<a><b></b></a>'),
        )
        for options, expected in cases:
            result = etree.tostring(doc.getroot(), method='c14n', **options)
            self.assertEqual(_bytes(expected),
                             result)

    def test_c14n_exclusive(self):
        doc = self.parse(_bytes(
            '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
        cases = (
            ({},
             '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
            ({'exclusive': False},
             '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
            ({'exclusive': True},
             '<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
            ({'exclusive': True, 'inclusive_ns_prefixes': ['z']},
             '<a xmlns="http://abc" xmlns:z="http://cde"><z:b></z:b></a>'),
        )
        for options, expected in cases:
            self.assertEqual(_bytes(expected),
                             self._c14n(doc, **options))

    def test_c14n_tostring_exclusive(self):
        doc = self.parse(_bytes(
            '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
        cases = (
            ({},
             '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
            ({'exclusive': False},
             '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
            ({'exclusive': True},
             '<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
            ({'exclusive': True, 'inclusive_ns_prefixes': ['y']},
             '<a xmlns="http://abc" xmlns:y="http://bcd"><z:b xmlns:z="http://cde"></z:b></a>'),
        )
        for options, expected in cases:
            result = etree.tostring(doc, method='c14n', **options)
            self.assertEqual(_bytes(expected),
                             result)

    def test_c14n_element_tostring_exclusive(self):
        doc = self.parse(_bytes(
            '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))

        # the root element
        root_cases = (
            ({},
             '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
            ({'exclusive': False},
             '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
            ({'exclusive': True},
             '<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
        )
        for options, expected in root_cases:
            result = etree.tostring(doc.getroot(), method='c14n', **options)
            self.assertEqual(_bytes(expected),
                             result)

        # its first child, <z:b>
        child_cases = (
            ({'exclusive': False},
             '<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
            ({'exclusive': True},
             '<z:b xmlns:z="http://cde"></z:b>'),
            ({'exclusive': True, 'inclusive_ns_prefixes': ['y']},
             '<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
        )
        for options, expected in child_cases:
            result = etree.tostring(doc.getroot()[0], method='c14n', **options)
            self.assertEqual(_bytes(expected),
                             result)

    def test_c14n_tostring_inclusive_ns_prefixes(self):
        """ Regression test to fix memory allocation issues (use 3+ inclusive NS spaces)"""
        doc = self.parse(_bytes(
            '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))

        result = etree.tostring(doc, method='c14n', exclusive=True, inclusive_ns_prefixes=['x', 'y', 'z'])
        self.assertEqual(_bytes('<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
                         result)
class ETreeWriteTestCase(HelperTestCase):
    """Tests for ElementTree.write() to buffers, files, gzip output and URLs."""

    @staticmethod
    def _written(tree, **options):
        # Run tree.write() against an in-memory buffer and return its bytes.
        sink = BytesIO()
        tree.write(sink, **options)
        return sink.getvalue()

    @staticmethod
    def _gunzip(packed):
        # Decompress a gzip byte string through an in-memory file object.
        with gzip.GzipFile(fileobj=BytesIO(packed)) as gzfile:
            return gzfile.read()

    def test_write(self):
        doc = self.parse(_bytes('<a><b/></a>'))
        self.assertEqual(_bytes('<a><b/></a>'),
                         self._written(doc))

    def test_write_doctype(self):
        doc = self.parse(_bytes('<a><b/></a>'))
        self.assertEqual(_bytes('HUHU\n<a><b/></a>'),
                         self._written(doc, doctype='HUHU'))

    def test_write_gzip(self):
        doc = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
        packed = self._written(doc, compression=9)
        self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
                         self._gunzip(packed))

    def test_write_gzip_doctype(self):
        doc = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
        packed = self._written(doc, compression=9, doctype='<!DOCTYPE a>')
        self.assertEqual(_bytes('<!DOCTYPE a>\n<a>'+'<b/>'*200+'</a>'),
                         self._gunzip(packed))

    def test_write_gzip_level(self):
        doc = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
        plain = self._written(doc, compression=0)

        # writing without a compression argument must match compression=0
        self.assertEqual(self._written(doc), plain)

        unpacked = []
        for level in (1, 9):
            packed = self._written(doc, compression=level)
            self.assertTrue(len(packed) <= len(plain))
            unpacked.append(self._gunzip(packed))

        # level 0 output and both decompressed outputs equal the document
        for payload in [plain] + unpacked:
            self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
                             payload)

    def test_write_file(self):
        doc = self.parse(_bytes('<a><b/></a>'))
        with tmpfile() as path:
            doc.write(path)
            written = read_file(path, 'rb')
        self.assertEqual(_bytes('<a><b/></a>'),
                         written)

    def test_write_file_gzip(self):
        doc = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
        with tmpfile() as path:
            doc.write(path, compression=9)
            with gzip.open(path, 'rb') as gzfile:
                written = gzfile.read()
        self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
                         written)

    def test_write_file_gzip_parse(self):
        doc = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
        with tmpfile() as path:
            doc.write(path, compression=9)
            # the compressed file is handed to etree.parse() by name
            reparsed = etree.parse(path)
            written = etree.tostring(reparsed)
        self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
                         written)

    def test_write_file_gzipfile_parse(self):
        doc = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
        with tmpfile() as path:
            doc.write(path, compression=9)
            # here the parser reads from an explicit GzipFile object instead
            with gzip.GzipFile(path) as gzfile:
                reparsed = etree.parse(gzfile)
                written = etree.tostring(reparsed)
        self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'),
                         written)

    def test_write_file_url(self):
        xml = _bytes('<a>'+'<b/>'*200+'</a>')
        doc = self.parse(xml)
        with tmpfile(prefix="p+%20", suffix=".xml") as filename:
            if sys.platform != 'win32':
                location = filename
            else:
                # Windows: forward slashes plus a leading '/' for the URL path
                location = '/' + filename.replace('\\', '/')
            doc.write('file://' + location)
            data = read_file(filename, 'rb').replace(_bytes('\n'), _bytes(''))
        self.assertEqual(data, xml)
class ETreeErrorLogTest(HelperTestCase):
    """Tests for the error log attached to parse errors."""
    etree = etree

    def test_parse_error_logging(self):
        parse = self.etree.parse
        f = BytesIO('<a><b></c></b></a>')
        self.etree.clear_error_log()
        try:
            # Parsing the mismatched tags has to fail.  Asserting that here
            # turns an unexpected success into a plain test failure instead
            # of an unrelated error further down.
            with self.assertRaises(SyntaxError) as cm:
                parse(f)
        finally:
            f.close()
        logs = cm.exception.error_log

        # At least one log entry must match each expected property.
        self.assertTrue(any('mismatch' in log.message for log in logs))
        self.assertTrue(any('PARSER' in log.domain_name for log in logs))
        self.assertTrue(any('ERR_TAG_NAME_MISMATCH' in log.type_name
                            for log in logs))
        self.assertTrue(any(1 == log.line for log in logs))
        self.assertTrue(any(15 == log.column for log in logs))

    def _test_python_error_logging(self):
        """This can't really be tested as long as there isn't a way to
        reset the logging setup ...
        """
        # The leading underscore keeps unittest from collecting this method.
        parse = self.etree.parse

        messages = []

        class Logger(self.etree.PyErrorLog):
            # Collect every formatted message instead of passing it on.
            def log(self, entry, message, *args):
                messages.append(message)

        self.etree.use_global_python_log(Logger())
        f = BytesIO('<a><b></c></b></a>')
        try:
            parse(f)
        except SyntaxError:
            # expected: only the logged messages are inspected below
            pass
        finally:
            f.close()

        self.assertTrue(any('mismatch' in message for message in messages))
        self.assertTrue(any(':PARSER:' in message for message in messages))
        self.assertTrue(any(':ERR_TAG_NAME_MISMATCH:' in message
                            for message in messages))
        self.assertTrue(any(':1:15:' in message for message in messages))
class XMLPullParserTest(unittest.TestCase):
    """Tests for XMLPullParser driven by custom parser targets."""
    etree = etree

    def assert_event_tags(self, events, expected):
        # Compare element events by (action, tag) only.
        seen = [(action, node.tag) for action, node in events]
        self.assertEqual(seen,
                         expected)

    def test_pull_from_simple_target(self):
        class Target(object):
            def start(self, tag, attrib):
                return 'start(%s)' % tag

            def end(self, tag):
                return 'end(%s)' % tag

            def close(self):
                return 'close()'

        pull = self.etree.XMLPullParser(target=Target())
        queue = pull.read_events()

        # no event list is given; the assertions below expect 'end' events only
        pull.feed('<root><element>')
        self.assertFalse(list(queue))
        self.assertFalse(list(queue))

        steps = (
            ('</element><child>', [('end', 'end(element)')]),
            ('</child>', [('end', 'end(child)')]),
            ('</root>', [('end', 'end(root)')]),
        )
        for chunk, expected in steps:
            pull.feed(chunk)
            self.assertEqual(expected, list(queue))

        self.assertFalse(list(queue))
        self.assertEqual('close()', pull.close())

    def test_pull_from_simple_target_start_end(self):
        class Target(object):
            def start(self, tag, attrib):
                return 'start(%s)' % tag

            def end(self, tag):
                return 'end(%s)' % tag

            def close(self):
                return 'close()'

        pull = self.etree.XMLPullParser(
            ['start', 'end'], target=Target())
        queue = pull.read_events()

        # each event carries whatever the target callback returned
        pull.feed('<root><element>')
        self.assertEqual(
            [('start', 'start(root)'), ('start', 'start(element)')],
            list(queue))
        self.assertFalse(list(queue))

        steps = (
            ('</element><child>',
             [('end', 'end(element)'), ('start', 'start(child)')]),
            ('</child>',
             [('end', 'end(child)')]),
            ('</root>',
             [('end', 'end(root)')]),
        )
        for chunk, expected in steps:
            pull.feed(chunk)
            self.assertEqual(
                expected,
                list(queue))

        self.assertFalse(list(queue))
        self.assertEqual('close()', pull.close())

    def test_pull_from_tree_builder(self):
        pull = self.etree.XMLPullParser(
            ['start', 'end'], target=etree.TreeBuilder())
        queue = pull.read_events()

        pull.feed('<root><element>')
        self.assert_event_tags(
            queue, [('start', 'root'), ('start', 'element')])
        self.assertFalse(list(queue))

        steps = (
            ('</element><child>', [('end', 'element'), ('start', 'child')]),
            ('</child>', [('end', 'child')]),
            ('</root>', [('end', 'root')]),
        )
        for chunk, expected in steps:
            pull.feed(chunk)
            self.assert_event_tags(
                queue, expected)

        self.assertFalse(list(queue))
        document_root = pull.close()
        self.assertEqual('root', document_root.tag)

    def test_pull_from_tree_builder_subclass(self):
        class Target(etree.TreeBuilder):
            def end(self, tag):
                # rename every element as it is closed
                closed = super(Target, self).end(tag)
                closed.tag += '-huhu'
                return closed

        pull = self.etree.XMLPullParser(
            ['start', 'end'], target=Target())
        queue = pull.read_events()

        # 'start' events are expected to show the original tag names
        pull.feed('<root><element>')
        self.assert_event_tags(
            queue, [('start', 'root'), ('start', 'element')])
        self.assertFalse(list(queue))

        steps = (
            ('</element><child>', [('end', 'element-huhu'), ('start', 'child')]),
            ('</child>', [('end', 'child-huhu')]),
            ('</root>', [('end', 'root-huhu')]),
        )
        for chunk, expected in steps:
            pull.feed(chunk)
            self.assert_event_tags(
                queue, expected)

        self.assertFalse(list(queue))
        document_root = pull.close()
        self.assertEqual('root-huhu', document_root.tag)
def test_suite():
    """Build the test suite for this module.

    The suite contains the TestCase classes of this module, the doctests
    of the ``selftest``/``selftest2`` modules and of ``etree`` itself, and
    the doctests of several documentation text files.
    """
    # unittest.makeSuite() is deprecated since Python 3.11 and removed in
    # 3.13; the default loader collects the same 'test*' methods.
    load_case = unittest.defaultTestLoader.loadTestsFromTestCase

    suite = unittest.TestSuite()
    test_cases = (
        ETreeOnlyTestCase,
        ETreeXIncludeTestCase,
        ElementIncludeTestCase,
        ETreeC14NTestCase,
        ETreeWriteTestCase,
        ETreeErrorLogTest,
        XMLPullParserTest,
    )
    for test_case in test_cases:
        suite.addTests([load_case(test_case)])

    # add original doctests from ElementTree selftest modules
    from . import selftest, selftest2
    suite.addTests(doctest.DocTestSuite(selftest))
    suite.addTests(doctest.DocTestSuite(selftest2))

    # add doctests
    suite.addTests(doctest.DocTestSuite(etree))
    doc_files = (
        '../../../doc/tutorial.txt',
        '../../../doc/api.txt',
        '../../../doc/FAQ.txt',
        '../../../doc/parsing.txt',
        '../../../doc/resolvers.txt',
    )
    for doc_file in doc_files:
        suite.addTests(
            [make_doctest(doc_file)])
    return suite
if __name__ == '__main__':
    # Running the module directly only prints a hint to use test.py.
    usage_hint = 'to test use test.py %s' % __file__
    print(usage_hint)