Imported Upstream version 4.5.2
[platform/upstream/python-lxml.git] / src / lxml / tests / test_etree.py
1 # -*- coding: utf-8 -*-
2
3 """
4 Tests specific to the extended etree API
5
6 Tests that apply to the general ElementTree API should go into
7 test_elementtree
8 """
9
10 from __future__ import absolute_import
11
12 from collections import OrderedDict
13 import os.path
14 import unittest
15 import copy
16 import sys
17 import re
18 import gc
19 import operator
20 import textwrap
21 import zlib
22 import gzip
23
24 from .common_imports import etree, StringIO, BytesIO, HelperTestCase
25 from .common_imports import fileInTestDir, fileUrlInTestDir, read_file, path2url, tmpfile
26 from .common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest
27 from .common_imports import canonicalize, _str, _bytes
28
# Report which library versions and interpreter settings this run exercises.
print((
    "\nTESTED VERSION: %s"
    "\n    Python:           %r"
    "\n    lxml.etree:       %r"
    "\n    libxml used:      %r"
    "\n    libxml compiled:  %r"
    "\n    libxslt used:     %r"
    "\n    libxslt compiled: %r"
    "\n    FS encoding:      %s"
    "\n    Default encoding: %s"
    "\n    Max Unicode:      %s"
    "\n"
) % (
    etree.__version__,
    sys.version_info,
    etree.LXML_VERSION,
    etree.LIBXML_VERSION,
    etree.LIBXML_COMPILED_VERSION,
    etree.LIBXSLT_VERSION,
    etree.LIBXSLT_COMPILED_VERSION,
    sys.getfilesystemencoding(),
    sys.getdefaultencoding(),
    sys.maxunicode,
))
41
42 try:
43     _unicode = unicode
44 except NameError:
45     # Python 3
46     _unicode = str
47
48
49 class ETreeOnlyTestCase(HelperTestCase):
50     """Tests only for etree, not ElementTree"""
51     etree = etree
52
53     def test_version(self):
54         self.assertTrue(isinstance(etree.__version__, _unicode))
55         self.assertTrue(isinstance(etree.LXML_VERSION, tuple))
56         self.assertEqual(len(etree.LXML_VERSION), 4)
57         self.assertTrue(isinstance(etree.LXML_VERSION[0], int))
58         self.assertTrue(isinstance(etree.LXML_VERSION[1], int))
59         self.assertTrue(isinstance(etree.LXML_VERSION[2], int))
60         self.assertTrue(isinstance(etree.LXML_VERSION[3], int))
61         self.assertTrue(etree.__version__.startswith(
62             str(etree.LXML_VERSION[0])))
63
64     def test_c_api(self):
65         if hasattr(self.etree, '__pyx_capi__'):
66             # newer Pyrex compatible C-API
67             self.assertTrue(isinstance(self.etree.__pyx_capi__, dict))
68             self.assertTrue(len(self.etree.__pyx_capi__) > 0)
69         else:
70             # older C-API mechanism
71             self.assertTrue(hasattr(self.etree, '_import_c_api'))
72
73     def test_include_paths(self):
74         import lxml
75         includes = lxml.get_include()
76         self.assertTrue(includes)
77         self.assertTrue(len(includes) >= 2)
78         self.assertTrue(os.path.join(os.path.dirname(lxml.__file__), 'includes') in includes,
79                         includes)
80
81     def test_element_names(self):
82         Element = self.etree.Element
83         el = Element('name')
84         self.assertEqual(el.tag, 'name')
85         el = Element('{}name')
86         self.assertEqual(el.tag, 'name')
87
88     def test_element_name_empty(self):
89         Element = self.etree.Element
90         el = Element('name')
91         self.assertRaises(ValueError, Element, '{}')
92         self.assertRaises(ValueError, setattr, el, 'tag', '{}')
93
94         self.assertRaises(ValueError, Element, '{test}')
95         self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
96
97     def test_element_name_colon(self):
98         Element = self.etree.Element
99         self.assertRaises(ValueError, Element, 'p:name')
100         self.assertRaises(ValueError, Element, '{test}p:name')
101
102         el = Element('name')
103         self.assertRaises(ValueError, setattr, el, 'tag', 'p:name')
104
105     def test_element_name_quote(self):
106         Element = self.etree.Element
107         self.assertRaises(ValueError, Element, "p'name")
108         self.assertRaises(ValueError, Element, 'p"name')
109
110         self.assertRaises(ValueError, Element, "{test}p'name")
111         self.assertRaises(ValueError, Element, '{test}p"name')
112
113         el = Element('name')
114         self.assertRaises(ValueError, setattr, el, 'tag', "p'name")
115         self.assertRaises(ValueError, setattr, el, 'tag', 'p"name')
116
117     def test_element_name_space(self):
118         Element = self.etree.Element
119         self.assertRaises(ValueError, Element, ' name ')
120         self.assertRaises(ValueError, Element, 'na me')
121         self.assertRaises(ValueError, Element, '{test} name')
122
123         el = Element('name')
124         self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
125
126     def test_subelement_name_empty(self):
127         Element = self.etree.Element
128         SubElement = self.etree.SubElement
129
130         el = Element('name')
131         self.assertRaises(ValueError, SubElement, el, '{}')
132         self.assertRaises(ValueError, SubElement, el, '{test}')
133
134     def test_subelement_name_colon(self):
135         Element = self.etree.Element
136         SubElement = self.etree.SubElement
137
138         el = Element('name')
139         self.assertRaises(ValueError, SubElement, el, 'p:name')
140         self.assertRaises(ValueError, SubElement, el, '{test}p:name')
141
142     def test_subelement_name_quote(self):
143         Element = self.etree.Element
144         SubElement = self.etree.SubElement
145
146         el = Element('name')
147         self.assertRaises(ValueError, SubElement, el, "p'name")
148         self.assertRaises(ValueError, SubElement, el, "{test}p'name")
149
150         self.assertRaises(ValueError, SubElement, el, 'p"name')
151         self.assertRaises(ValueError, SubElement, el, '{test}p"name')
152
153     def test_subelement_name_space(self):
154         Element = self.etree.Element
155         SubElement = self.etree.SubElement
156
157         el = Element('name')
158         self.assertRaises(ValueError, SubElement, el, ' name ')
159         self.assertRaises(ValueError, SubElement, el, 'na me')
160         self.assertRaises(ValueError, SubElement, el, '{test} name')
161
162     def test_subelement_attribute_invalid(self):
163         Element = self.etree.Element
164         SubElement = self.etree.SubElement
165
166         el = Element('name')
167         self.assertRaises(ValueError, SubElement, el, 'name', {'a b c' : 'abc'})
168         self.assertRaises(ValueError, SubElement, el, 'name', {'a' : 'a\0\n'})
169         self.assertEqual(0, len(el))
170
171     def test_qname_empty(self):
172         QName = self.etree.QName
173         self.assertRaises(ValueError, QName, '')
174         self.assertRaises(ValueError, QName, None)
175         self.assertRaises(ValueError, QName, None, None)
176         self.assertRaises(ValueError, QName, 'test', '')
177
178     def test_qname_none(self):
179         QName = self.etree.QName
180         q = QName(None, 'TAG')
181         self.assertEqual('TAG', q)
182         self.assertEqual('TAG', q.localname)
183         self.assertEqual(None, q.namespace)
184
185     def test_qname_colon(self):
186         QName = self.etree.QName
187         self.assertRaises(ValueError, QName, 'p:name')
188         self.assertRaises(ValueError, QName, 'test', 'p:name')
189
190     def test_qname_space(self):
191         QName = self.etree.QName
192         self.assertRaises(ValueError, QName, ' name ')
193         self.assertRaises(ValueError, QName, 'na me')
194         self.assertRaises(ValueError, QName, 'test', ' name')
195
196     def test_qname_namespace_localname(self):
197         # ET doesn't have namespace/localname properties on QNames
198         QName = self.etree.QName
199         namespace, localname = 'http://myns', 'a'
200         qname = QName(namespace, localname)
201         self.assertEqual(namespace, qname.namespace)
202         self.assertEqual(localname, qname.localname)
203
204     def test_qname_element(self):
205         # ET doesn't have namespace/localname properties on QNames
206         QName = self.etree.QName
207         qname1 = QName('http://myns', 'a')
208         a = self.etree.Element(qname1, nsmap={'p' : 'http://myns'})
209
210         qname2 = QName(a)
211         self.assertEqual(a.tag, qname1.text)
212         self.assertEqual(a.tag, qname1)
213         self.assertEqual(qname1.text, qname2.text)
214         self.assertEqual(qname1, qname2.text)
215         self.assertEqual(qname1.text, qname2)
216         self.assertEqual(qname1, qname2)
217
218     def test_qname_text_resolve(self):
219         # ET doesn't resove QNames as text values
220         etree = self.etree
221         qname = etree.QName('http://myns', 'a')
222         a = etree.Element(qname, nsmap={'p' : 'http://myns'})
223         a.text = qname
224
225         self.assertEqual("p:a", a.text)
226
227     def test_nsmap_prefix_invalid(self):
228         etree = self.etree
229         self.assertRaises(ValueError,
230                           etree.Element, "root", nsmap={'"' : 'testns'})
231         self.assertRaises(ValueError,
232                           etree.Element, "root", nsmap={'&' : 'testns'})
233         self.assertRaises(ValueError,
234                           etree.Element, "root", nsmap={'a:b' : 'testns'})
235
236     def test_clear_keep_tail(self):
237         XML = self.etree.XML
238         tostring = self.etree.tostring
239         a = XML('<a aa="A"><b ba="B">B1</b>B2<c ca="C">C1</c>C2</a>')
240         a[0].clear(keep_tail=True)
241         self.assertEqual(_bytes('<a aa="A"><b/>B2<c ca="C">C1</c>C2</a>'), tostring(a))
242
243     def test_attribute_has_key(self):
244         # ET in Py 3.x has no "attrib.has_key()" method
245         XML = self.etree.XML
246
247         root = XML(_bytes('<foo bar="Bar" xmlns:ns="http://ns.codespeak.net/test" ns:baz="Baz" />'))
248         self.assertEqual(
249             True, root.attrib.has_key('bar'))
250         self.assertEqual(
251             False, root.attrib.has_key('baz'))
252         self.assertEqual(
253             False, root.attrib.has_key('hah'))
254         self.assertEqual(
255             True,
256             root.attrib.has_key('{http://ns.codespeak.net/test}baz'))
257
258     def test_attribute_set(self):
259         Element = self.etree.Element
260         root = Element("root")
261         root.set("attr", "TEST")
262         self.assertEqual("TEST", root.get("attr"))
263
264     def test_attribute_set_nonstring(self):
265         # ElementTree accepts arbitrary attribute values
266         # lxml.etree allows only strings
267         Element = self.etree.Element
268
269         root = Element("root")
270         root.set("attr", "TEST")
271         self.assertEqual("TEST", root.get("attr"))
272         self.assertRaises(TypeError, root.set, "newattr", 5)
273
274     def test_attrib_and_keywords(self):
275         Element = self.etree.Element
276
277         root = Element("root")
278         root.set("attr", "TEST")
279         self.assertEqual("TEST", root.attrib["attr"])
280
281         root2 = Element("root2", root.attrib, attr2='TOAST')
282         self.assertEqual("TEST", root2.attrib["attr"])
283         self.assertEqual("TOAST", root2.attrib["attr2"])
284         self.assertEqual(None, root.attrib.get("attr2"))
285
286     def test_attrib_order(self):
287         Element = self.etree.Element
288
289         keys = ["attr%d" % i for i in range(12, 4, -1)]
290         values = ["TEST-%d" % i for i in range(12, 4, -1)]
291         items = list(zip(keys, values))
292
293         root = Element("root")
294         for key, value in items:
295             root.set(key, value)
296         self.assertEqual(keys, root.attrib.keys())
297         self.assertEqual(values, root.attrib.values())
298
299         attr_order = [
300             ('attr_99', 'TOAST-1'),
301             ('attr_98', 'TOAST-2'),
302         ]
303         ordered_dict_types = [OrderedDict, lambda x:x]
304         if sys.version_info >= (3, 6):
305             ordered_dict_types.append(dict)
306         else:
307             # Keyword arguments are not ordered in Py<3.6, and thus get sorted.
308             attr_order.sort()
309         attr_order += items
310         expected_keys = [attr[0] for attr in attr_order]
311         expected_values = [attr[1] for attr in attr_order]
312         expected_items = list(zip(expected_keys, expected_values))
313
314         for dict_type in ordered_dict_types:
315             root2 = Element("root2", dict_type(root.attrib),
316                             attr_99='TOAST-1', attr_98='TOAST-2')
317
318             try:
319                 self.assertSequenceEqual(expected_keys, root2.attrib.keys())
320                 self.assertSequenceEqual(expected_values, root2.attrib.values())
321                 self.assertSequenceEqual(expected_items, root2.attrib.items())
322             except AssertionError as exc:
323                 exc.args = ("Order of '%s': %s" % (dict_type.__name__, exc.args[0]),) + exc.args[1:]
324                 raise
325
326         self.assertEqual(keys, root.attrib.keys())
327         self.assertEqual(values, root.attrib.values())
328
329     def test_attribute_set_invalid(self):
330         # ElementTree accepts arbitrary attribute values
331         # lxml.etree allows only strings, or None for (html5) boolean attributes
332         Element = self.etree.Element
333         root = Element("root")
334         self.assertRaises(TypeError, root.set, "newattr", 5)
335         self.assertRaises(TypeError, root.set, "newattr", object)
336         self.assertRaises(TypeError, root.set, "newattr", None)
337         self.assertRaises(TypeError, root.set, "newattr")
338
339     def test_strip_attributes(self):
340         XML = self.etree.XML
341         xml = _bytes('<test a="5" b="10" c="20"><x a="4" b="2"/></test>')
342
343         root = XML(xml)
344         self.etree.strip_attributes(root, 'a')
345         self.assertEqual(_bytes('<test b="10" c="20"><x b="2"></x></test>'),
346                           self._writeElement(root))
347
348         root = XML(xml)
349         self.etree.strip_attributes(root, 'b', 'c')
350         self.assertEqual(_bytes('<test a="5"><x a="4"></x></test>'),
351                           self._writeElement(root))
352
353     def test_strip_attributes_ns(self):
354         XML = self.etree.XML
355         xml = _bytes('<test xmlns:n="http://test/ns" a="6" b="10" c="20" n:a="5"><x a="4" n:b="2"/></test>')
356
357         root = XML(xml)
358         self.etree.strip_attributes(root, 'a')
359         self.assertEqual(
360             _bytes('<test xmlns:n="http://test/ns" b="10" c="20" n:a="5"><x n:b="2"></x></test>'),
361             self._writeElement(root))
362
363         root = XML(xml)
364         self.etree.strip_attributes(root, '{http://test/ns}a', 'c')
365         self.assertEqual(
366             _bytes('<test xmlns:n="http://test/ns" a="6" b="10"><x a="4" n:b="2"></x></test>'),
367             self._writeElement(root))
368
369         root = XML(xml)
370         self.etree.strip_attributes(root, '{http://test/ns}*')
371         self.assertEqual(
372             _bytes('<test xmlns:n="http://test/ns" a="6" b="10" c="20"><x a="4"></x></test>'),
373             self._writeElement(root))
374
375     def test_strip_elements(self):
376         XML = self.etree.XML
377         xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
378
379         root = XML(xml)
380         self.etree.strip_elements(root, 'a')
381         self.assertEqual(_bytes('<test><x></x></test>'),
382                           self._writeElement(root))
383
384         root = XML(xml)
385         self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
386         self.assertEqual(_bytes('<test><a></a><x><a></a></x></test>'),
387                           self._writeElement(root))
388
389         root = XML(xml)
390         self.etree.strip_elements(root, 'c')
391         self.assertEqual(_bytes('<test><a><b></b></a><x><a><b></b></a></x></test>'),
392                           self._writeElement(root))
393
394     def test_strip_elements_ns(self):
395         XML = self.etree.XML
396         xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
397
398         root = XML(xml)
399         self.etree.strip_elements(root, 'a')
400         self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
401                           self._writeElement(root))
402
403         root = XML(xml)
404         self.etree.strip_elements(root, '{urn:a}b', 'c')
405         self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
406                           self._writeElement(root))
407
408         root = XML(xml)
409         self.etree.strip_elements(root, '{urn:a}*', 'c')
410         self.assertEqual(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
411                           self._writeElement(root))
412
413         root = XML(xml)
414         self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
415         self.assertEqual(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
416                           self._writeElement(root))
417
418     def test_strip_tags(self):
419         XML = self.etree.XML
420         xml = _bytes('<test>TEST<a>A<b>B<c/>CT</b>BT</a>AT<x>X<a>A<b/>BT<c/>CT</a>AT</x>XT</test>')
421
422         root = XML(xml)
423         self.etree.strip_tags(root, 'a')
424         self.assertEqual(_bytes('<test>TESTA<b>B<c></c>CT</b>BTAT<x>XA<b></b>BT<c></c>CTAT</x>XT</test>'),
425                           self._writeElement(root))
426
427         root = XML(xml)
428         self.etree.strip_tags(root, 'b', 'c', 'X', 'Y', 'Z')
429         self.assertEqual(_bytes('<test>TEST<a>ABCTBT</a>AT<x>X<a>ABTCT</a>AT</x>XT</test>'),
430                           self._writeElement(root))
431
432         root = XML(xml)
433         self.etree.strip_tags(root, 'c')
434         self.assertEqual(_bytes('<test>TEST<a>A<b>BCT</b>BT</a>AT<x>X<a>A<b></b>BTCT</a>AT</x>XT</test>'),
435                           self._writeElement(root))
436
437     def test_strip_tags_pi_comment(self):
438         XML = self.etree.XML
439         PI = self.etree.ProcessingInstruction
440         Comment = self.etree.Comment
441         xml = _bytes('<!--comment1-->\n<?PI1?>\n<test>TEST<!--comment2-->XT<?PI2?></test>\n<!--comment3-->\n<?PI1?>')
442
443         root = XML(xml)
444         self.etree.strip_tags(root, PI)
445         self.assertEqual(_bytes('<!--comment1-->\n<?PI1?>\n<test>TEST<!--comment2-->XT</test>\n<!--comment3-->\n<?PI1?>'),
446                           self._writeElement(root))
447
448         root = XML(xml)
449         self.etree.strip_tags(root, Comment)
450         self.assertEqual(_bytes('<!--comment1-->\n<?PI1?>\n<test>TESTXT<?PI2?></test>\n<!--comment3-->\n<?PI1?>'),
451                           self._writeElement(root))
452
453         root = XML(xml)
454         self.etree.strip_tags(root, PI, Comment)
455         self.assertEqual(_bytes('<!--comment1-->\n<?PI1?>\n<test>TESTXT</test>\n<!--comment3-->\n<?PI1?>'),
456                           self._writeElement(root))
457
458         root = XML(xml)
459         self.etree.strip_tags(root, Comment, PI)
460         self.assertEqual(_bytes('<!--comment1-->\n<?PI1?>\n<test>TESTXT</test>\n<!--comment3-->\n<?PI1?>'),
461                           self._writeElement(root))
462
463     def test_strip_tags_pi_comment_all(self):
464         XML = self.etree.XML
465         ElementTree = self.etree.ElementTree
466         PI = self.etree.ProcessingInstruction
467         Comment = self.etree.Comment
468         xml = _bytes('<!--comment1-->\n<?PI1?>\n<test>TEST<!--comment2-->XT<?PI2?></test>\n<!--comment3-->\n<?PI1?>')
469
470         root = XML(xml)
471         self.etree.strip_tags(ElementTree(root), PI)
472         self.assertEqual(_bytes('<!--comment1-->\n<test>TEST<!--comment2-->XT</test>\n<!--comment3-->'),
473                           self._writeElement(root))
474
475         root = XML(xml)
476         self.etree.strip_tags(ElementTree(root), Comment)
477         self.assertEqual(_bytes('<?PI1?>\n<test>TESTXT<?PI2?></test>\n<?PI1?>'),
478                           self._writeElement(root))
479
480         root = XML(xml)
481         self.etree.strip_tags(ElementTree(root), PI, Comment)
482         self.assertEqual(_bytes('<test>TESTXT</test>'),
483                           self._writeElement(root))
484
485         root = XML(xml)
486         self.etree.strip_tags(ElementTree(root), Comment, PI)
487         self.assertEqual(_bytes('<test>TESTXT</test>'),
488                           self._writeElement(root))
489
490     def test_strip_tags_doc_style(self):
491         XML = self.etree.XML
492         xml = _bytes('''
493         <div>
494             <div>
495                 I like <strong>sheep</strong>.
496                 <br/>
497                 I like lots of <strong>sheep</strong>.
498                 <br/>
499                 Click <a href="http://www.sheep.com">here</a>
500                  for <a href="http://www.sheep.com">those</a> sheep.
501                 <br/>
502             </div>
503         </div>
504         '''.strip())
505
506         root = XML(xml)
507         self.etree.strip_tags(root, 'a')
508         self.assertEqual(re.sub(_bytes('</?a[^>]*>'), _bytes(''), xml).replace(_bytes('<br/>'), _bytes('<br></br>')),
509                           self._writeElement(root))
510
511         root = XML(xml)
512         self.etree.strip_tags(root, 'a', 'br')
513         self.assertEqual(re.sub(_bytes('</?a[^>]*>'), _bytes(''),
514                                  re.sub(_bytes('<br[^>]*>'), _bytes(''), xml)),
515                           self._writeElement(root))
516
517     def test_strip_tags_ns(self):
518         XML = self.etree.XML
519         xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>CT</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
520
521         root = XML(xml)
522         self.etree.strip_tags(root, 'a')
523         self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>CT</b>BT</n:a>AT<x>XA<b xmlns="urn:a"></b>BT<c xmlns="urn:x"></c>CTAT</x>XT</test>'),
524                           self._writeElement(root))
525
526         root = XML(xml)
527         self.etree.strip_tags(root, '{urn:a}b', 'c')
528         self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>CT</b>BT</n:a>AT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
529                           self._writeElement(root))
530
531         root = XML(xml)
532         self.etree.strip_tags(root, '{urn:a}*', 'c')
533         self.assertEqual(_bytes('<test>TESTA<b>B<c xmlns="urn:c"></c>CT</b>BTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
534                           self._writeElement(root))
535
536     def test_strip_tags_and_remove(self):
537         # previously crashed
538         HTML = self.etree.HTML
539         root = HTML(_bytes('<div><h1>title</h1> <b>foo</b> <p>boo</p></div>'))[0][0]
540         self.assertEqual(_bytes('<div><h1>title</h1> <b>foo</b> <p>boo</p></div>'),
541                           self.etree.tostring(root))
542         self.etree.strip_tags(root, 'b')
543         self.assertEqual(_bytes('<div><h1>title</h1> foo <p>boo</p></div>'),
544                           self.etree.tostring(root))
545         root.remove(root[0])
546         self.assertEqual(_bytes('<div><p>boo</p></div>'),
547                           self.etree.tostring(root))
548
549     def test_pi(self):
550         # lxml.etree separates target and text
551         Element = self.etree.Element
552         SubElement = self.etree.SubElement
553         ProcessingInstruction = self.etree.ProcessingInstruction
554
555         a = Element('a')
556         a.append(ProcessingInstruction('foo', 'some more text'))
557         self.assertEqual(a[0].target, 'foo')
558         self.assertEqual(a[0].text, 'some more text')
559
560     def test_pi_parse(self):
561         XML = self.etree.XML
562         root = XML(_bytes("<test><?mypi my test ?></test>"))
563         self.assertEqual(root[0].target, "mypi")
564         self.assertEqual(root[0].text, "my test ")
565
566     def test_pi_pseudo_attributes_get(self):
567         XML = self.etree.XML
568         root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
569         self.assertEqual(root[0].target, "mypi")
570         self.assertEqual(root[0].get('my'), "1")
571         self.assertEqual(root[0].get('test'), " abc ")
572         self.assertEqual(root[0].get('quotes'), "' '")
573         self.assertEqual(root[0].get('only'), None)
574         self.assertEqual(root[0].get('names'), None)
575         self.assertEqual(root[0].get('nope'), None)
576
577     def test_pi_pseudo_attributes_attrib(self):
578         XML = self.etree.XML
579         root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
580         self.assertEqual(root[0].target, "mypi")
581         self.assertEqual(root[0].attrib['my'], "1")
582         self.assertEqual(root[0].attrib['test'], " abc ")
583         self.assertEqual(root[0].attrib['quotes'], "' '")
584         self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
585         self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
586         self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
587
588     def test_deepcopy_pi(self):
589         # previously caused a crash
590         ProcessingInstruction = self.etree.ProcessingInstruction
591         
592         a = ProcessingInstruction("PI", "ONE")
593         b = copy.deepcopy(a)
594         b.text = "ANOTHER"
595
596         self.assertEqual('ONE',     a.text)
597         self.assertEqual('ANOTHER', b.text)
598
599     def test_deepcopy_elementtree_pi(self):
600         XML = self.etree.XML
601         tostring = self.etree.tostring
602         root = XML(_bytes("<?mypi my test ?><test/><!--comment -->"))
603         tree1 = self.etree.ElementTree(root)
604         self.assertEqual(_bytes("<?mypi my test ?><test/><!--comment -->"),
605                           tostring(tree1))
606
607         tree2 = copy.deepcopy(tree1)
608         self.assertEqual(_bytes("<?mypi my test ?><test/><!--comment -->"),
609                           tostring(tree2))
610
611         root2 = copy.deepcopy(tree1.getroot())
612         self.assertEqual(_bytes("<test/>"),
613                           tostring(root2))
614
615     def test_deepcopy_elementtree_dtd(self):
616         XML = self.etree.XML
617         tostring = self.etree.tostring
618         xml = _bytes('<!DOCTYPE test [\n<!ENTITY entity "tasty">\n]>\n<test/>')
619         root = XML(xml)
620         tree1 = self.etree.ElementTree(root)
621         self.assertEqual(xml, tostring(tree1))
622
623         tree2 = copy.deepcopy(tree1)
624         self.assertEqual(xml, tostring(tree2))
625
626         root2 = copy.deepcopy(tree1.getroot())
627         self.assertEqual(_bytes("<test/>"),
628                           tostring(root2))
629
630     def test_deepcopy_pi_dtd(self):
631         XML = self.etree.XML
632         tostring = self.etree.tostring
633         xml = _bytes('<!-- comment --><!DOCTYPE test [\n<!ENTITY entity "tasty">\n]>\n<test/>')
634         root = XML(xml)
635         tree1 = self.etree.ElementTree(root)
636         self.assertEqual(xml, tostring(tree1))
637
638         tree2 = copy.deepcopy(tree1)
639         self.assertEqual(xml, tostring(tree2))
640
641     def test_parse_remove_comments(self):
642         fromstring = self.etree.fromstring
643         tostring = self.etree.tostring
644         XMLParser = self.etree.XMLParser
645
646         xml = _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
647         parser = XMLParser(remove_comments=True)
648         root = fromstring(xml, parser)
649         self.assertEqual(
650             _bytes('<a><b><c/></b></a>'),
651             tostring(root))
652
653     def test_parse_remove_pis(self):
654         parse = self.etree.parse
655         tostring = self.etree.tostring
656         XMLParser = self.etree.XMLParser
657
658         xml = _bytes('<?test?><a><?A?><b><?B?><c/></b><?C?></a><?tail?>')
659
660         f = BytesIO(xml)
661         tree = parse(f)
662         self.assertEqual(
663             xml,
664             tostring(tree))
665
666         parser = XMLParser(remove_pis=True)
667         tree = parse(f, parser)
668         self.assertEqual(
669             _bytes('<a><b><c/></b></a>'),
670             tostring(tree))
671
672     def test_parse_parser_type_error(self):
673         # ET raises IOError only
674         parse = self.etree.parse
675         self.assertRaises(TypeError, parse, 'notthere.xml', object())
676
677     def test_iterparse_getiterator(self):
678         iterparse = self.etree.iterparse
679         f = BytesIO('<a><b><d/></b><c/></a>')
680
681         counts = []
682         for event, elem in iterparse(f):
683             counts.append(len(list(elem.getiterator())))
684         self.assertEqual(
685             [1,2,1,4],
686             counts)
687
688     def test_iterparse_tree_comments(self):
689         # ET removes comments
690         iterparse = self.etree.iterparse
691         tostring = self.etree.tostring
692
693         f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
694         events = list(iterparse(f))
695         root = events[-1][1]
696         self.assertEqual(3, len(events))
697         self.assertEqual(
698             _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
699             tostring(root))
700
701     def test_iterparse_comments(self):
702         # ET removes comments
703         iterparse = self.etree.iterparse
704         tostring = self.etree.tostring
705
706         def name(event, el):
707             if event == 'comment':
708                 return el.text
709             else:
710                 return el.tag
711
712         f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
713         events = list(iterparse(f, events=('end', 'comment')))
714         root = events[-1][1]
715         self.assertEqual(6, len(events))
716         self.assertEqual(['A', ' B ', 'c', 'b', 'C', 'a'],
717                           [ name(*item) for item in events ])
718         self.assertEqual(
719             _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
720             tostring(root))
721
722     def test_iterparse_pis(self):
723         # ET removes pis
724         iterparse = self.etree.iterparse
725         tostring = self.etree.tostring
726         ElementTree = self.etree.ElementTree
727
728         def name(event, el):
729             if event == 'pi':
730                 return el.target, el.text
731             else:
732                 return el.tag
733
734         f = BytesIO('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
735         events = list(iterparse(f, events=('end', 'pi')))
736         root = events[-2][1]
737         self.assertEqual(8, len(events))
738         self.assertEqual([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
739                            ('pid','d'), 'a', ('pie','e')],
740                           [ name(*item) for item in events ])
741         self.assertEqual(
742             _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>'),
743             tostring(ElementTree(root)))
744
745     def test_iterparse_remove_comments(self):
746         iterparse = self.etree.iterparse
747         tostring = self.etree.tostring
748
749         f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
750         events = list(iterparse(f, remove_comments=True,
751                                 events=('end', 'comment')))
752         root = events[-1][1]
753         self.assertEqual(3, len(events))
754         self.assertEqual(['c', 'b', 'a'],
755                           [ el.tag for (event, el) in events ])
756         self.assertEqual(
757             _bytes('<a><b><c/></b></a>'),
758             tostring(root))
759
760     def test_iterparse_broken(self):
761         iterparse = self.etree.iterparse
762         f = BytesIO('<a><b><c/></a>')
763         # ET raises ExpatError, lxml raises XMLSyntaxError
764         self.assertRaises(self.etree.XMLSyntaxError, list, iterparse(f))
765
766     def test_iterparse_broken_recover(self):
767         iterparse = self.etree.iterparse
768         f = BytesIO('<a><b><c/></a>')
769         it = iterparse(f, events=('start', 'end'), recover=True)
770         events = [(ev, el.tag) for ev, el in it]
771         root = it.root
772         self.assertTrue(root is not None)
773
774         self.assertEqual(1, events.count(('start', 'a')))
775         self.assertEqual(1, events.count(('end', 'a')))
776
777         self.assertEqual(1, events.count(('start', 'b')))
778         self.assertEqual(1, events.count(('end', 'b')))
779
780         self.assertEqual(1, events.count(('start', 'c')))
781         self.assertEqual(1, events.count(('end', 'c')))
782
783     def test_iterparse_broken_multi_recover(self):
784         iterparse = self.etree.iterparse
785         f = BytesIO('<a><b><c/></d><b><c/></a></b>')
786         it = iterparse(f, events=('start', 'end'), recover=True)
787         events = [(ev, el.tag) for ev, el in it]
788         root = it.root
789         self.assertTrue(root is not None)
790
791         self.assertEqual(1, events.count(('start', 'a')))
792         self.assertEqual(1, events.count(('end', 'a')))
793
794         self.assertEqual(2, events.count(('start', 'b')))
795         self.assertEqual(2, events.count(('end', 'b')))
796
797         self.assertEqual(2, events.count(('start', 'c')))
798         self.assertEqual(2, events.count(('end', 'c')))
799
800     def test_iterparse_strip(self):
801         iterparse = self.etree.iterparse
802         f = BytesIO("""
803                <a>  \n \n  <b> b test </b>  \n
804
805                \n\t <c> \n </c> </a>  \n """)
806         iterator = iterparse(f, remove_blank_text=True)
807         text = [ (element.text, element.tail)
808                  for event, element in iterator ]
809         self.assertEqual(
810             [(" b test ", None), (" \n ", None), (None, None)],
811             text)
812
813     def test_iterparse_tag(self):
814         iterparse = self.etree.iterparse
815         f = BytesIO('<a><b><d/></b><c/></a>')
816
817         iterator = iterparse(f, tag="b", events=('start', 'end'))
818         events = list(iterator)
819         root = iterator.root
820         self.assertEqual(
821             [('start', root[0]), ('end', root[0])],
822             events)
823
824     def test_iterparse_tag_all(self):
825         iterparse = self.etree.iterparse
826         f = BytesIO('<a><b><d/></b><c/></a>')
827
828         iterator = iterparse(f, tag="*", events=('start', 'end'))
829         events = list(iterator)
830         self.assertEqual(
831             8,
832             len(events))
833
834     def test_iterparse_tag_ns(self):
835         iterparse = self.etree.iterparse
836         f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
837
838         iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end'))
839         events = list(iterator)
840         root = iterator.root
841         self.assertEqual(
842             [('start', root[0]), ('end', root[0])],
843             events)
844
845     def test_iterparse_tag_ns_empty(self):
846         iterparse = self.etree.iterparse
847         f = BytesIO('<a><b><d/></b><c/></a>')
848         iterator = iterparse(f, tag="{}b", events=('start', 'end'))
849         events = list(iterator)
850         root = iterator.root
851         self.assertEqual(
852             [('start', root[0]), ('end', root[0])],
853             events)
854
855         f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
856         iterator = iterparse(f, tag="{}b", events=('start', 'end'))
857         events = list(iterator)
858         root = iterator.root
859         self.assertEqual([], events)
860
861     def test_iterparse_tag_ns_all(self):
862         iterparse = self.etree.iterparse
863         f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
864         iterator = iterparse(f, tag="{urn:test:1}*", events=('start', 'end'))
865         events = list(iterator)
866         self.assertEqual(8, len(events))
867
868     def test_iterparse_tag_ns_empty_all(self):
869         iterparse = self.etree.iterparse
870         f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
871         iterator = iterparse(f, tag="{}*", events=('start', 'end'))
872         events = list(iterator)
873         self.assertEqual([], events)
874
875         f = BytesIO('<a><b><d/></b><c/></a>')
876         iterator = iterparse(f, tag="{}*", events=('start', 'end'))
877         events = list(iterator)
878         self.assertEqual(8, len(events))
879
880     def test_iterparse_encoding_error(self):
881         text = _str('Søk pÃ¥ nettet')
882         wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
883         xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
884                       ).encode('iso-8859-1')
885
886         self.assertRaises(self.etree.ParseError,
887                           list, self.etree.iterparse(BytesIO(xml_latin1)))
888
889     def test_iterparse_encoding_8bit_override(self):
890         text = _str('Søk pÃ¥ nettet', encoding="UTF-8")
891         wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
892         xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
893                       ).encode('iso-8859-1')
894
895         iterator = self.etree.iterparse(BytesIO(xml_latin1),
896                                         encoding="iso-8859-1")
897         self.assertEqual(1, len(list(iterator)))
898
899         a = iterator.root
900         self.assertEqual(a.text, text)
901
902     def test_iterparse_keep_cdata(self):
903         tostring = self.etree.tostring
904         f = BytesIO('<root><![CDATA[test]]></root>')
905         context = self.etree.iterparse(f, strip_cdata=False)
906         content = [ el.text for event,el in context ]
907
908         self.assertEqual(['test'], content)
909         self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
910                           tostring(context.root))
911
912     def test_parser_encoding_unknown(self):
913         self.assertRaises(
914             LookupError, self.etree.XMLParser, encoding="hopefully unknown")
915
916     def test_parser_encoding(self):
917         self.etree.XMLParser(encoding="ascii")
918         self.etree.XMLParser(encoding="utf-8")
919         self.etree.XMLParser(encoding="iso-8859-1")
920
921     def test_feed_parser_recover(self):
922         parser = self.etree.XMLParser(recover=True)
923
924         parser.feed('<?xml version=')
925         parser.feed('"1.0"?><ro')
926         parser.feed('ot><')
927         parser.feed('a test="works"')
928         parser.feed('><othertag/></root') # <a> not closed!
929         parser.feed('>')
930
931         root = parser.close()
932
933         self.assertEqual(root.tag, "root")
934         self.assertEqual(len(root), 1)
935         self.assertEqual(root[0].tag, "a")
936         self.assertEqual(root[0].get("test"), "works")
937         self.assertEqual(len(root[0]), 1)
938         self.assertEqual(root[0][0].tag, "othertag")
939         # FIXME: would be nice to get some errors logged ...
940         #self.assertTrue(len(parser.error_log) > 0, "error log is empty")
941
942     def test_feed_parser_recover_no_id_dict(self):
943         # test that recover mode plays nicely with the no-id-dict setup
944         parser = self.etree.XMLParser(recover=True, collect_ids=False)
945
946         parser.feed('<?xml version=')
947         parser.feed('"1.0"?><ro')
948         parser.feed('ot xml:id="123"><')
949         parser.feed('a test="works" xml:id=')
950         parser.feed('"321"><othertag/></root') # <a> not closed!
951         parser.feed('>')
952
953         root = parser.close()
954
955         self.assertEqual(root.tag, "root")
956         self.assertEqual(len(root), 1)
957         self.assertEqual(root[0].tag, "a")
958         self.assertEqual(root[0].get("test"), "works")
959         self.assertEqual(root[0].attrib, {
960             'test': 'works',
961             '{http://www.w3.org/XML/1998/namespace}id': '321'})
962         self.assertEqual(len(root[0]), 1)
963         self.assertEqual(root[0][0].tag, "othertag")
964         # FIXME: would be nice to get some errors logged ...
965         #self.assertTrue(len(parser.error_log) > 0, "error log is empty")
966
967     def test_elementtree_parser_target_type_error(self):
968         assertEqual = self.assertEqual
969         assertFalse  = self.assertFalse
970
971         events = []
972         class Target(object):
973             def start(self, tag, attrib):
974                 events.append("start")
975                 assertFalse(attrib)
976                 assertEqual("TAG", tag)
977             def end(self, tag):
978                 events.append("end")
979                 assertEqual("TAG", tag)
980             def close(self):
981                 return "DONE" # no Element!
982
983         parser = self.etree.XMLParser(target=Target())
984         tree = self.etree.ElementTree()
985
986         self.assertRaises(TypeError,
987                           tree.parse, BytesIO("<TAG/>"), parser=parser)
988         self.assertEqual(["start", "end"], events)
989
990     def test_parser_target_feed_exception(self):
991         # ET doesn't call .close() on errors
992         events = []
993         class Target(object):
994             def start(self, tag, attrib):
995                 events.append("start-" + tag)
996             def end(self, tag):
997                 events.append("end-" + tag)
998                 if tag == 'a':
999                     raise ValueError("dead and gone")
1000             def data(self, data):
1001                 events.append("data-" + data)
1002             def close(self):
1003                 events.append("close")
1004                 return "DONE"
1005
1006         parser = self.etree.XMLParser(target=Target())
1007
1008         try:
1009             parser.feed(_bytes('<root>A<a>ca</a>B</root>'))
1010             done = parser.close()
1011             self.fail("error expected, but parsing succeeded")
1012         except ValueError:
1013             done = 'value error received as expected'
1014
1015         self.assertEqual(["start-root", "data-A", "start-a",
1016                            "data-ca", "end-a", "close"],
1017                           events)
1018
1019     def test_parser_target_fromstring_exception(self):
1020         # ET doesn't call .close() on errors
1021         events = []
1022         class Target(object):
1023             def start(self, tag, attrib):
1024                 events.append("start-" + tag)
1025             def end(self, tag):
1026                 events.append("end-" + tag)
1027                 if tag == 'a':
1028                     raise ValueError("dead and gone")
1029             def data(self, data):
1030                 events.append("data-" + data)
1031             def close(self):
1032                 events.append("close")
1033                 return "DONE"
1034
1035         parser = self.etree.XMLParser(target=Target())
1036
1037         try:
1038             done = self.etree.fromstring(_bytes('<root>A<a>ca</a>B</root>'),
1039                                          parser=parser)
1040             self.fail("error expected, but parsing succeeded")
1041         except ValueError:
1042             done = 'value error received as expected'
1043
1044         self.assertEqual(["start-root", "data-A", "start-a",
1045                           "data-ca", "end-a", "close"],
1046                          events)
1047
1048     def test_parser_target_feed_no_id_dict(self):
1049         # test that target parsing works nicely with the no-id-hash setup
1050         events = []
1051         class Target(object):
1052             def start(self, tag, attrib):
1053                 events.append("start-" + tag)
1054             def end(self, tag):
1055                 events.append("end-" + tag)
1056             def data(self, data):
1057                 events.append("data-" + data)
1058             def comment(self, text):
1059                 events.append("comment-" + text)
1060             def close(self):
1061                 return "DONE"
1062
1063         parser = self.etree.XMLParser(target=Target(), collect_ids=False)
1064
1065         parser.feed(_bytes('<!--a--><root xml:id="123">A<!--b-->'))
1066         parser.feed(_bytes('<sub xml:id="321"/>B</root>'))
1067         done = parser.close()
1068
1069         self.assertEqual("DONE", done)
1070         self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1071                           "start-sub", "end-sub", "data-B", "end-root"],
1072                          events)
1073
1074     def test_parser_target_comment(self):
1075         events = []
1076         class Target(object):
1077             def start(self, tag, attrib):
1078                 events.append("start-" + tag)
1079             def end(self, tag):
1080                 events.append("end-" + tag)
1081             def data(self, data):
1082                 events.append("data-" + data)
1083             def comment(self, text):
1084                 events.append("comment-" + text)
1085             def close(self):
1086                 return "DONE"
1087
1088         parser = self.etree.XMLParser(target=Target())
1089
1090         parser.feed(_bytes('<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->'))
1091         done = parser.close()
1092
1093         self.assertEqual("DONE", done)
1094         self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1095                            "start-sub", "end-sub", "comment-c", "data-B",
1096                            "end-root", "comment-d"],
1097                           events)
1098
1099     def test_parser_target_pi(self):
1100         events = []
1101         class Target(object):
1102             def start(self, tag, attrib):
1103                 events.append("start-" + tag)
1104             def end(self, tag):
1105                 events.append("end-" + tag)
1106             def data(self, data):
1107                 events.append("data-" + data)
1108             def pi(self, target, data):
1109                 events.append("pi-" + target + "-" + data)
1110             def close(self):
1111                 return "DONE"
1112
1113         parser = self.etree.XMLParser(target=Target())
1114
1115         parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
1116         done = parser.close()
1117
1118         self.assertEqual("DONE", done)
1119         self.assertEqual(["pi-test-a", "start-root", "data-A", "pi-test-b",
1120                            "data-B", "end-root", "pi-test-c"],
1121                           events)
1122
1123     def test_parser_target_cdata(self):
1124         events = []
1125         class Target(object):
1126             def start(self, tag, attrib):
1127                 events.append("start-" + tag)
1128             def end(self, tag):
1129                 events.append("end-" + tag)
1130             def data(self, data):
1131                 events.append("data-" + data)
1132             def close(self):
1133                 return "DONE"
1134
1135         parser = self.etree.XMLParser(target=Target(),
1136                                       strip_cdata=False)
1137
1138         parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
1139         done = parser.close()
1140
1141         self.assertEqual("DONE", done)
1142         self.assertEqual(["start-root", "data-A", "start-a",
1143                            "data-ca", "end-a", "data-B", "end-root"],
1144                           events)
1145
1146     def test_parser_target_recover(self):
1147         events = []
1148         class Target(object):
1149             def start(self, tag, attrib):
1150                 events.append("start-" + tag)
1151             def end(self, tag):
1152                 events.append("end-" + tag)
1153             def data(self, data):
1154                 events.append("data-" + data)
1155             def close(self):
1156                 events.append("close")
1157                 return "DONE"
1158
1159         parser = self.etree.XMLParser(target=Target(),
1160                                       recover=True)
1161
1162         parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
1163         done = parser.close()
1164
1165         self.assertEqual("DONE", done)
1166         self.assertEqual(["start-root", "data-A", "start-a",
1167                            "data-ca", "end-a", "data-B",
1168                            "end-root", "close"],
1169                           events)
1170
1171     def test_iterwalk_tag(self):
1172         iterwalk = self.etree.iterwalk
1173         root = self.etree.XML(_bytes('<a><b><d/></b><c/></a>'))
1174
1175         iterator = iterwalk(root, tag="b", events=('start', 'end'))
1176         events = list(iterator)
1177         self.assertEqual(
1178             [('start', root[0]), ('end', root[0])],
1179             events)
1180
1181     def test_iterwalk_tag_all(self):
1182         iterwalk = self.etree.iterwalk
1183         root = self.etree.XML(_bytes('<a><b><d/></b><c/></a>'))
1184
1185         iterator = iterwalk(root, tag="*", events=('start', 'end'))
1186         events = list(iterator)
1187         self.assertEqual(
1188             8,
1189             len(events))
1190
1191     def test_iterwalk(self):
1192         iterwalk = self.etree.iterwalk
1193         root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1194
1195         events = list(iterwalk(root))
1196         self.assertEqual(
1197             [('end', root[0]), ('end', root[1]), ('end', root)],
1198             events)
1199
1200     def test_iterwalk_comments_root_element(self):
1201         iterwalk = self.etree.iterwalk
1202         root = self.etree.XML(
1203             b'<!--C0--><a><!--Ca--><b><!--Cb--></b><!--Cc--><c/></a><!--C99-->')
1204
1205         iterator = iterwalk(root, events=('start', 'end', 'comment'))
1206         events = list(iterator)
1207         self.assertEqual(
1208             [('start', root), ('comment', root[0]),
1209              ('start', root[1]), ('comment', root[1][0]), ('end', root[1]),
1210              ('comment', root[2]), ('start', root[3]), ('end', root[3]),
1211              ('end', root),
1212              ],
1213             events)
1214
1215     def test_iterwalk_comments_tree(self):
1216         iterwalk = self.etree.iterwalk
1217         root = self.etree.XML(
1218             b'<!--C0--><a><!--Ca--><b><!--Cb--></b><!--Cc--><c/></a><!--C99-->')
1219
1220         iterator = iterwalk(self.etree.ElementTree(root), events=('start', 'end', 'comment'))
1221         events = list(iterator)
1222         self.assertEqual(
1223             [('comment', root.getprevious()),
1224              ('start', root), ('comment', root[0]),  # <a>
1225              ('start', root[1]), ('comment', root[1][0]), ('end', root[1]),  # <b>
1226              ('comment', root[2]), ('start', root[3]), ('end', root[3]),  # <c>
1227              ('end', root), ('comment', root.getnext()),
1228              ],
1229             events)
1230
1231     def test_iterwalk_pis_root_element(self):
1232         iterwalk = self.etree.iterwalk
1233         root = self.etree.XML(
1234             b'<?C0?><a><?Ca?><b><?Cb?></b><?Cc?><c/></a><?C99?>')
1235
1236         iterator = iterwalk(root, events=('start', 'end', 'pi'))
1237         events = list(iterator)
1238         self.assertEqual(
1239             [('start', root), ('pi', root[0]),
1240              ('start', root[1]), ('pi', root[1][0]), ('end', root[1]),
1241              ('pi', root[2]), ('start', root[3]), ('end', root[3]),
1242              ('end', root),
1243              ],
1244             events)
1245
1246     def test_iterwalk_pis_tree(self):
1247         iterwalk = self.etree.iterwalk
1248         root = self.etree.XML(
1249             b'<?C0?><a><?Ca?><b><?Cb?></b><?Cc?><c/></a><?C99?>')
1250
1251         iterator = iterwalk(self.etree.ElementTree(root), events=('start', 'end', 'pi'))
1252         events = list(iterator)
1253         self.assertEqual(
1254             [('pi', root.getprevious()),
1255              ('start', root), ('pi', root[0]),  # <a>
1256              ('start', root[1]), ('pi', root[1][0]), ('end', root[1]),  # <b>
1257              ('pi', root[2]), ('start', root[3]), ('end', root[3]),  # <c>
1258              ('end', root), ('pi', root.getnext()),
1259              ],
1260             events)
1261
1262     def test_iterwalk_pis_comments_tree(self):
1263         iterwalk = self.etree.iterwalk
1264         root = self.etree.XML(
1265             b'<!--C0--><?C0?><!--C1--><a><?Ca?><b><!--Cb--></b><?Cc?><c/></a><!--C99--><?C99?>')
1266
1267         iterator = iterwalk(self.etree.ElementTree(root), events=('start', 'end', 'pi', 'comment'))
1268         events = list(iterator)
1269         self.assertEqual(
1270             [('comment', root.getprevious().getprevious().getprevious()),
1271              ('pi', root.getprevious().getprevious()),
1272              ('comment', root.getprevious()),
1273              ('start', root), ('pi', root[0]),  # <a>
1274              ('start', root[1]), ('comment', root[1][0]), ('end', root[1]),  # <b>
1275              ('pi', root[2]), ('start', root[3]), ('end', root[3]),  # <c>
1276              ('end', root), ('comment', root.getnext()), ('pi', root.getnext().getnext()),
1277              ],
1278             events)
1279
1280     def test_iterwalk_pis_comments_tree_no_events(self):
1281         iterwalk = self.etree.iterwalk
1282         root = self.etree.XML(
1283             b'<!--C0--><?C0?><!--C1--><a><?Ca?><b><!--Cb--></b><?Cc?><c/></a><!--C99--><?C99?>')
1284
1285         iterator = iterwalk(self.etree.ElementTree(root), events=('start', 'end'))
1286         events = list(iterator)
1287         self.assertEqual(
1288             [('start', root),  # <a>
1289              ('start', root[1]), ('end', root[1]),  # <b>
1290              ('start', root[3]), ('end', root[3]),  # <c>
1291              ('end', root),
1292              ],
1293             events)
1294
1295     def test_iterwalk_start(self):
1296         iterwalk = self.etree.iterwalk
1297         root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1298
1299         iterator = iterwalk(root, events=('start',))
1300         events = list(iterator)
1301         self.assertEqual(
1302             [('start', root), ('start', root[0]), ('start', root[1])],
1303             events)
1304
1305     def test_iterwalk_start_end(self):
1306         iterwalk = self.etree.iterwalk
1307         root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1308
1309         iterator = iterwalk(root, events=('start','end'))
1310         events = list(iterator)
1311         self.assertEqual(
1312             [('start', root), ('start', root[0]), ('end', root[0]),
1313              ('start', root[1]), ('end', root[1]), ('end', root)],
1314             events)
1315
1316     def test_iterwalk_start_tags(self):
1317         iterwalk = self.etree.iterwalk
1318         root = self.etree.XML(_bytes('<a><b></b><c/><b><d/></b></a>'))
1319
1320         iterator = iterwalk(root, events=('start',), tag='b')
1321         events = list(iterator)
1322         self.assertEqual(
1323             [('start', root[0]), ('start', root[2])],
1324             events)
1325
1326     def test_iterwalk_start_end_tags(self):
1327         iterwalk = self.etree.iterwalk
1328         root = self.etree.XML(_bytes('<a><b></b><c/><b><d/></b></a>'))
1329
1330         iterator = iterwalk(root, events=('start', 'end'), tag='b')
1331         events = list(iterator)
1332         self.assertEqual(
1333             [('start', root[0]), ('end', root[0]), ('start', root[2]), ('end', root[2])],
1334             events)
1335
1336     def test_iterwalk_start_end_tags_with_root(self):
1337         iterwalk = self.etree.iterwalk
1338         root = self.etree.XML(_bytes('<a><b></b><c/><b><d/></b></a>'))
1339
1340         iterator = iterwalk(root, events=('start', 'end'), tag=('b', 'a'))
1341         events = list(iterator)
1342         self.assertEqual(
1343             [('start', root),
1344              ('start', root[0]), ('end', root[0]),
1345              ('start', root[2]), ('end', root[2]),
1346              ('end', root),
1347              ],
1348             events)
1349
1350     def test_iterwalk_clear(self):
1351         iterwalk = self.etree.iterwalk
1352         root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1353
1354         iterator = iterwalk(root)
1355         for event, elem in iterator:
1356             elem.clear()
1357
1358         self.assertEqual(0,
1359                           len(root))
1360
1361     def test_iterwalk_attrib_ns(self):
1362         iterwalk = self.etree.iterwalk
1363         root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))
1364
1365         attr_name = '{testns}bla'
1366         events = []
1367         iterator = iterwalk(root, events=('start','end','start-ns','end-ns'))
1368         for event, elem in iterator:
1369             events.append(event)
1370             if event == 'start':
1371                 if elem.tag != '{ns1}a':
1372                     elem.set(attr_name, 'value')
1373
1374         self.assertEqual(
1375             ['start-ns', 'start', 'start', 'start-ns', 'start',
1376              'end', 'end-ns', 'end', 'end', 'end-ns'],
1377             events)
1378
1379         self.assertEqual(
1380             None,
1381             root.get(attr_name))
1382         self.assertEqual(
1383             'value',
1384             root[0].get(attr_name))
1385
1386     def test_iterwalk_end_skip(self):
1387         iterwalk = self.etree.iterwalk
1388         root = self.etree.XML(_bytes('<a><b><c/></b><d><e/></d></a>'))
1389
1390         iterator = iterwalk(root)
1391         tags = []
1392         for event, elem in iterator:
1393             tags.append(elem.tag)
1394             # requesting a skip after an 'end' event should never have an effect
1395             iterator.skip_subtree()
1396
1397         self.assertEqual(['c', 'b', 'e', 'd', 'a'], tags)
1398
1399     def test_iterwalk_start_end_skip(self):
1400         iterwalk = self.etree.iterwalk
1401         root = self.etree.XML(_bytes('<a><b><c/></b><d><e/></d></a>'))
1402
1403         iterator = iterwalk(root, events=('start', 'end'))
1404         tags = []
1405         for event, elem in iterator:
1406             tags.append((event, elem.tag))
1407             if elem.tag in ('b', 'e'):
1408                 # skipping should only have an effect on 'start', not on 'end'
1409                 iterator.skip_subtree()
1410
1411         self.assertEqual(
1412             [('start', 'a'),
1413              ('start', 'b'), ('end', 'b'),  # ignored child 'c'
1414              ('start', 'd'),
1415              ('start', 'e'), ('end', 'e'),
1416              ('end', 'd'),
1417              ('end', 'a')],
1418             tags)
1419
1420     def test_iterwalk_ns_skip(self):
1421         iterwalk = self.etree.iterwalk
1422         root = self.etree.XML(_bytes(
1423             '<a xmlns="ns1"><b xmlns="nsb"><c xmlns="ns2"/></b><d xmlns="ns2"><e/></d></a>'))
1424
1425         events = []
1426         iterator = iterwalk(root, events=('start','start-ns','end-ns'))
1427         for event, elem in iterator:
1428             if event in ('start-ns', 'end-ns'):
1429                 events.append((event, elem))
1430                 if event == 'start-ns' and elem == ('', 'nsb'):
1431                     events.append('skip')
1432                     iterator.skip_subtree()
1433             else:
1434                 events.append((event, elem.tag))
1435
1436         self.assertEqual(
1437             [('start-ns', ('', 'ns1')),
1438              ('start', '{ns1}a'),
1439              ('start-ns', ('', 'nsb')),
1440              'skip',
1441              ('start', '{nsb}b'),
1442              ('end-ns', None),
1443              ('start-ns', ('', 'ns2')),
1444              ('start', '{ns2}d'),
1445              ('start', '{ns2}e'),
1446              ('end-ns', None),
1447              ('end-ns', None)
1448              ],
1449             events)
1450
1451     def test_iterwalk_getiterator(self):
1452         iterwalk = self.etree.iterwalk
1453         root = self.etree.XML(_bytes('<a><b><d/></b><c/></a>'))
1454
1455         counts = []
1456         for event, elem in iterwalk(root):
1457             counts.append(len(list(elem.getiterator())))
1458         self.assertEqual(
1459             [1,2,1,4],
1460             counts)
1461
1462     def test_itertext_comment_pi(self):
1463         # https://bugs.launchpad.net/lxml/+bug/1844674
1464         XML = self.etree.XML
1465         root = XML(_bytes(
1466             "<root>RTEXT<a></a>ATAIL<b/><!-- COMMENT -->CTAIL<?PI PITEXT?> PITAIL </root>"
1467         ))
1468
1469         text = list(root.itertext())
1470         self.assertEqual(["RTEXT", "ATAIL", "CTAIL", " PITAIL "],
1471                           text)
1472
1473     def test_resolve_string_dtd(self):
1474         parse = self.etree.parse
1475         parser = self.etree.XMLParser(dtd_validation=True)
1476         assertEqual = self.assertEqual
1477         test_url = _str("__nosuch.dtd")
1478
1479         class MyResolver(self.etree.Resolver):
1480             def resolve(self, url, id, context):
1481                 assertEqual(url, test_url)
1482                 return self.resolve_string(
1483                     _str('''<!ENTITY myentity "%s">
1484                         <!ELEMENT doc ANY>''') % url, context)
1485
1486         parser.resolvers.add(MyResolver())
1487
1488         xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1489         tree = parse(StringIO(xml), parser)
1490         root = tree.getroot()
1491         self.assertEqual(root.text, test_url)
1492
1493     def test_resolve_bytes_dtd(self):
1494         parse = self.etree.parse
1495         parser = self.etree.XMLParser(dtd_validation=True)
1496         assertEqual = self.assertEqual
1497         test_url = _str("__nosuch.dtd")
1498
1499         class MyResolver(self.etree.Resolver):
1500             def resolve(self, url, id, context):
1501                 assertEqual(url, test_url)
1502                 return self.resolve_string(
1503                     (_str('''<!ENTITY myentity "%s">
1504                              <!ELEMENT doc ANY>''') % url).encode('utf-8'),
1505                     context)
1506
1507         parser.resolvers.add(MyResolver())
1508
1509         xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1510         tree = parse(StringIO(xml), parser)
1511         root = tree.getroot()
1512         self.assertEqual(root.text, test_url)
1513
1514     def test_resolve_filelike_dtd(self):
1515         parse = self.etree.parse
1516         parser = self.etree.XMLParser(dtd_validation=True)
1517         assertEqual = self.assertEqual
1518         test_url = _str("__nosuch.dtd")
1519
1520         class MyResolver(self.etree.Resolver):
1521             def resolve(self, url, id, context):
1522                 assertEqual(url, test_url)
1523                 return self.resolve_file(
1524                     SillyFileLike(
1525                         _str('''<!ENTITY myentity "%s">
1526                         <!ELEMENT doc ANY>''') % url), context)
1527
1528         parser.resolvers.add(MyResolver())
1529
1530         xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1531         tree = parse(StringIO(xml), parser)
1532         root = tree.getroot()
1533         self.assertEqual(root.text, test_url)
1534
1535     def test_resolve_filename_dtd(self):
1536         parse = self.etree.parse
1537         parser = self.etree.XMLParser(attribute_defaults=True)
1538         assertEqual = self.assertEqual
1539         test_url = _str("__nosuch.dtd")
1540
1541         class MyResolver(self.etree.Resolver):
1542             def resolve(self, url, id, context):
1543                 assertEqual(url, test_url)
1544                 return self.resolve_filename(
1545                     fileInTestDir('test.dtd'), context)
1546
1547         parser.resolvers.add(MyResolver())
1548
1549         xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1550         tree = parse(StringIO(xml), parser)
1551         root = tree.getroot()
1552         self.assertEqual(
1553             root.attrib,    {'default': 'valueA'})
1554         self.assertEqual(
1555             root[0].attrib, {'default': 'valueB'})
1556
1557     def test_resolve_filename_dtd_relative(self):
1558         parse = self.etree.parse
1559         parser = self.etree.XMLParser(attribute_defaults=True)
1560         assertEqual = self.assertEqual
1561         test_url = _str("__nosuch.dtd")
1562
1563         class MyResolver(self.etree.Resolver):
1564             def resolve(self, url, id, context):
1565                 expected = fileUrlInTestDir(test_url)
1566                 url = url.replace('file://', 'file:')  # depends on libxml2 version
1567                 expected = expected.replace('file://', 'file:')
1568                 assertEqual(url, expected)
1569                 return self.resolve_filename(
1570                     fileUrlInTestDir('test.dtd'), context)
1571
1572         parser.resolvers.add(MyResolver())
1573
1574         xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1575         tree = parse(StringIO(xml), parser,
1576                      base_url=fileUrlInTestDir('__test.xml'))
1577         root = tree.getroot()
1578         self.assertEqual(
1579             root.attrib,    {'default': 'valueA'})
1580         self.assertEqual(
1581             root[0].attrib, {'default': 'valueB'})
1582
1583     def test_resolve_file_dtd(self):
1584         parse = self.etree.parse
1585         parser = self.etree.XMLParser(attribute_defaults=True)
1586         assertEqual = self.assertEqual
1587         test_url = _str("__nosuch.dtd")
1588
1589         class MyResolver(self.etree.Resolver):
1590             def resolve(self, url, id, context):
1591                 assertEqual(url, test_url)
1592                 return self.resolve_file(
1593                     open(fileInTestDir('test.dtd'), 'rb'), context)
1594
1595         parser.resolvers.add(MyResolver())
1596
1597         xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1598         tree = parse(StringIO(xml), parser)
1599         root = tree.getroot()
1600         self.assertEqual(
1601             root.attrib,    {'default': 'valueA'})
1602         self.assertEqual(
1603             root[0].attrib, {'default': 'valueB'})
1604
1605     def test_resolve_empty(self):
1606         parse = self.etree.parse
1607         parser = self.etree.XMLParser(load_dtd=True)
1608         assertEqual = self.assertEqual
1609         test_url = _str("__nosuch.dtd")
1610
1611         class check(object):
1612             resolved = False
1613
1614         class MyResolver(self.etree.Resolver):
1615             def resolve(self, url, id, context):
1616                 assertEqual(url, test_url)
1617                 check.resolved = True
1618                 return self.resolve_empty(context)
1619
1620         parser.resolvers.add(MyResolver())
1621
1622         xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1623         self.assertRaises(etree.XMLSyntaxError, parse, StringIO(xml), parser)
1624         self.assertTrue(check.resolved)
1625
1626     def test_resolve_error(self):
1627         parse = self.etree.parse
1628         parser = self.etree.XMLParser(dtd_validation=True)
1629
1630         class _LocalException(Exception):
1631             pass
1632
1633         class MyResolver(self.etree.Resolver):
1634             def resolve(self, url, id, context):
1635                 raise _LocalException
1636
1637         parser.resolvers.add(MyResolver())
1638
1639         xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
1640         self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
1641
1642     def test_entity_parse(self):
1643         parse = self.etree.parse
1644         tostring = self.etree.tostring
1645         parser = self.etree.XMLParser(resolve_entities=False)
1646         Entity = self.etree.Entity
1647
1648         xml = _bytes('<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>')
1649         tree = parse(BytesIO(xml), parser)
1650         root = tree.getroot()
1651         self.assertEqual(root[0].tag, Entity)
1652         self.assertEqual(root[0].text, "&myentity;")
1653         self.assertEqual(root[0].tail, None)
1654         self.assertEqual(root[0].name, "myentity")
1655
1656         self.assertEqual(_bytes('<doc>&myentity;</doc>'),
1657                           tostring(root))
1658
1659     def test_entity_restructure(self):
1660         xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp "&#160;"> ]>
1661             <root>
1662               <child1/>
1663               <child2/>
1664               <child3>&nbsp;</child3>
1665             </root>''')
1666
1667         parser = self.etree.XMLParser(resolve_entities=False)
1668         root = etree.fromstring(xml, parser)
1669         self.assertEqual([ el.tag for el in root ],
1670                           ['child1', 'child2', 'child3'])
1671
1672         root[0] = root[-1]
1673         self.assertEqual([ el.tag for el in root ],
1674                           ['child3', 'child2'])
1675         self.assertEqual(root[0][0].text, '&nbsp;')
1676         self.assertEqual(root[0][0].name, 'nbsp')
1677
1678     def test_entity_append(self):
1679         Entity = self.etree.Entity
1680         Element = self.etree.Element
1681         tostring = self.etree.tostring
1682
1683         root = Element("root")
1684         root.append( Entity("test") )
1685
1686         self.assertEqual(root[0].tag, Entity)
1687         self.assertEqual(root[0].text, "&test;")
1688         self.assertEqual(root[0].tail, None)
1689         self.assertEqual(root[0].name, "test")
1690
1691         self.assertEqual(_bytes('<root>&test;</root>'),
1692                           tostring(root))
1693
1694     def test_entity_append_parsed(self):
1695         Entity = self.etree.Entity
1696         Element = self.etree.Element
1697         parser = self.etree.XMLParser(resolve_entities=False)
1698         entity = self.etree.XML('''<!DOCTYPE data [
1699         <!ENTITY a "a">
1700         <!ENTITY b "&a;">
1701         ]>
1702         <data>&b;</data>
1703         ''', parser)
1704
1705         el = Element('test')
1706         el.append(entity)
1707         self.assertEqual(el.tag, 'test')
1708         self.assertEqual(el[0].tag, 'data')
1709         self.assertEqual(el[0][0].tag, Entity)
1710         self.assertEqual(el[0][0].name, 'b')
1711
1712     def test_entity_values(self):
1713         Entity = self.etree.Entity
1714         self.assertEqual(Entity("test").text, '&test;')
1715         self.assertEqual(Entity("#17683").text, '&#17683;')
1716         self.assertEqual(Entity("#x1768").text, '&#x1768;')
1717         self.assertEqual(Entity("#x98AF").text, '&#x98AF;')
1718
1719     def test_entity_error(self):
1720         Entity = self.etree.Entity
1721         self.assertRaises(ValueError, Entity, 'a b c')
1722         self.assertRaises(ValueError, Entity, 'a,b')
1723         self.assertRaises(ValueError, Entity, 'a\0b')
1724         self.assertRaises(ValueError, Entity, '#abc')
1725         self.assertRaises(ValueError, Entity, '#xxyz')
1726
1727     def test_cdata(self):
1728         CDATA = self.etree.CDATA
1729         Element = self.etree.Element
1730         tostring = self.etree.tostring
1731
1732         root = Element("root")
1733         root.text = CDATA('test')
1734
1735         self.assertEqual('test',
1736                           root.text)
1737         self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
1738                           tostring(root))
1739
1740     def test_cdata_tail(self):
1741         CDATA = self.etree.CDATA
1742         Element = self.etree.Element
1743         SubElement = self.etree.SubElement
1744         tostring = self.etree.tostring
1745
1746         root = Element("root")
1747         child = SubElement(root, 'child')
1748         child.tail = CDATA('test')
1749
1750         self.assertEqual('test', child.tail)
1751         self.assertEqual(_bytes('<root><child/><![CDATA[test]]></root>'),
1752                          tostring(root))
1753
1754         root = Element("root")
1755         root.tail = CDATA('test')
1756
1757         self.assertEqual('test', root.tail)
1758         self.assertEqual(_bytes('<root/><![CDATA[test]]>'),
1759                          tostring(root))
1760
1761     def test_cdata_type(self):
1762         CDATA = self.etree.CDATA
1763         Element = self.etree.Element
1764         root = Element("root")
1765
1766         root.text = CDATA("test")
1767         self.assertEqual('test', root.text)
1768
1769         root.text = CDATA(_str("test"))
1770         self.assertEqual('test', root.text)
1771
1772         self.assertRaises(TypeError, CDATA, 1)
1773
1774     def test_cdata_errors(self):
1775         CDATA = self.etree.CDATA
1776         Element = self.etree.Element
1777
1778         root = Element("root")
1779         cdata = CDATA('test')
1780
1781         self.assertRaises(TypeError,
1782                           root.set, 'attr', cdata)
1783         self.assertRaises(TypeError,
1784                           operator.setitem, root.attrib, 'attr', cdata)
1785
1786     def test_cdata_parser(self):
1787         tostring = self.etree.tostring
1788         parser = self.etree.XMLParser(strip_cdata=False)
1789         root = self.etree.XML(_bytes('<root><![CDATA[test]]></root>'), parser)
1790
1791         self.assertEqual('test', root.text)
1792         self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
1793                           tostring(root))
1794
1795     def test_cdata_xpath(self):
1796         tostring = self.etree.tostring
1797         parser = self.etree.XMLParser(strip_cdata=False)
1798         root = self.etree.XML(_bytes('<root><![CDATA[test]]></root>'), parser)
1799         self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
1800                           tostring(root))
1801
1802         self.assertEqual(['test'], root.xpath('//text()'))
1803
    # raises TypeError in etree, AssertionError in ElementTree
1805     def test_setitem_assert(self):
1806         Element = self.etree.Element
1807         SubElement = self.etree.SubElement
1808
1809         a = Element('a')
1810         b = SubElement(a, 'b')
1811         
1812         self.assertRaises(TypeError,
1813                           a.__setitem__, 0, 'foo')
1814
1815     def test_append_error(self):
1816         Element = self.etree.Element
1817         root = Element('root')
1818         # raises AssertionError in ElementTree
1819         self.assertRaises(TypeError, root.append, None)
1820         self.assertRaises(TypeError, root.extend, [None])
1821         self.assertRaises(TypeError, root.extend, [Element('one'), None])
1822         self.assertEqual('one', root[0].tag)
1823
1824     def test_append_recursive_error(self):
1825         Element = self.etree.Element
1826         SubElement = self.etree.SubElement
1827         root = Element('root')
1828         self.assertRaises(ValueError, root.append, root)
1829         child = SubElement(root, 'child')
1830         self.assertRaises(ValueError, child.append, root)
1831         child2 = SubElement(child, 'child2')
1832         self.assertRaises(ValueError, child2.append, root)
1833         self.assertRaises(ValueError, child2.append, child)
1834         self.assertEqual('child2', root[0][0].tag)
1835
1836     def test_addnext(self):
1837         Element = self.etree.Element
1838         SubElement = self.etree.SubElement
1839         root = Element('root')
1840         SubElement(root, 'a')
1841         SubElement(root, 'b')
1842
1843         self.assertEqual(['a', 'b'],
1844                           [c.tag for c in root])
1845         root[1].addnext(root[0])
1846         self.assertEqual(['b', 'a'],
1847                           [c.tag for c in root])
1848
1849     def test_addprevious(self):
1850         Element = self.etree.Element
1851         SubElement = self.etree.SubElement
1852         root = Element('root')
1853         SubElement(root, 'a')
1854         SubElement(root, 'b')
1855
1856         self.assertEqual(['a', 'b'],
1857                           [c.tag for c in root])
1858         root[0].addprevious(root[1])
1859         self.assertEqual(['b', 'a'],
1860                           [c.tag for c in root])
1861
1862     def test_addnext_cycle(self):
1863         Element = self.etree.Element
1864         SubElement = self.etree.SubElement
1865         root = Element('root')
1866         a = SubElement(root, 'a')
1867         b = SubElement(a, 'b')
1868         # appending parent as sibling is forbidden
1869         self.assertRaises(ValueError, b.addnext, a)
1870         self.assertEqual(['a'], [c.tag for c in root])
1871         self.assertEqual(['b'], [c.tag for c in a])
1872
1873     def test_addprevious_cycle(self):
1874         Element = self.etree.Element
1875         SubElement = self.etree.SubElement
1876         root = Element('root')
1877         a = SubElement(root, 'a')
1878         b = SubElement(a, 'b')
1879         # appending parent as sibling is forbidden
1880         self.assertRaises(ValueError, b.addprevious, a)
1881         self.assertEqual(['a'], [c.tag for c in root])
1882         self.assertEqual(['b'], [c.tag for c in a])
1883
1884     def test_addnext_cycle_long(self):
1885         Element = self.etree.Element
1886         SubElement = self.etree.SubElement
1887         root = Element('root')
1888         a = SubElement(root, 'a')
1889         b = SubElement(a, 'b')
1890         c = SubElement(b, 'c')
1891         # appending parent as sibling is forbidden
1892         self.assertRaises(ValueError, c.addnext, a)
1893
1894     def test_addprevious_cycle_long(self):
1895         Element = self.etree.Element
1896         SubElement = self.etree.SubElement
1897         root = Element('root')
1898         a = SubElement(root, 'a')
1899         b = SubElement(a, 'b')
1900         c = SubElement(b, 'c')
1901         # appending parent as sibling is forbidden
1902         self.assertRaises(ValueError, c.addprevious, a)
1903
1904     def test_addprevious_noops(self):
1905         Element = self.etree.Element
1906         SubElement = self.etree.SubElement
1907         root = Element('root')
1908         a = SubElement(root, 'a')
1909         b = SubElement(root, 'b')
1910         a.addprevious(a)
1911         self.assertEqual('a', root[0].tag)
1912         self.assertEqual('b', root[1].tag)
1913         b.addprevious(b)
1914         self.assertEqual('a', root[0].tag)
1915         self.assertEqual('b', root[1].tag)
1916         b.addprevious(a)
1917         self.assertEqual('a', root[0].tag)
1918         self.assertEqual('b', root[1].tag)
1919
1920     def test_addnext_noops(self):
1921         Element = self.etree.Element
1922         SubElement = self.etree.SubElement
1923         root = Element('root')
1924         a = SubElement(root, 'a')
1925         b = SubElement(root, 'b')
1926         a.addnext(a)
1927         self.assertEqual('a', root[0].tag)
1928         self.assertEqual('b', root[1].tag)
1929         b.addnext(b)
1930         self.assertEqual('a', root[0].tag)
1931         self.assertEqual('b', root[1].tag)
1932         a.addnext(b)
1933         self.assertEqual('a', root[0].tag)
1934         self.assertEqual('b', root[1].tag)
1935
1936     def test_addnext_root(self):
1937         Element = self.etree.Element
1938         a = Element('a')
1939         b = Element('b')
1940         self.assertRaises(TypeError, a.addnext, b)
1941
1942     def test_addprevious_pi(self):
1943         Element = self.etree.Element
1944         SubElement = self.etree.SubElement
1945         PI = self.etree.PI
1946         root = Element('root')
1947         SubElement(root, 'a')
1948         pi = PI('TARGET', 'TEXT')
1949         pi.tail = "TAIL"
1950
1951         self.assertEqual(_bytes('<root><a></a></root>'),
1952                           self._writeElement(root))
1953         root[0].addprevious(pi)
1954         self.assertEqual(_bytes('<root><?TARGET TEXT?>TAIL<a></a></root>'),
1955                           self._writeElement(root))
1956
1957     def test_addprevious_root_pi(self):
1958         Element = self.etree.Element
1959         PI = self.etree.PI
1960         root = Element('root')
1961         pi = PI('TARGET', 'TEXT')
1962         pi.tail = "TAIL"
1963
1964         self.assertEqual(_bytes('<root></root>'),
1965                           self._writeElement(root))
1966         root.addprevious(pi)
1967         self.assertEqual(_bytes('<?TARGET TEXT?>\n<root></root>'),
1968                           self._writeElement(root))
1969
1970     def test_addnext_pi(self):
1971         Element = self.etree.Element
1972         SubElement = self.etree.SubElement
1973         PI = self.etree.PI
1974         root = Element('root')
1975         SubElement(root, 'a')
1976         pi = PI('TARGET', 'TEXT')
1977         pi.tail = "TAIL"
1978
1979         self.assertEqual(_bytes('<root><a></a></root>'),
1980                           self._writeElement(root))
1981         root[0].addnext(pi)
1982         self.assertEqual(_bytes('<root><a></a><?TARGET TEXT?>TAIL</root>'),
1983                           self._writeElement(root))
1984
1985     def test_addnext_root_pi(self):
1986         Element = self.etree.Element
1987         PI = self.etree.PI
1988         root = Element('root')
1989         pi = PI('TARGET', 'TEXT')
1990         pi.tail = "TAIL"
1991
1992         self.assertEqual(_bytes('<root></root>'),
1993                           self._writeElement(root))
1994         root.addnext(pi)
1995         self.assertEqual(_bytes('<root></root>\n<?TARGET TEXT?>'),
1996                           self._writeElement(root))
1997
1998     def test_addnext_comment(self):
1999         Element = self.etree.Element
2000         SubElement = self.etree.SubElement
2001         Comment = self.etree.Comment
2002         root = Element('root')
2003         SubElement(root, 'a')
2004         comment = Comment('TEXT ')
2005         comment.tail = "TAIL"
2006
2007         self.assertEqual(_bytes('<root><a></a></root>'),
2008                           self._writeElement(root))
2009         root[0].addnext(comment)
2010         self.assertEqual(_bytes('<root><a></a><!--TEXT -->TAIL</root>'),
2011                           self._writeElement(root))
2012
2013     def test_addnext_root_comment(self):
2014         Element = self.etree.Element
2015         Comment = self.etree.Comment
2016         root = Element('root')
2017         comment = Comment('TEXT ')
2018         comment.tail = "TAIL"
2019
2020         self.assertEqual(_bytes('<root></root>'),
2021                           self._writeElement(root))
2022         root.addnext(comment)
2023         self.assertEqual(_bytes('<root></root>\n<!--TEXT -->'),
2024                           self._writeElement(root))
2025
2026     def test_addprevious_comment(self):
2027         Element = self.etree.Element
2028         SubElement = self.etree.SubElement
2029         Comment = self.etree.Comment
2030         root = Element('root')
2031         SubElement(root, 'a')
2032         comment = Comment('TEXT ')
2033         comment.tail = "TAIL"
2034
2035         self.assertEqual(_bytes('<root><a></a></root>'),
2036                           self._writeElement(root))
2037         root[0].addprevious(comment)
2038         self.assertEqual(_bytes('<root><!--TEXT -->TAIL<a></a></root>'),
2039                           self._writeElement(root))
2040
2041     def test_addprevious_root_comment(self):
2042         Element = self.etree.Element
2043         Comment = self.etree.Comment
2044         root = Element('root')
2045         comment = Comment('TEXT ')
2046         comment.tail = "TAIL"
2047
2048         self.assertEqual(_bytes('<root></root>'),
2049                           self._writeElement(root))
2050         root.addprevious(comment)
2051         self.assertEqual(_bytes('<!--TEXT -->\n<root></root>'),
2052                           self._writeElement(root))
2053
    # ET's Elements have items() and keys(), but not values()
2055     def test_attribute_values(self):
2056         XML = self.etree.XML
2057         
2058         root = XML(_bytes('<doc alpha="Alpha" beta="Beta" gamma="Gamma"/>'))
2059         values = root.values()
2060         values.sort()
2061         self.assertEqual(['Alpha', 'Beta', 'Gamma'], values)
2062
2063     # gives error in ElementTree
2064     def test_comment_empty(self):
2065         Element = self.etree.Element
2066         Comment = self.etree.Comment
2067
2068         a = Element('a')
2069         a.append(Comment())
2070         self.assertEqual(
2071             _bytes('<a><!----></a>'),
2072             self._writeElement(a))
2073
2074     # ElementTree ignores comments
2075     def test_comment_parse_empty(self):
2076         ElementTree = self.etree.ElementTree
2077         tostring = self.etree.tostring
2078
2079         xml = _bytes('<a><b/><!----><c/></a>')
2080         f = BytesIO(xml)
2081         doc = ElementTree(file=f)
2082         a = doc.getroot()
2083         self.assertEqual(
2084             '',
2085             a[1].text)
2086         self.assertEqual(
2087             xml,
2088             tostring(a))
2089
2090     # ElementTree ignores comments
2091     def test_comment_no_proxy_yet(self):
2092         ElementTree = self.etree.ElementTree
2093         
2094         f = BytesIO('<a><b></b><!-- hoi --><c></c></a>')
2095         doc = ElementTree(file=f)
2096         a = doc.getroot()
2097         self.assertEqual(
2098             ' hoi ',
2099             a[1].text)
2100
2101     # does not raise an exception in ElementTree
2102     def test_comment_immutable(self):
2103         Element = self.etree.Element
2104         Comment = self.etree.Comment
2105
2106         c = Comment()
2107         el = Element('myel')
2108
2109         self.assertRaises(TypeError, c.append, el)
2110         self.assertRaises(TypeError, c.insert, 0, el)
2111         self.assertRaises(TypeError, c.set, "myattr", "test")
2112
2113     def test_comment_immutable_attrib(self):
2114         c = self.etree.Comment()
2115         self.assertEqual(0, len(c.attrib))
2116
2117         self.assertFalse(c.attrib.__contains__('nope'))
2118         self.assertFalse('nope' in c.attrib)
2119         self.assertFalse('nope' in c.attrib.keys())
2120         self.assertFalse('nope' in c.attrib.values())
2121         self.assertFalse(('nope', 'huhu') in c.attrib.items())
2122
2123         self.assertEqual([], list(c.attrib))
2124         self.assertEqual([], list(c.attrib.keys()))
2125         self.assertEqual([], list(c.attrib.items()))
2126         self.assertEqual([], list(c.attrib.values()))
2127         self.assertEqual([], list(c.attrib.iterkeys()))
2128         self.assertEqual([], list(c.attrib.iteritems()))
2129         self.assertEqual([], list(c.attrib.itervalues()))
2130
2131         self.assertEqual('HUHU', c.attrib.pop('nope', 'HUHU'))
2132         self.assertRaises(KeyError, c.attrib.pop, 'nope')
2133
2134         self.assertRaises(KeyError, c.attrib.__getitem__, 'only')
2135         self.assertRaises(KeyError, c.attrib.__getitem__, 'names')
2136         self.assertRaises(KeyError, c.attrib.__getitem__, 'nope')
2137         self.assertRaises(KeyError, c.attrib.__setitem__, 'nope', 'yep')
2138         self.assertRaises(KeyError, c.attrib.__delitem__, 'nope')
2139
2140     # test passing 'None' to dump()
2141     def test_dump_none(self):
2142         self.assertRaises(TypeError, self.etree.dump, None)
2143
2144     def test_prefix(self):
2145         ElementTree = self.etree.ElementTree
2146         
2147         f = BytesIO('<a xmlns:foo="http://www.infrae.com/ns/1"><foo:b/></a>')
2148         doc = ElementTree(file=f)
2149         a = doc.getroot()
2150         self.assertEqual(
2151             None,
2152             a.prefix)
2153         self.assertEqual(
2154             'foo',
2155             a[0].prefix)
2156
2157     def test_prefix_default_ns(self):
2158         ElementTree = self.etree.ElementTree
2159         
2160         f = BytesIO('<a xmlns="http://www.infrae.com/ns/1"><b/></a>')
2161         doc = ElementTree(file=f)
2162         a = doc.getroot()
2163         self.assertEqual(
2164             None,
2165             a.prefix)
2166         self.assertEqual(
2167             None,
2168             a[0].prefix)
2169
2170     def test_getparent(self):
2171         Element = self.etree.Element
2172         SubElement = self.etree.SubElement
2173
2174         a = Element('a')
2175         b = SubElement(a, 'b')
2176         c = SubElement(a, 'c')
2177         d = SubElement(b, 'd')
2178         self.assertEqual(
2179             None,
2180             a.getparent())
2181         self.assertEqual(
2182             a,
2183             b.getparent())
2184         self.assertEqual(
2185             b.getparent(),
2186             c.getparent())
2187         self.assertEqual(
2188             b,
2189             d.getparent())
2190
2191     def test_iterchildren(self):
2192         XML = self.etree.XML
2193         
2194         root = XML(_bytes('<doc><one/><two>Two</two>Hm<three/></doc>'))
2195         result = []
2196         for el in root.iterchildren():
2197             result.append(el.tag)
2198         self.assertEqual(['one', 'two', 'three'], result)
2199
2200     def test_iterchildren_reversed(self):
2201         XML = self.etree.XML
2202         
2203         root = XML(_bytes('<doc><one/><two>Two</two>Hm<three/></doc>'))
2204         result = []
2205         for el in root.iterchildren(reversed=True):
2206             result.append(el.tag)
2207         self.assertEqual(['three', 'two', 'one'], result)
2208
2209     def test_iterchildren_tag(self):
2210         XML = self.etree.XML
2211         
2212         root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two></doc>'))
2213         result = []
2214         for el in root.iterchildren(tag='two'):
2215             result.append(el.text)
2216         self.assertEqual(['Two', 'Bla'], result)
2217
2218     def test_iterchildren_tag_posarg(self):
2219         XML = self.etree.XML
2220
2221         root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two></doc>'))
2222         result = []
2223         for el in root.iterchildren('two'):
2224             result.append(el.text)
2225         self.assertEqual(['Two', 'Bla'], result)
2226
2227     def test_iterchildren_tag_reversed(self):
2228         XML = self.etree.XML
2229         
2230         root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two></doc>'))
2231         result = []
2232         for el in root.iterchildren(reversed=True, tag='two'):
2233             result.append(el.text)
2234         self.assertEqual(['Bla', 'Two'], result)
2235
2236     def test_iterchildren_tag_multiple(self):
2237         XML = self.etree.XML
2238
2239         root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2240         result = []
2241         for el in root.iterchildren(tag=['two', 'three']):
2242             result.append(el.text)
2243         self.assertEqual(['Two', 'Bla', None], result)
2244
2245     def test_iterchildren_tag_multiple_posarg(self):
2246         XML = self.etree.XML
2247
2248         root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2249         result = []
2250         for el in root.iterchildren('two', 'three'):
2251             result.append(el.text)
2252         self.assertEqual(['Two', 'Bla', None], result)
2253
2254     def test_iterchildren_tag_multiple_reversed(self):
2255         XML = self.etree.XML
2256
2257         root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2258         result = []
2259         for el in root.iterchildren(reversed=True, tag=['two', 'three']):
2260             result.append(el.text)
2261         self.assertEqual([None, 'Bla', 'Two'], result)
2262
2263     def test_iterancestors(self):
2264         Element    = self.etree.Element
2265         SubElement = self.etree.SubElement
2266
2267         a = Element('a')
2268         b = SubElement(a, 'b')
2269         c = SubElement(a, 'c')
2270         d = SubElement(b, 'd')
2271         self.assertEqual(
2272             [],
2273             list(a.iterancestors()))
2274         self.assertEqual(
2275             [a],
2276             list(b.iterancestors()))
2277         self.assertEqual(
2278             [a],
2279             list(c.iterancestors()))
2280         self.assertEqual(
2281             [b, a],
2282             list(d.iterancestors()))
2283
2284     def test_iterancestors_tag(self):
2285         Element    = self.etree.Element
2286         SubElement = self.etree.SubElement
2287
2288         a = Element('a')
2289         b = SubElement(a, 'b')
2290         c = SubElement(a, 'c')
2291         d = SubElement(b, 'd')
2292         self.assertEqual(
2293             [a],
2294             list(d.iterancestors('a')))
2295         self.assertEqual(
2296             [a],
2297             list(d.iterancestors(tag='a')))
2298
2299         self.assertEqual(
2300             [b, a],
2301             list(d.iterancestors('*')))
2302         self.assertEqual(
2303             [b, a],
2304             list(d.iterancestors(tag='*')))
2305
2306     def test_iterancestors_tag_multiple(self):
2307         Element    = self.etree.Element
2308         SubElement = self.etree.SubElement
2309
2310         a = Element('a')
2311         b = SubElement(a, 'b')
2312         c = SubElement(a, 'c')
2313         d = SubElement(b, 'd')
2314         self.assertEqual(
2315             [b, a],
2316             list(d.iterancestors(tag=('a', 'b'))))
2317         self.assertEqual(
2318             [b, a],
2319             list(d.iterancestors('a', 'b')))
2320
2321         self.assertEqual(
2322             [],
2323             list(d.iterancestors(tag=('w', 'x', 'y', 'z'))))
2324         self.assertEqual(
2325             [],
2326             list(d.iterancestors('w', 'x', 'y', 'z')))
2327
2328         self.assertEqual(
2329             [],
2330             list(d.iterancestors(tag=('d', 'x'))))
2331         self.assertEqual(
2332             [],
2333             list(d.iterancestors('d', 'x')))
2334
2335         self.assertEqual(
2336             [b, a],
2337             list(d.iterancestors(tag=('b', '*'))))
2338         self.assertEqual(
2339             [b, a],
2340             list(d.iterancestors('b', '*')))
2341
2342         self.assertEqual(
2343             [b],
2344             list(d.iterancestors(tag=('b', 'c'))))
2345         self.assertEqual(
2346             [b],
2347             list(d.iterancestors('b', 'c')))
2348
2349     def test_iterdescendants(self):
2350         Element = self.etree.Element
2351         SubElement = self.etree.SubElement
2352
2353         a = Element('a')
2354         b = SubElement(a, 'b')
2355         c = SubElement(a, 'c')
2356         d = SubElement(b, 'd')
2357         e = SubElement(c, 'e')
2358
2359         self.assertEqual(
2360             [b, d, c, e],
2361             list(a.iterdescendants()))
2362         self.assertEqual(
2363             [],
2364             list(d.iterdescendants()))
2365
2366     def test_iterdescendants_tag(self):
2367         Element = self.etree.Element
2368         SubElement = self.etree.SubElement
2369
2370         a = Element('a')
2371         b = SubElement(a, 'b')
2372         c = SubElement(a, 'c')
2373         d = SubElement(b, 'd')
2374         e = SubElement(c, 'e')
2375
2376         self.assertEqual(
2377             [],
2378             list(a.iterdescendants('a')))
2379         self.assertEqual(
2380             [],
2381             list(a.iterdescendants(tag='a')))
2382
2383         a2 = SubElement(e, 'a')
2384         self.assertEqual(
2385             [a2],
2386             list(a.iterdescendants('a')))
2387
2388         self.assertEqual(
2389             [a2],
2390             list(c.iterdescendants('a')))
2391         self.assertEqual(
2392             [a2],
2393             list(c.iterdescendants(tag='a')))
2394
2395     def test_iterdescendants_tag_multiple(self):
2396         Element = self.etree.Element
2397         SubElement = self.etree.SubElement
2398
2399         a = Element('a')
2400         b = SubElement(a, 'b')
2401         c = SubElement(a, 'c')
2402         d = SubElement(b, 'd')
2403         e = SubElement(c, 'e')
2404
2405         self.assertEqual(
2406             [b, e],
2407             list(a.iterdescendants(tag=('a', 'b', 'e'))))
2408         self.assertEqual(
2409             [b, e],
2410             list(a.iterdescendants('a', 'b', 'e')))
2411
2412         a2 = SubElement(e, 'a')
2413         self.assertEqual(
2414             [b, a2],
2415             list(a.iterdescendants(tag=('a', 'b'))))
2416         self.assertEqual(
2417             [b, a2],
2418             list(a.iterdescendants('a', 'b')))
2419
2420         self.assertEqual(
2421             [],
2422             list(c.iterdescendants(tag=('x', 'y', 'z'))))
2423         self.assertEqual(
2424             [],
2425             list(c.iterdescendants('x', 'y', 'z')))
2426
2427         self.assertEqual(
2428             [b, d, c, e, a2],
2429             list(a.iterdescendants(tag=('x', 'y', 'z', '*'))))
2430         self.assertEqual(
2431             [b, d, c, e, a2],
2432             list(a.iterdescendants('x', 'y', 'z', '*')))
2433
2434     def test_getroottree(self):
2435         Element = self.etree.Element
2436         SubElement = self.etree.SubElement
2437
2438         a = Element('a')
2439         b = SubElement(a, 'b')
2440         c = SubElement(a, 'c')
2441         d = SubElement(b, 'd')
2442         self.assertEqual(
2443             a,
2444             a.getroottree().getroot())
2445         self.assertEqual(
2446             a,
2447             b.getroottree().getroot())
2448         self.assertEqual(
2449             a,
2450             d.getroottree().getroot())
2451
2452     def test_getnext(self):
2453         Element    = self.etree.Element
2454         SubElement = self.etree.SubElement
2455
2456         a = Element('a')
2457         b = SubElement(a, 'b')
2458         c = SubElement(a, 'c')
2459         self.assertEqual(
2460             None,
2461             a.getnext())
2462         self.assertEqual(
2463             c,
2464             b.getnext())
2465         self.assertEqual(
2466             None,
2467             c.getnext())
2468
2469     def test_getprevious(self):
2470         Element    = self.etree.Element
2471         SubElement = self.etree.SubElement
2472
2473         a = Element('a')
2474         b = SubElement(a, 'b')
2475         c = SubElement(a, 'c')
2476         d = SubElement(b, 'd')
2477         self.assertEqual(
2478             None,
2479             a.getprevious())
2480         self.assertEqual(
2481             b,
2482             c.getprevious())
2483         self.assertEqual(
2484             None,
2485             b.getprevious())
2486
2487     def test_itersiblings(self):
2488         Element    = self.etree.Element
2489         SubElement = self.etree.SubElement
2490
2491         a = Element('a')
2492         b = SubElement(a, 'b')
2493         c = SubElement(a, 'c')
2494         d = SubElement(b, 'd')
2495         self.assertEqual(
2496             [],
2497             list(a.itersiblings()))
2498         self.assertEqual(
2499             [c],
2500             list(b.itersiblings()))
2501         self.assertEqual(
2502             [],
2503             list(c.itersiblings()))
2504         self.assertEqual(
2505             [b],
2506             list(c.itersiblings(preceding=True)))
2507         self.assertEqual(
2508             [],
2509             list(b.itersiblings(preceding=True)))
2510
2511     def test_itersiblings_tag(self):
2512         Element    = self.etree.Element
2513         SubElement = self.etree.SubElement
2514
2515         a = Element('a')
2516         b = SubElement(a, 'b')
2517         c = SubElement(a, 'c')
2518         d = SubElement(b, 'd')
2519         self.assertEqual(
2520             [],
2521             list(a.itersiblings(tag='XXX')))
2522         self.assertEqual(
2523             [c],
2524             list(b.itersiblings(tag='c')))
2525         self.assertEqual(
2526             [c],
2527             list(b.itersiblings(tag='*')))
2528         self.assertEqual(
2529             [b],
2530             list(c.itersiblings(preceding=True, tag='b')))
2531         self.assertEqual(
2532             [],
2533             list(c.itersiblings(preceding=True, tag='c')))
2534
2535     def test_itersiblings_tag_multiple(self):
2536         Element    = self.etree.Element
2537         SubElement = self.etree.SubElement
2538
2539         a = Element('a')
2540         b = SubElement(a, 'b')
2541         c = SubElement(a, 'c')
2542         d = SubElement(b, 'd')
2543         e = SubElement(a, 'e')
2544         self.assertEqual(
2545             [],
2546             list(a.itersiblings(tag=('XXX', 'YYY'))))
2547         self.assertEqual(
2548             [c, e],
2549             list(b.itersiblings(tag=('c', 'd', 'e'))))
2550         self.assertEqual(
2551             [b],
2552             list(c.itersiblings(preceding=True, tag=('b', 'b', 'c', 'd'))))
2553         self.assertEqual(
2554             [c, b],
2555             list(e.itersiblings(preceding=True, tag=('c', '*'))))
2556
2557     def test_parseid(self):
2558         parseid = self.etree.parseid
2559         XML     = self.etree.XML
2560         xml_text = _bytes('''
2561         <!DOCTYPE document [
2562         <!ELEMENT document (h1,p)*>
2563         <!ELEMENT h1 (#PCDATA)>
2564         <!ATTLIST h1 myid ID #REQUIRED>
2565         <!ELEMENT p  (#PCDATA)>
2566         <!ATTLIST p  someid ID #REQUIRED>
2567         ]>
2568         <document>
2569           <h1 myid="chapter1">...</h1>
2570           <p id="note1" class="note">...</p>
2571           <p>Regular paragraph.</p>
2572           <p xml:id="xmlid">XML:ID paragraph.</p>
2573           <p someid="warn1" class="warning">...</p>
2574         </document>
2575         ''')
2576
2577         tree, dic = parseid(BytesIO(xml_text))
2578         root = tree.getroot()
2579         root2 = XML(xml_text)
2580         self.assertEqual(self._writeElement(root),
2581                           self._writeElement(root2))
2582         expected = {
2583             "chapter1" : root[0],
2584             "xmlid"    : root[3],
2585             "warn1"    : root[4]
2586             }
2587         self.assertTrue("chapter1" in dic)
2588         self.assertTrue("warn1" in dic)
2589         self.assertTrue("xmlid" in dic)
2590         self._checkIDDict(dic, expected)
2591
2592     def test_XMLDTDID(self):
2593         XMLDTDID = self.etree.XMLDTDID
2594         XML      = self.etree.XML
2595         xml_text = _bytes('''
2596         <!DOCTYPE document [
2597         <!ELEMENT document (h1,p)*>
2598         <!ELEMENT h1 (#PCDATA)>
2599         <!ATTLIST h1 myid ID #REQUIRED>
2600         <!ELEMENT p  (#PCDATA)>
2601         <!ATTLIST p  someid ID #REQUIRED>
2602         ]>
2603         <document>
2604           <h1 myid="chapter1">...</h1>
2605           <p id="note1" class="note">...</p>
2606           <p>Regular paragraph.</p>
2607           <p xml:id="xmlid">XML:ID paragraph.</p>
2608           <p someid="warn1" class="warning">...</p>
2609         </document>
2610         ''')
2611
2612         root, dic = XMLDTDID(xml_text)
2613         root2 = XML(xml_text)
2614         self.assertEqual(self._writeElement(root),
2615                           self._writeElement(root2))
2616         expected = {
2617             "chapter1" : root[0],
2618             "xmlid"    : root[3],
2619             "warn1"    : root[4]
2620             }
2621         self.assertTrue("chapter1" in dic)
2622         self.assertTrue("warn1" in dic)
2623         self.assertTrue("xmlid" in dic)
2624         self._checkIDDict(dic, expected)
2625
2626     def test_XMLDTDID_empty(self):
2627         XMLDTDID = self.etree.XMLDTDID
2628         XML      = self.etree.XML
2629         xml_text = _bytes('''
2630         <document>
2631           <h1 myid="chapter1">...</h1>
2632           <p id="note1" class="note">...</p>
2633           <p>Regular paragraph.</p>
2634           <p someid="warn1" class="warning">...</p>
2635         </document>
2636         ''')
2637
2638         root, dic = XMLDTDID(xml_text)
2639         root2 = XML(xml_text)
2640         self.assertEqual(self._writeElement(root),
2641                           self._writeElement(root2))
2642         expected = {}
2643         self._checkIDDict(dic, expected)
2644
2645     def test_XMLDTDID_no_id_dict(self):
2646         XMLDTDID = self.etree.XMLDTDID
2647         XML      = self.etree.XML
2648         xml_text = _bytes('''
2649         <!DOCTYPE document [
2650         <!ELEMENT document (h1,p)*>
2651         <!ELEMENT h1 (#PCDATA)>
2652         <!ATTLIST h1 myid ID #REQUIRED>
2653         <!ELEMENT p  (#PCDATA)>
2654         <!ATTLIST p  someid ID #REQUIRED>
2655         ]>
2656         <document>
2657           <h1 myid="chapter1">...</h1>
2658           <p id="note1" class="note">...</p>
2659           <p>Regular paragraph.</p>
2660           <p xml:id="xmlid">XML:ID paragraph.</p>
2661           <p someid="warn1" class="warning">...</p>
2662         </document>
2663         ''')
2664
2665         parser = etree.XMLParser(collect_ids=False)
2666         root, dic = XMLDTDID(xml_text, parser=parser)
2667         root2 = XML(xml_text)
2668         self.assertEqual(self._writeElement(root),
2669                          self._writeElement(root2))
2670         self.assertFalse(dic)
2671         self._checkIDDict(dic, {})
2672
2673     def _checkIDDict(self, dic, expected):
2674         self.assertEqual(len(dic),
2675                           len(expected))
2676         self.assertEqual(sorted(dic.items()),
2677                           sorted(expected.items()))
2678         if sys.version_info < (3,):
2679             self.assertEqual(sorted(dic.iteritems()),
2680                               sorted(expected.iteritems()))
2681         self.assertEqual(sorted(dic.keys()),
2682                           sorted(expected.keys()))
2683         if sys.version_info < (3,):
2684             self.assertEqual(sorted(dic.iterkeys()),
2685                               sorted(expected.iterkeys()))
2686         if sys.version_info < (3,):
2687             self.assertEqual(sorted(dic.values()),
2688                               sorted(expected.values()))
2689             self.assertEqual(sorted(dic.itervalues()),
2690                               sorted(expected.itervalues()))
2691
2692     def test_register_namespace_xml(self):
2693         self.assertRaises(ValueError, self.etree.register_namespace,
2694                           "XML", "http://www.w3.org/XML/1998/namespace")
2695         self.assertRaises(ValueError, self.etree.register_namespace,
2696                           "xml", "http://www.w3.org/XML/2345")
2697         self.etree.register_namespace("xml", "http://www.w3.org/XML/1998/namespace")  # ok
2698
2699     def test_namespaces(self):
2700         etree = self.etree
2701
2702         r = {'foo': 'http://ns.infrae.com/foo'}
2703         e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2704         self.assertEqual(
2705             'foo',
2706             e.prefix)
2707         self.assertEqual(
2708             _bytes('<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>'),
2709             self._writeElement(e))
2710         
2711     def test_namespaces_default(self):
2712         etree = self.etree
2713
2714         r = {None: 'http://ns.infrae.com/foo'}
2715         e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2716         self.assertEqual(
2717             None,
2718             e.prefix)
2719         self.assertEqual(
2720             '{http://ns.infrae.com/foo}bar',
2721             e.tag)
2722         self.assertEqual(
2723             _bytes('<bar xmlns="http://ns.infrae.com/foo"></bar>'),
2724             self._writeElement(e))
2725
2726     def test_namespaces_default_and_other(self):
2727         etree = self.etree
2728
2729         r = {None: 'http://ns.infrae.com/foo', 'p': 'http://test/'}
2730         e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2731         self.assertEqual(None, e.prefix)
2732         self.assertEqual('{http://ns.infrae.com/foo}bar', e.tag)
2733         self.assertEqual(
2734             _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:p="http://test/"></bar>'),
2735             self._writeElement(e))
2736
2737     def test_namespaces_default_and_attr(self):
2738         etree = self.etree
2739
2740         r = {None: 'http://ns.infrae.com/foo',
2741              'hoi': 'http://ns.infrae.com/hoi'}
2742         e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2743         e.set('{http://ns.infrae.com/hoi}test', 'value')
2744         self.assertEqual(
2745             _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>'),
2746             self._writeElement(e))
2747
2748     def test_attribute_keeps_namespace_prefix_on_merge(self):
2749         etree = self.etree
2750
2751         root = etree.Element('{http://test/ns}root',
2752                              nsmap={None: 'http://test/ns'})
2753         sub = etree.Element('{http://test/ns}sub',
2754                             nsmap={'test': 'http://test/ns'})
2755
2756         sub.attrib['{http://test/ns}attr'] = 'value'
2757         self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2758         self.assertEqual(
2759             _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2760             etree.tostring(sub))
2761
2762         root.append(sub)
2763         self.assertEqual(
2764             _bytes('<root xmlns="http://test/ns">'
2765                    '<sub xmlns:test="http://test/ns" test:attr="value"/>'
2766                    '</root>'),
2767             etree.tostring(root))
2768
2769     def test_attribute_keeps_namespace_prefix_on_merge_with_nons(self):
2770         etree = self.etree
2771
2772         root = etree.Element('root')
2773         sub = etree.Element('{http://test/ns}sub',
2774                             nsmap={'test': 'http://test/ns'})
2775
2776         sub.attrib['{http://test/ns}attr'] = 'value'
2777         self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2778         self.assertEqual(
2779             _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2780             etree.tostring(sub))
2781
2782         root.append(sub)
2783         self.assertEqual(
2784             _bytes('<root>'
2785                    '<test:sub xmlns:test="http://test/ns" test:attr="value"/>'
2786                    '</root>'),
2787             etree.tostring(root))
2788
2789     def test_attribute_gets_namespace_prefix_on_merge_with_nons(self):
2790         etree = self.etree
2791
2792         root = etree.Element('root')
2793         sub = etree.Element('{http://test/ns}sub',
2794                             nsmap={None: 'http://test/ns'})
2795
2796         sub.attrib['{http://test/ns}attr'] = 'value'
2797         self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2798         self.assertEqual(
2799             _bytes('<sub xmlns="http://test/ns" '
2800                    'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2801             etree.tostring(sub))
2802
2803         root.append(sub)
2804         self.assertEqual(
2805             _bytes('<root>'
2806                    '<sub xmlns="http://test/ns"'
2807                    ' xmlns:ns0="http://test/ns" ns0:attr="value"/>'
2808                    '</root>'),
2809             etree.tostring(root))
2810
2811     def test_attribute_gets_namespace_prefix_on_merge(self):
2812         etree = self.etree
2813
2814         root = etree.Element('{http://test/ns}root',
2815                              nsmap={'test': 'http://test/ns',
2816                                     None: 'http://test/ns'})
2817         sub = etree.Element('{http://test/ns}sub',
2818                             nsmap={None: 'http://test/ns'})
2819
2820         sub.attrib['{http://test/ns}attr'] = 'value'
2821         self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2822         self.assertEqual(
2823             _bytes('<sub xmlns="http://test/ns" '
2824                    'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2825             etree.tostring(sub))
2826
2827         root.append(sub)
2828         self.assertEqual(
2829             _bytes('<test:root xmlns:test="http://test/ns" xmlns="http://test/ns">'
2830                    '<test:sub test:attr="value"/>'
2831                    '</test:root>'),
2832             etree.tostring(root))
2833
2834     def test_namespaces_elementtree(self):
2835         etree = self.etree
2836         r = {None: 'http://ns.infrae.com/foo',
2837              'hoi': 'http://ns.infrae.com/hoi'} 
2838         e = etree.Element('{http://ns.infrae.com/foo}z', nsmap=r)
2839         tree = etree.ElementTree(element=e)
2840         etree.SubElement(e, '{http://ns.infrae.com/hoi}x')
2841         self.assertEqual(
2842             _bytes('<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>'),
2843             self._writeElement(e))
2844
2845     def test_namespaces_default_copy_element(self):
2846         etree = self.etree
2847
2848         r = {None: 'http://ns.infrae.com/foo'}
2849         e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2850         e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2851
2852         e1.append(e2)
2853
2854         self.assertEqual(
2855             None,
2856             e1.prefix)
2857         self.assertEqual(
2858             None,
2859             e1[0].prefix)
2860         self.assertEqual(
2861             '{http://ns.infrae.com/foo}bar',
2862             e1.tag)
2863         self.assertEqual(
2864             '{http://ns.infrae.com/foo}bar',
2865             e1[0].tag)
2866
2867     def test_namespaces_copy_element(self):
2868         etree = self.etree
2869
2870         r = {None: 'http://ns.infrae.com/BAR'}
2871         e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
2872         e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2873
2874         e1.append(e2)
2875
2876         self.assertEqual(
2877             None,
2878             e1.prefix)
2879         self.assertNotEqual(
2880             None,
2881             e2.prefix)
2882         self.assertEqual(
2883             '{http://ns.infrae.com/BAR}bar',
2884             e1.tag)
2885         self.assertEqual(
2886             '{http://ns.infrae.com/foo}bar',
2887             e2.tag)
2888
2889     def test_namespaces_reuse_after_move(self):
2890         ns_href = "http://a.b.c"
2891         one = self.etree.fromstring(
2892             _bytes('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
2893         baz = one[0][0]
2894
2895         two = self.etree.fromstring(
2896             _bytes('<root xmlns:ns="%s"/>' % ns_href))
2897         two.append(baz)
2898         del one # make sure the source document is deallocated
2899
2900         self.assertEqual('{%s}baz' % ns_href, baz.tag)
2901         self.assertEqual(
2902             _bytes('<root xmlns:ns="%s"><ns:baz/></root>' % ns_href),
2903             self.etree.tostring(two))
2904
2905     def test_namespace_cleanup(self):
2906         xml = _bytes(
2907             '<foo xmlns="F" xmlns:x="x">'
2908             '<bar xmlns:ns="NS" xmlns:b="b" xmlns="B">'
2909             '<ns:baz/>'
2910             '</bar></foo>'
2911         )
2912         root = self.etree.fromstring(xml)
2913         self.assertEqual(xml, self.etree.tostring(root))
2914         self.etree.cleanup_namespaces(root)
2915         self.assertEqual(
2916             _bytes('<foo xmlns="F"><bar xmlns:ns="NS" xmlns="B"><ns:baz/></bar></foo>'),
2917             self.etree.tostring(root))
2918
2919     def test_namespace_cleanup_attributes(self):
2920         xml = _bytes(
2921             '<foo xmlns="F" xmlns:x="X" xmlns:a="A">'
2922             '<bar xmlns:ns="NS" xmlns:b="b" xmlns="B">'
2923             '<ns:baz a:test="attr"/>'
2924             '</bar></foo>'
2925         )
2926         root = self.etree.fromstring(xml)
2927         self.assertEqual(xml, self.etree.tostring(root))
2928         self.etree.cleanup_namespaces(root)
2929         self.assertEqual(
2930             _bytes('<foo xmlns="F" xmlns:a="A">'
2931                    '<bar xmlns:ns="NS" xmlns="B">'
2932                    '<ns:baz a:test="attr"/>'
2933                    '</bar></foo>'),
2934             self.etree.tostring(root))
2935
2936     def test_namespace_cleanup_many(self):
2937         xml = ('<n12:foo ' +
2938                ' '.join('xmlns:n{n}="NS{n}"'.format(n=i) for i in range(100)) +
2939                '><n68:a/></n12:foo>').encode('utf8')
2940         root = self.etree.fromstring(xml)
2941         self.assertEqual(xml, self.etree.tostring(root))
2942         self.etree.cleanup_namespaces(root)
2943         self.assertEqual(
2944             b'<n12:foo xmlns:n12="NS12" xmlns:n68="NS68"><n68:a/></n12:foo>',
2945             self.etree.tostring(root))
2946
2947     def test_namespace_cleanup_deep(self):
2948         xml = ('<root>' +
2949                ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2950                '<n64:x/>' + '</a>'*100 + '</root>').encode('utf8')
2951         root = self.etree.fromstring(xml)
2952         self.assertEqual(xml, self.etree.tostring(root))
2953         self.etree.cleanup_namespaces(root)
2954         self.assertEqual(
2955             b'<root>' + b'<a>'*64 + b'<a xmlns:n64="NS64">' + b'<a>'*35 +
2956             b'<n64:x/>' + b'</a>'*100 + b'</root>',
2957             self.etree.tostring(root))
2958
2959     def test_namespace_cleanup_deep_to_top(self):
2960         xml = ('<root>' +
2961                ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2962                '<n64:x xmlns:a="A" a:attr="X"/>' +
2963                '</a>'*100 +
2964                '</root>').encode('utf8')
2965         root = self.etree.fromstring(xml)
2966         self.assertEqual(xml, self.etree.tostring(root))
2967         self.etree.cleanup_namespaces(root, top_nsmap={'n64': 'NS64'})
2968         self.assertEqual(
2969             b'<root xmlns:n64="NS64">' + b'<a>'*100 +
2970             b'<n64:x xmlns:a="A" a:attr="X"/>' + b'</a>'*100 + b'</root>',
2971             self.etree.tostring(root))
2972
2973     def test_namespace_cleanup_keep_prefixes(self):
2974         xml = ('<root xmlns:n64="NS64" xmlns:foo="FOO" xmlns:unused1="UNUSED" xmlns:no="NO">'
2975                '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2976                '<foo>foo:bar</foo>'
2977                '</root>').encode('utf8')
2978         root = self.etree.fromstring(xml)
2979         self.assertEqual(xml, self.etree.tostring(root))
2980         self.etree.cleanup_namespaces(root, keep_ns_prefixes=['foo'])
2981         self.assertEqual(
2982             b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2983             b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2984             b'<foo>foo:bar</foo>'
2985             b'</root>',
2986             self.etree.tostring(root))
2987
2988     def test_namespace_cleanup_keep_prefixes_top(self):
2989         xml = ('<root xmlns:n64="NS64" xmlns:unused1="UNUSED" xmlns:no="NO">'
2990                '<sub xmlns:foo="FOO">'
2991                '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2992                '<foo>foo:bar</foo>'
2993                '</sub>'
2994                '</root>').encode('utf8')
2995         root = self.etree.fromstring(xml)
2996         self.assertEqual(xml, self.etree.tostring(root))
2997         self.etree.cleanup_namespaces(
2998             root,
2999             top_nsmap={'foo': 'FOO', 'unused1': 'UNUSED'},
3000             keep_ns_prefixes=['foo'])
3001         self.assertEqual(
3002             b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
3003             b'<sub>'
3004             b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
3005             b'<foo>foo:bar</foo>'
3006             b'</sub>'
3007             b'</root>',
3008             self.etree.tostring(root))
3009
3010     def test_element_nsmap(self):
3011         etree = self.etree
3012
3013         r = {None: 'http://ns.infrae.com/foo',
3014              'hoi': 'http://ns.infrae.com/hoi'}
3015         e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
3016         self.assertEqual(
3017             r,
3018             e.nsmap)
3019
3020     def test_subelement_nsmap(self):
3021         etree = self.etree
3022
3023         re = {None: 'http://ns.infrae.com/foo',
3024              'hoi': 'http://ns.infrae.com/hoi'}
3025         e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=re)
3026
3027         rs = {None: 'http://ns.infrae.com/honk',
3028              'top': 'http://ns.infrae.com/top'}
3029         s = etree.SubElement(e, '{http://ns.infrae.com/honk}bar', nsmap=rs)
3030
3031         r = re.copy()
3032         r.update(rs)
3033         self.assertEqual(re, e.nsmap)
3034         self.assertEqual(r,  s.nsmap)
3035
3036     def test_html_prefix_nsmap(self):
3037         etree = self.etree
3038         el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
3039         self.assertEqual({'hha': None}, el.nsmap)
3040
3041     def test_getchildren(self):
3042         Element = self.etree.Element
3043         SubElement = self.etree.SubElement
3044
3045         a = Element('a')
3046         b = SubElement(a, 'b')
3047         c = SubElement(a, 'c')
3048         d = SubElement(b, 'd')
3049         e = SubElement(c, 'e')
3050         self.assertEqual(
3051             _bytes('<a><b><d></d></b><c><e></e></c></a>'),
3052             self.etree.tostring(a, method="c14n"))
3053         self.assertEqual(
3054             [b, c],
3055             a.getchildren())
3056         self.assertEqual(
3057             [d],
3058             b.getchildren())
3059         self.assertEqual(
3060             [],
3061             d.getchildren())
3062
3063     def test_getiterator(self):
3064         Element = self.etree.Element
3065         SubElement = self.etree.SubElement
3066
3067         a = Element('a')
3068         b = SubElement(a, 'b')
3069         c = SubElement(a, 'c')
3070         d = SubElement(b, 'd')
3071         e = SubElement(c, 'e')
3072
3073         self.assertEqual(
3074             [a, b, d, c, e],
3075             list(a.getiterator()))
3076         self.assertEqual(
3077             [d],
3078             list(d.getiterator()))
3079
3080     def test_getiterator_empty(self):
3081         Element = self.etree.Element
3082         SubElement = self.etree.SubElement
3083
3084         a = Element('a')
3085         b = SubElement(a, 'b')
3086         c = SubElement(a, 'c')
3087         d = SubElement(b, 'd')
3088         e = SubElement(c, 'e')
3089
3090         self.assertEqual(
3091             [],
3092             list(a.getiterator('none')))
3093         self.assertEqual(
3094             [],
3095             list(e.getiterator('none')))
3096         self.assertEqual(
3097             [e],
3098             list(e.getiterator()))
3099
3100     def test_getiterator_filter(self):
3101         Element = self.etree.Element
3102         SubElement = self.etree.SubElement
3103
3104         a = Element('a')
3105         b = SubElement(a, 'b')
3106         c = SubElement(a, 'c')
3107         d = SubElement(b, 'd')
3108         e = SubElement(c, 'e')
3109
3110         self.assertEqual(
3111             [a],
3112             list(a.getiterator('a')))
3113         a2 = SubElement(e, 'a')
3114         self.assertEqual(
3115             [a, a2],
3116             list(a.getiterator('a')))
3117         self.assertEqual(
3118             [a2],
3119             list(c.getiterator('a')))
3120
3121     def test_getiterator_filter_all(self):
3122         Element = self.etree.Element
3123         SubElement = self.etree.SubElement
3124
3125         a = Element('a')
3126         b = SubElement(a, 'b')
3127         c = SubElement(a, 'c')
3128         d = SubElement(b, 'd')
3129         e = SubElement(c, 'e')
3130
3131         self.assertEqual(
3132             [a, b, d, c, e],
3133             list(a.getiterator('*')))
3134
3135     def test_getiterator_filter_comment(self):
3136         Element = self.etree.Element
3137         Comment = self.etree.Comment
3138         SubElement = self.etree.SubElement
3139
3140         a = Element('a')
3141         b = SubElement(a, 'b')
3142         comment_b = Comment("TEST-b")
3143         b.append(comment_b)
3144
3145         self.assertEqual(
3146             [comment_b],
3147             list(a.getiterator(Comment)))
3148
3149         comment_a = Comment("TEST-a")
3150         a.append(comment_a)
3151
3152         self.assertEqual(
3153             [comment_b, comment_a],
3154             list(a.getiterator(Comment)))
3155
3156         self.assertEqual(
3157             [comment_b],
3158             list(b.getiterator(Comment)))
3159
3160     def test_getiterator_filter_pi(self):
3161         Element = self.etree.Element
3162         PI = self.etree.ProcessingInstruction
3163         SubElement = self.etree.SubElement
3164
3165         a = Element('a')
3166         b = SubElement(a, 'b')
3167         pi_b = PI("TEST-b")
3168         b.append(pi_b)
3169
3170         self.assertEqual(
3171             [pi_b],
3172             list(a.getiterator(PI)))
3173
3174         pi_a = PI("TEST-a")
3175         a.append(pi_a)
3176
3177         self.assertEqual(
3178             [pi_b, pi_a],
3179             list(a.getiterator(PI)))
3180
3181         self.assertEqual(
3182             [pi_b],
3183             list(b.getiterator(PI)))
3184
3185     def test_getiterator_with_text(self):
3186         Element = self.etree.Element
3187         SubElement = self.etree.SubElement
3188
3189         a = Element('a')
3190         a.text = 'a'
3191         b = SubElement(a, 'b')
3192         b.text = 'b'
3193         b.tail = 'b1'
3194         c = SubElement(a, 'c')
3195         c.text = 'c'
3196         c.tail = 'c1'
3197         d = SubElement(b, 'd')
3198         d.text = 'd'
3199         d.tail = 'd1'
3200         e = SubElement(c, 'e')
3201         e.text = 'e'
3202         e.tail = 'e1'
3203
3204         self.assertEqual(
3205             [a, b, d, c, e],
3206             list(a.getiterator()))
3207         #self.assertEqual(
3208         #    [d],
3209         #    list(d.getiterator()))
3210
3211     def test_getiterator_filter_with_text(self):
3212         Element = self.etree.Element
3213         SubElement = self.etree.SubElement
3214
3215         a = Element('a')
3216         a.text = 'a'
3217         b = SubElement(a, 'b')
3218         b.text = 'b'
3219         b.tail = 'b1'
3220         c = SubElement(a, 'c')
3221         c.text = 'c'
3222         c.tail = 'c1'
3223         d = SubElement(b, 'd')
3224         d.text = 'd'
3225         d.tail = 'd1'
3226         e = SubElement(c, 'e')
3227         e.text = 'e'
3228         e.tail = 'e1'
3229
3230         self.assertEqual(
3231             [a],
3232             list(a.getiterator('a')))
3233         a2 = SubElement(e, 'a')
3234         self.assertEqual(
3235             [a, a2],
3236             list(a.getiterator('a')))
3237         self.assertEqual(
3238             [a2],
3239             list(e.getiterator('a')))
3240
3241     def test_getiterator_filter_multiple(self):
3242         Element = self.etree.Element
3243         SubElement = self.etree.SubElement
3244
3245         a = Element('a')
3246         b = SubElement(a, 'b')
3247         c = SubElement(a, 'c')
3248         d = SubElement(b, 'd')
3249         e = SubElement(c, 'e')
3250         f = SubElement(c, 'f')
3251
3252         self.assertEqual(
3253             [a, b],
3254                list(a.getiterator('a', 'b')))
3255         self.assertEqual(
3256             [],
3257               list(a.getiterator('x', 'y')))
3258         self.assertEqual(
3259             [a, f],
3260               list(a.getiterator('f', 'a')))
3261         self.assertEqual(
3262             [c, e, f],
3263                list(c.getiterator('c', '*', 'a')))
3264         self.assertEqual(
3265             [],
3266                   list(a.getiterator( (), () )))
3267
3268     def test_getiterator_filter_multiple_tuple(self):
3269         Element = self.etree.Element
3270         SubElement = self.etree.SubElement
3271
3272         a = Element('a')
3273         b = SubElement(a, 'b')
3274         c = SubElement(a, 'c')
3275         d = SubElement(b, 'd')
3276         e = SubElement(c, 'e')
3277         f = SubElement(c, 'f')
3278
3279         self.assertEqual(
3280             [a, b],
3281                   list(a.getiterator( ('a', 'b') )))
3282         self.assertEqual(
3283             [],
3284               list(a.getiterator( ('x', 'y') )))
3285         self.assertEqual(
3286             [a, f],
3287                   list(a.getiterator( ('f', 'a') )))
3288         self.assertEqual(
3289             [c, e, f],
3290                      list(c.getiterator( ('c', '*', 'a') )))
3291         self.assertEqual(
3292             [],
3293               list(a.getiterator( () )))
3294
3295     def test_getiterator_filter_namespace(self):
3296         Element = self.etree.Element
3297         SubElement = self.etree.SubElement
3298
3299         a = Element('{a}a')
3300         b = SubElement(a, '{a}b')
3301         c = SubElement(a, '{a}c')
3302         d = SubElement(b, '{b}d')
3303         e = SubElement(c, '{a}e')
3304         f = SubElement(c, '{b}f')
3305         g = SubElement(c, 'g')
3306
3307         self.assertEqual(
3308             [a],
3309             list(a.getiterator('{a}a')))
3310         self.assertEqual(
3311             [],
3312             list(a.getiterator('{b}a')))
3313         self.assertEqual(
3314             [],
3315             list(a.getiterator('a')))
3316         self.assertEqual(
3317             [a,b,d,c,e,f,g],
3318             list(a.getiterator('*')))
3319         self.assertEqual(
3320             [f],
3321             list(c.getiterator('{b}*')))
3322         self.assertEqual(
3323             [d, f],
3324             list(a.getiterator('{b}*')))
3325         self.assertEqual(
3326             [g],
3327             list(a.getiterator('g')))
3328         self.assertEqual(
3329             [g],
3330             list(a.getiterator('{}g')))
3331         self.assertEqual(
3332             [g],
3333             list(a.getiterator('{}*')))
3334
3335     def test_getiterator_filter_local_name(self):
3336         Element = self.etree.Element
3337         Comment = self.etree.Comment
3338         SubElement = self.etree.SubElement
3339
3340         a = Element('{a}a')
3341         b = SubElement(a, '{nsA}b')
3342         c = SubElement(b, '{nsB}b')
3343         d = SubElement(a, 'b')
3344         e = SubElement(a, '{nsA}e')
3345         f = SubElement(e, '{nsB}e')
3346         g = SubElement(e, 'e')
3347         a.append(Comment('test'))
3348
3349         self.assertEqual(
3350             [b, c, d],
3351             list(a.getiterator('{*}b')))
3352         self.assertEqual(
3353             [e, f, g],
3354             list(a.getiterator('{*}e')))
3355         self.assertEqual(
3356             [a, b, c, d, e, f, g],
3357             list(a.getiterator('{*}*')))
3358
3359     def test_getiterator_filter_entities(self):
3360         Element = self.etree.Element
3361         Entity = self.etree.Entity
3362         SubElement = self.etree.SubElement
3363
3364         a = Element('a')
3365         b = SubElement(a, 'b')
3366         entity_b = Entity("TEST-b")
3367         b.append(entity_b)
3368
3369         self.assertEqual(
3370             [entity_b],
3371             list(a.getiterator(Entity)))
3372
3373         entity_a = Entity("TEST-a")
3374         a.append(entity_a)
3375
3376         self.assertEqual(
3377             [entity_b, entity_a],
3378             list(a.getiterator(Entity)))
3379
3380         self.assertEqual(
3381             [entity_b],
3382             list(b.getiterator(Entity)))
3383
3384     def test_getiterator_filter_element(self):
3385         Element = self.etree.Element
3386         Comment = self.etree.Comment
3387         PI = self.etree.PI
3388         SubElement = self.etree.SubElement
3389
3390         a = Element('a')
3391         b = SubElement(a, 'b')
3392         a.append(Comment("test"))
3393         a.append(PI("pi", "content"))
3394         c = SubElement(a, 'c')
3395
3396         self.assertEqual(
3397             [a, b, c],
3398             list(a.getiterator(Element)))
3399
3400     def test_getiterator_filter_all_comment_pi(self):
3401         # ElementTree iterates over everything here
3402         Element = self.etree.Element
3403         Comment = self.etree.Comment
3404         PI = self.etree.PI
3405         SubElement = self.etree.SubElement
3406
3407         a = Element('a')
3408         b = SubElement(a, 'b')
3409         a.append(Comment("test"))
3410         a.append(PI("pi", "content"))
3411         c = SubElement(a, 'c')
3412
3413         self.assertEqual(
3414             [a, b, c],
3415             list(a.getiterator('*')))
3416
3417     def test_elementtree_getiterator(self):
3418         Element = self.etree.Element
3419         SubElement = self.etree.SubElement
3420         ElementTree = self.etree.ElementTree
3421
3422         a = Element('a')
3423         b = SubElement(a, 'b')
3424         c = SubElement(a, 'c')
3425         d = SubElement(b, 'd')
3426         e = SubElement(c, 'e')
3427         t = ElementTree(element=a)
3428
3429         self.assertEqual(
3430             [a, b, d, c, e],
3431             list(t.getiterator()))
3432
3433     def test_elementtree_getiterator_filter(self):
3434         Element = self.etree.Element
3435         SubElement = self.etree.SubElement
3436         ElementTree = self.etree.ElementTree
3437         a = Element('a')
3438         b = SubElement(a, 'b')
3439         c = SubElement(a, 'c')
3440         d = SubElement(b, 'd')
3441         e = SubElement(c, 'e')
3442         t = ElementTree(element=a)
3443
3444         self.assertEqual(
3445             [a],
3446             list(t.getiterator('a')))
3447         a2 = SubElement(e, 'a')
3448         self.assertEqual(
3449             [a, a2],
3450             list(t.getiterator('a')))
3451
3452     def test_elementtree_getelementpath(self):
3453         a  = etree.Element("a")
3454         b  = etree.SubElement(a, "b")
3455         c  = etree.SubElement(a, "c")
3456         d1 = etree.SubElement(c, "d")
3457         d2 = etree.SubElement(c, "d")
3458         c.text = d1.text = 'TEXT'
3459
3460         tree = etree.ElementTree(a)
3461         self.assertEqual('.', tree.getelementpath(a))
3462         self.assertEqual('c/d[1]', tree.getelementpath(d1))
3463         self.assertEqual('c/d[2]', tree.getelementpath(d2))
3464
3465         self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3466         self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3467
3468         tree = etree.ElementTree(c)
3469         self.assertEqual('.', tree.getelementpath(c))
3470         self.assertEqual('d[2]', tree.getelementpath(d2))
3471         self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3472
3473         tree = etree.ElementTree(b)  # not a parent of a/c/d1/d2
3474         self.assertEqual('.', tree.getelementpath(b))
3475         self.assertRaises(ValueError, tree.getelementpath, a)
3476         self.assertRaises(ValueError, tree.getelementpath, c)
3477         self.assertRaises(ValueError, tree.getelementpath, d2)
3478
3479     def test_elementtree_getelementpath_ns(self):
3480         a  = etree.Element("{http://ns1/}a")
3481         b  = etree.SubElement(a, "{http://ns1/}b")
3482         c  = etree.SubElement(a, "{http://ns1/}c")
3483         d1 = etree.SubElement(c, "{http://ns1/}d")
3484         d2 = etree.SubElement(c, "{http://ns2/}d")
3485         d3 = etree.SubElement(c, "{http://ns1/}d")
3486
3487         tree = etree.ElementTree(a)
3488         self.assertEqual('.', tree.getelementpath(a))
3489         self.assertEqual('{http://ns1/}c/{http://ns1/}d[1]',
3490                          tree.getelementpath(d1))
3491         self.assertEqual('{http://ns1/}c/{http://ns2/}d',
3492                          tree.getelementpath(d2))
3493         self.assertEqual('{http://ns1/}c/{http://ns1/}d[2]',
3494                          tree.getelementpath(d3))
3495
3496         self.assertEqual(a, tree.find(tree.getelementpath(a)))
3497         self.assertEqual(b, tree.find(tree.getelementpath(b)))
3498         self.assertEqual(c, tree.find(tree.getelementpath(c)))
3499         self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3500         self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3501         self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
3502
3503         tree = etree.ElementTree(c)
3504         self.assertEqual('{http://ns1/}d[1]', tree.getelementpath(d1))
3505         self.assertEqual('{http://ns2/}d', tree.getelementpath(d2))
3506         self.assertEqual('{http://ns1/}d[2]', tree.getelementpath(d3))
3507         self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3508         self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3509         self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
3510
3511         tree = etree.ElementTree(b)  # not a parent of d1/d2
3512         self.assertRaises(ValueError, tree.getelementpath, d1)
3513         self.assertRaises(ValueError, tree.getelementpath, d2)
3514
3515     def test_elementtree_iter_qname(self):
3516         XML = self.etree.XML
3517         ElementTree = self.etree.ElementTree
3518         QName = self.etree.QName
3519         tree = ElementTree(XML(
3520                 _bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>')))
3521         self.assertEqual(
3522             list(tree.iter(QName("b"))),
3523             list(tree.iter("b")),
3524         )
3525         self.assertEqual(
3526             list(tree.iter(QName("X", "b"))),
3527             list(tree.iter("{X}b")),
3528         )
3529
3530         self.assertEqual(
3531             [e.tag for e in tree.iter(QName("X", "b"), QName("b"))],
3532             ['{X}b', 'b', '{X}b', 'b', 'b']
3533         )
3534         self.assertEqual(
3535             list(tree.iter(QName("X", "b"), QName("b"))),
3536             list(tree.iter("{X}b", "b"))
3537         )
3538
3539     def test_elementtree_find_qname(self):
3540         XML = self.etree.XML
3541         ElementTree = self.etree.ElementTree
3542         QName = self.etree.QName
3543         tree = ElementTree(XML(_bytes('<a><b><c/></b><b/><c><b/></c></a>')))
3544         self.assertEqual(tree.find(QName("c")), tree.getroot()[2])
3545
3546     def test_elementtree_findall_qname(self):
3547         XML = self.etree.XML
3548         ElementTree = self.etree.ElementTree
3549         QName = self.etree.QName
3550         tree = ElementTree(XML(_bytes('<a><b><c/></b><b/><c><b/></c></a>')))
3551         self.assertEqual(len(list(tree.findall(QName("c")))), 1)
3552
3553     def test_elementtree_findall_ns_qname(self):
3554         XML = self.etree.XML
3555         ElementTree = self.etree.ElementTree
3556         QName = self.etree.QName
3557         tree = ElementTree(XML(
3558                 _bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>')))
3559         self.assertEqual(len(list(tree.findall(QName("b")))), 2)
3560         self.assertEqual(len(list(tree.findall(QName("X", "b")))), 1)
3561
3562     def test_findall_ns(self):
3563         XML = self.etree.XML
3564         root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>'))
3565         self.assertEqual(len(root.findall(".//{X}b")), 2)
3566         self.assertEqual(len(root.findall(".//{X}*")), 2)
3567         self.assertEqual(len(root.findall(".//b")), 3)
3568
3569     def test_findall_different_nsmaps(self):
3570         XML = self.etree.XML
3571         root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
3572         nsmap = {'xx': 'X'}
3573         self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3574         self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 2)
3575         self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3576         nsmap = {'xx': 'Y'}
3577         self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
3578         self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 1)
3579         self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3580
3581     def test_findall_empty_prefix(self):
3582         XML = self.etree.XML
3583         root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
3584         nsmap = {'xx': 'X'}
3585         self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3586         nsmap = {'xx': 'X', None: 'Y'}
3587         self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)
3588         nsmap = {'xx': 'X', '': 'Y'}
3589         self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)
3590
3591     def test_findall_syntax_error(self):
3592         XML = self.etree.XML
3593         root = XML(_bytes('<a><b><c/></b><b/><c><b/><b/></c><b/></a>'))
3594         self.assertRaises(SyntaxError, root.findall, '')
3595         self.assertRaises(SyntaxError, root.findall, '//')  # absolute path on Element
3596         self.assertRaises(SyntaxError, root.findall, './//')
3597
3598     def test_index(self):
3599         etree = self.etree
3600         e = etree.Element('foo')
3601         for i in range(10):
3602             etree.SubElement(e, 'a%s' % i)
3603         for i in range(10):
3604             self.assertEqual(
3605                 i,
3606                 e.index(e[i]))
3607         self.assertEqual(
3608             3, e.index(e[3], 3))
3609         self.assertRaises(
3610             ValueError, e.index, e[3], 4)
3611         self.assertRaises(
3612             ValueError, e.index, e[3], 0, 2)
3613         self.assertRaises(
3614             ValueError, e.index, e[8], 0, -3)
3615         self.assertRaises(
3616             ValueError, e.index, e[8], -5, -3)
3617         self.assertEqual(
3618             8, e.index(e[8], 0, -1))
3619         self.assertEqual(
3620             8, e.index(e[8], -12, -1))
3621         self.assertEqual(
3622             0, e.index(e[0], -12, -1))
3623
3624     def test_replace(self):
3625         etree = self.etree
3626         e = etree.Element('foo')
3627         for i in range(10):
3628             el = etree.SubElement(e, 'a%s' % i)
3629             el.text = "text%d" % i
3630             el.tail = "tail%d" % i
3631
3632         child0 = e[0]
3633         child1 = e[1]
3634         child2 = e[2]
3635
3636         e.replace(e[0], e[1])
3637         self.assertEqual(
3638             9, len(e))
3639         self.assertEqual(
3640             child1, e[0])
3641         self.assertEqual(
3642             child1.text, "text1")
3643         self.assertEqual(
3644             child1.tail, "tail1")
3645         self.assertEqual(
3646             child0.tail, "tail0")
3647         self.assertEqual(
3648             child2, e[1])
3649
3650         e.replace(e[-1], e[0])
3651         self.assertEqual(
3652             child1, e[-1])
3653         self.assertEqual(
3654             child1.text, "text1")
3655         self.assertEqual(
3656             child1.tail, "tail1")
3657         self.assertEqual(
3658             child2, e[0])
3659
3660     def test_replace_new(self):
3661         etree = self.etree
3662         e = etree.Element('foo')
3663         for i in range(10):
3664             etree.SubElement(e, 'a%s' % i)
3665
3666         new_element = etree.Element("test")
3667         new_element.text = "TESTTEXT"
3668         new_element.tail = "TESTTAIL"
3669         child1 = e[1]
3670         e.replace(e[0], new_element)
3671         self.assertEqual(
3672             new_element, e[0])
3673         self.assertEqual(
3674             "TESTTEXT",
3675             e[0].text)
3676         self.assertEqual(
3677             "TESTTAIL",
3678             e[0].tail)
3679         self.assertEqual(
3680             child1, e[1])
3681
3682     def test_setslice_all_reversed(self):
3683         Element = self.etree.Element
3684         SubElement = self.etree.SubElement
3685
3686         a = Element('a')
3687
3688         e = Element('e')
3689         f = Element('f')
3690         g = Element('g')
3691
3692         a[:] = [e, f, g]
3693         self.assertEqual(
3694             [e, f, g],
3695             list(a))
3696
3697         a[::-1] = [e, f, g]
3698         self.assertEqual(
3699             [g, f, e],
3700             list(a))
3701
3702     def test_setslice_step(self):
3703         Element = self.etree.Element
3704         SubElement = self.etree.SubElement
3705
3706         a = Element('a')
3707         b = SubElement(a, 'b')
3708         c = SubElement(a, 'c')
3709         d = SubElement(a, 'd')
3710         e = SubElement(a, 'e')
3711
3712         x = Element('x')
3713         y = Element('y')
3714
3715         a[1::2] = [x, y]
3716         self.assertEqual(
3717             [b, x, d, y],
3718             list(a))
3719
3720     def test_setslice_step_negative(self):
3721         Element = self.etree.Element
3722         SubElement = self.etree.SubElement
3723
3724         a = Element('a')
3725         b = SubElement(a, 'b')
3726         c = SubElement(a, 'c')
3727         d = SubElement(a, 'd')
3728         e = SubElement(a, 'e')
3729
3730         x = Element('x')
3731         y = Element('y')
3732
3733         a[1::-1] = [x, y]
3734         self.assertEqual(
3735             [y, x, d, e],
3736             list(a))
3737
3738     def test_setslice_step_negative2(self):
3739         Element = self.etree.Element
3740         SubElement = self.etree.SubElement
3741
3742         a = Element('a')
3743         b = SubElement(a, 'b')
3744         c = SubElement(a, 'c')
3745         d = SubElement(a, 'd')
3746         e = SubElement(a, 'e')
3747
3748         x = Element('x')
3749         y = Element('y')
3750
3751         a[::-2] = [x, y]
3752         self.assertEqual(
3753             [b, y, d, x],
3754             list(a))
3755
3756     def test_setslice_step_overrun(self):
3757         Element = self.etree.Element
3758         SubElement = self.etree.SubElement
3759         try:
3760             slice
3761         except NameError:
3762             print("slice() not found")
3763             return
3764
3765         a = Element('a')
3766         b = SubElement(a, 'b')
3767         c = SubElement(a, 'c')
3768         d = SubElement(a, 'd')
3769         e = SubElement(a, 'e')
3770
3771         x = Element('x')
3772         y = Element('y')
3773         z = Element('z')
3774
3775         self.assertRaises(
3776             ValueError,
3777             operator.setitem, a, slice(1,None,2), [x, y, z])
3778
3779         self.assertEqual(
3780             [b, c, d, e],
3781             list(a))
3782
3783     def test_sourceline_XML(self):
3784         XML = self.etree.XML
3785         root = XML(_bytes('''<?xml version="1.0"?>
3786         <root><test>
3787
3788         <bla/></test>
3789         </root>
3790         '''))
3791
3792         self.assertEqual(
3793             [2, 2, 4],
3794             [ el.sourceline for el in root.getiterator() ])
3795
3796     def test_large_sourceline_XML(self):
3797         XML = self.etree.XML
3798         root = XML(_bytes(
3799             '<?xml version="1.0"?>\n'
3800             '<root>' + '\n' * 65536 +
3801             '<p>' + '\n' * 65536 + '</p>\n' +
3802             '<br/>\n'
3803             '</root>'))
3804
3805         if self.etree.LIBXML_VERSION >= (2, 9):
3806             expected = [2, 131074, 131076]
3807         else:
3808             expected = [2, 65535, 65535]
3809
3810         self.assertEqual(expected, [el.sourceline for el in root.iter()])
3811
3812     def test_sourceline_parse(self):
3813         parse = self.etree.parse
3814         tree = parse(fileInTestDir('include/test_xinclude.xml'))
3815
3816         self.assertEqual(
3817             [1, 2, 3],
3818             [ el.sourceline for el in tree.getiterator() ])
3819
3820     def test_sourceline_iterparse_end(self):
3821         iterparse = self.etree.iterparse
3822         lines = [ el.sourceline for (event, el) in 
3823                   iterparse(fileInTestDir('include/test_xinclude.xml')) ]
3824
3825         self.assertEqual(
3826             [2, 3, 1],
3827             lines)
3828
3829     def test_sourceline_iterparse_start(self):
3830         iterparse = self.etree.iterparse
3831         lines = [ el.sourceline for (event, el) in 
3832                   iterparse(fileInTestDir('include/test_xinclude.xml'),
3833                             events=("start",)) ]
3834
3835         self.assertEqual(
3836             [1, 2, 3],
3837             lines)
3838
3839     def test_sourceline_element(self):
3840         Element = self.etree.Element
3841         SubElement = self.etree.SubElement
3842         el = Element("test")
3843         self.assertEqual(None, el.sourceline)
3844
3845         child = SubElement(el, "test")
3846         self.assertEqual(None, el.sourceline)
3847         self.assertEqual(None, child.sourceline)
3848
3849     def test_XML_base_url_docinfo(self):
3850         etree = self.etree
3851         root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3852         docinfo = root.getroottree().docinfo
3853         self.assertEqual(docinfo.URL, "http://no/such/url")
3854
3855     def test_XML_set_base_url_docinfo(self):
3856         etree = self.etree
3857         root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3858         docinfo = root.getroottree().docinfo
3859         self.assertEqual(docinfo.URL, "http://no/such/url")
3860         docinfo.URL = "https://secret/url"
3861         self.assertEqual(docinfo.URL, "https://secret/url")
3862
3863     def test_parse_stringio_base_url(self):
3864         etree = self.etree
3865         tree = etree.parse(BytesIO("<root/>"), base_url="http://no/such/url")
3866         docinfo = tree.docinfo
3867         self.assertEqual(docinfo.URL, "http://no/such/url")
3868
3869     def test_parse_base_url_docinfo(self):
3870         etree = self.etree
3871         tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
3872                            base_url="http://no/such/url")
3873         docinfo = tree.docinfo
3874         self.assertEqual(docinfo.URL, "http://no/such/url")
3875
3876     def test_HTML_base_url_docinfo(self):
3877         etree = self.etree
3878         root = etree.HTML(_bytes("<html/>"), base_url="http://no/such/url")
3879         docinfo = root.getroottree().docinfo
3880         self.assertEqual(docinfo.URL, "http://no/such/url")
3881
3882     def test_docinfo_public(self):
3883         etree = self.etree
3884         xml_header = '<?xml version="1.0" encoding="ascii"?>'
3885         pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3886         sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3887         doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id)
3888
3889         xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')
3890
3891         tree = etree.parse(BytesIO(xml))
3892         docinfo = tree.docinfo
3893         self.assertEqual(docinfo.encoding,    "ascii")
3894         self.assertEqual(docinfo.xml_version, "1.0")
3895         self.assertEqual(docinfo.public_id,   pub_id)
3896         self.assertEqual(docinfo.system_url,  sys_id)
3897         self.assertEqual(docinfo.root_name,   'html')
3898         self.assertEqual(docinfo.doctype, doctype_string)
3899
3900     def test_docinfo_system(self):
3901         etree = self.etree
3902         xml_header = '<?xml version="1.0" encoding="UTF-8"?>'
3903         sys_id = "some.dtd"
3904         doctype_string = '<!DOCTYPE html SYSTEM "%s">' % sys_id
3905         xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')
3906
3907         tree = etree.parse(BytesIO(xml))
3908         docinfo = tree.docinfo
3909         self.assertEqual(docinfo.encoding,    "UTF-8")
3910         self.assertEqual(docinfo.xml_version, "1.0")
3911         self.assertEqual(docinfo.public_id,   None)
3912         self.assertEqual(docinfo.system_url,  sys_id)
3913         self.assertEqual(docinfo.root_name,   'html')
3914         self.assertEqual(docinfo.doctype, doctype_string)
3915
3916     def test_docinfo_empty(self):
3917         etree = self.etree
3918         xml = _bytes('<html><body></body></html>')
3919         tree = etree.parse(BytesIO(xml))
3920         docinfo = tree.docinfo
3921         self.assertEqual(docinfo.encoding,    "UTF-8")
3922         self.assertEqual(docinfo.xml_version, "1.0")
3923         self.assertEqual(docinfo.public_id,   None)
3924         self.assertEqual(docinfo.system_url,  None)
3925         self.assertEqual(docinfo.root_name,   'html')
3926         self.assertEqual(docinfo.doctype, '')
3927
3928     def test_docinfo_name_only(self):
3929         etree = self.etree
3930         xml = _bytes('<!DOCTYPE root><root></root>')
3931         tree = etree.parse(BytesIO(xml))
3932         docinfo = tree.docinfo
3933         self.assertEqual(docinfo.encoding,    "UTF-8")
3934         self.assertEqual(docinfo.xml_version, "1.0")
3935         self.assertEqual(docinfo.public_id,   None)
3936         self.assertEqual(docinfo.system_url,  None)
3937         self.assertEqual(docinfo.root_name,   'root')
3938         self.assertEqual(docinfo.doctype, '<!DOCTYPE root>')
3939
3940     def test_doctype_name_only_roundtrip(self):
3941         etree = self.etree
3942         xml = _bytes('<!DOCTYPE root>\n<root/>')
3943         tree = etree.parse(BytesIO(xml))
3944         self.assertEqual(xml, etree.tostring(tree))
3945
3946     def test_doctype_output_override(self):
3947         etree = self.etree
3948         pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3949         sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3950         doctype_string = _bytes('<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id))
3951
3952         xml = _bytes('<!DOCTYPE root>\n<root/>')
3953         tree = etree.parse(BytesIO(xml))
3954         self.assertEqual(xml.replace(_bytes('<!DOCTYPE root>'), doctype_string),
3955                           etree.tostring(tree, doctype=doctype_string))
3956
3957     def test_xml_base(self):
3958         etree = self.etree
3959         root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3960         self.assertEqual(root.base, "http://no/such/url")
3961         self.assertEqual(
3962             root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3963         root.base = "https://secret/url"
3964         self.assertEqual(root.base, "https://secret/url")
3965         self.assertEqual(
3966             root.get('{http://www.w3.org/XML/1998/namespace}base'),
3967             "https://secret/url")
3968
3969     def test_xml_base_attribute(self):
3970         etree = self.etree
3971         root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3972         self.assertEqual(root.base, "http://no/such/url")
3973         self.assertEqual(
3974             root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3975         root.set('{http://www.w3.org/XML/1998/namespace}base',
3976                  "https://secret/url")
3977         self.assertEqual(root.base, "https://secret/url")
3978         self.assertEqual(
3979             root.get('{http://www.w3.org/XML/1998/namespace}base'),
3980             "https://secret/url")
3981
3982     def test_html_base(self):
3983         etree = self.etree
3984         root = etree.HTML(_bytes("<html><body></body></html>"),
3985                           base_url="http://no/such/url")
3986         self.assertEqual(root.base, "http://no/such/url")
3987
3988     def test_html_base_tag(self):
3989         etree = self.etree
3990         root = etree.HTML(_bytes('<html><head><base href="http://no/such/url"></head></html>'))
3991         self.assertEqual(root.base, "http://no/such/url")
3992
3993     def test_indent(self):
3994         ET = self.etree
3995         elem = ET.XML("<root></root>")
3996         ET.indent(elem)
3997         self.assertEqual(ET.tostring(elem), b'<root/>')
3998
3999         elem = ET.XML("<html><body>text</body></html>")
4000         ET.indent(elem)
4001         self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')
4002
4003         elem = ET.XML("<html> <body>text</body>  </html>")
4004         ET.indent(elem)
4005         self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')
4006
4007         elem = ET.XML("<html> <body>text</body>   </html>")
4008         ET.indent(elem)
4009         self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')
4010
4011         elem = ET.XML("<html><body>text</body>tail</html>")
4012         ET.indent(elem)
4013         self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>tail</html>')
4014
4015         elem = ET.XML("<html><body><p>par</p>\n<p>text</p>\t<p><br/></p></body></html>")
4016         ET.indent(elem)
4017         self.assertEqual(
4018             ET.tostring(elem),
4019             b'<html>\n'
4020             b'  <body>\n'
4021             b'    <p>par</p>\n'
4022             b'    <p>text</p>\n'
4023             b'    <p>\n'
4024             b'      <br/>\n'
4025             b'    </p>\n'
4026             b'  </body>\n'
4027             b'</html>'
4028         )
4029
4030         elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
4031         ET.indent(elem)
4032         self.assertEqual(
4033             ET.tostring(elem),
4034             b'<html>\n'
4035             b'  <body>\n'
4036             b'    <p>pre<br/>post</p>\n'
4037             b'    <p>text</p>\n'
4038             b'  </body>\n'
4039             b'</html>'
4040         )
4041
4042     def test_indent_space(self):
4043         ET = self.etree
4044         elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
4045         ET.indent(elem, space='\t')
4046         self.assertEqual(
4047             ET.tostring(elem),
4048             b'<html>\n'
4049             b'\t<body>\n'
4050             b'\t\t<p>pre<br/>post</p>\n'
4051             b'\t\t<p>text</p>\n'
4052             b'\t</body>\n'
4053             b'</html>'
4054         )
4055
4056         elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
4057         ET.indent(elem, space='')
4058         self.assertEqual(
4059             ET.tostring(elem),
4060             b'<html>\n'
4061             b'<body>\n'
4062             b'<p>pre<br/>post</p>\n'
4063             b'<p>text</p>\n'
4064             b'</body>\n'
4065             b'</html>'
4066         )
4067
4068     def test_indent_space_caching(self):
4069         ET = self.etree
4070         elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>")
4071         ET.indent(elem)
4072         self.assertEqual(
4073             {el.tail for el in elem.iter()},
4074             {None, "\n", "\n  ", "\n    "}
4075         )
4076         self.assertEqual(
4077             {el.text for el in elem.iter()},
4078             {None, "\n  ", "\n    ", "\n      ", "par", "text"}
4079         )
4080         # NOTE: lxml does not reuse Python text strings across elements.
4081         #self.assertEqual(
4082         #    len({el.tail for el in elem.iter()}),
4083         #    len({id(el.tail) for el in elem.iter()}),
4084         #)
4085
4086     def test_indent_level(self):
4087         ET = self.etree
4088         elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
4089         try:
4090             ET.indent(elem, level=-1)
4091         except ValueError:
4092             pass
4093         else:
4094             self.assertTrue(False, "ValueError not raised")
4095         self.assertEqual(
4096             ET.tostring(elem),
4097             b"<html><body><p>pre<br/>post</p><p>text</p></body></html>"
4098         )
4099
4100         ET.indent(elem, level=2)
4101         self.assertEqual(
4102             ET.tostring(elem),
4103             b'<html>\n'
4104             b'      <body>\n'
4105             b'        <p>pre<br/>post</p>\n'
4106             b'        <p>text</p>\n'
4107             b'      </body>\n'
4108             b'    </html>'
4109         )
4110
4111         elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
4112         ET.indent(elem, level=1, space=' ')
4113         self.assertEqual(
4114             ET.tostring(elem),
4115             b'<html>\n'
4116             b'  <body>\n'
4117             b'   <p>pre<br/>post</p>\n'
4118             b'   <p>text</p>\n'
4119             b'  </body>\n'
4120             b' </html>'
4121         )
4122
4123     def test_parse_fileobject_unicode(self):
4124         # parse from a file object that returns unicode strings
4125         f = LargeFileLikeUnicode()
4126         tree = self.etree.parse(f)
4127         root = tree.getroot()
4128         self.assertTrue(root.tag.endswith('root'))
4129
4130     def test_dtd_io(self):
4131         # check that DTDs that go in also go back out
4132         xml = _bytes('''\
4133         <!DOCTYPE test SYSTEM "test.dtd" [
4134           <!ENTITY entity "tasty">
4135           <!ELEMENT test (a)>
4136           <!ELEMENT a (#PCDATA)>
4137         ]>
4138         <test><a>test-test</a></test>\
4139         ''')
4140         tree = self.etree.parse(BytesIO(xml))
4141         self.assertEqual(self.etree.tostring(tree).replace(_bytes(" "), _bytes("")),
4142                          xml.replace(_bytes(" "), _bytes("")))
4143
4144     def test_byte_zero(self):
4145         Element = self.etree.Element
4146
4147         a = Element('a')
4148         self.assertRaises(ValueError, setattr, a, "text", 'ha\0ho')
4149         self.assertRaises(ValueError, setattr, a, "tail", 'ha\0ho')
4150
4151         self.assertRaises(ValueError, Element, 'ha\0ho')
4152
4153     def test_unicode_byte_zero(self):
4154         Element = self.etree.Element
4155
4156         a = Element('a')
4157         self.assertRaises(ValueError, setattr, a, "text",
4158                           _str('ha\0ho'))
4159         self.assertRaises(ValueError, setattr, a, "tail",
4160                           _str('ha\0ho'))
4161
4162         self.assertRaises(ValueError, Element,
4163                           _str('ha\0ho'))
4164
4165     def test_byte_invalid(self):
4166         Element = self.etree.Element
4167
4168         a = Element('a')
4169         self.assertRaises(ValueError, setattr, a, "text", 'ha\x07ho')
4170         self.assertRaises(ValueError, setattr, a, "text", 'ha\x02ho')
4171
4172         self.assertRaises(ValueError, setattr, a, "tail", 'ha\x07ho')
4173         self.assertRaises(ValueError, setattr, a, "tail", 'ha\x02ho')
4174
4175         self.assertRaises(ValueError, Element, 'ha\x07ho')
4176         self.assertRaises(ValueError, Element, 'ha\x02ho')
4177
4178     def test_unicode_byte_invalid(self):
4179         Element = self.etree.Element
4180
4181         a = Element('a')
4182         self.assertRaises(ValueError, setattr, a, "text",
4183                           _str('ha\x07ho'))
4184         self.assertRaises(ValueError, setattr, a, "text",
4185                           _str('ha\x02ho'))
4186
4187         self.assertRaises(ValueError, setattr, a, "tail",
4188                           _str('ha\x07ho'))
4189         self.assertRaises(ValueError, setattr, a, "tail",
4190                           _str('ha\x02ho'))
4191
4192         self.assertRaises(ValueError, Element,
4193                           _str('ha\x07ho'))
4194         self.assertRaises(ValueError, Element,
4195                           _str('ha\x02ho'))
4196
4197     def test_unicode_byte_invalid_sequence(self):
4198         Element = self.etree.Element
4199
4200         a = Element('a')
4201         self.assertRaises(ValueError, setattr, a, "text",
4202                           _str('ha\u1234\x07ho'))
4203         self.assertRaises(ValueError, setattr, a, "text",
4204                           _str('ha\u1234\x02ho'))
4205
4206         self.assertRaises(ValueError, setattr, a, "tail",
4207                           _str('ha\u1234\x07ho'))
4208         self.assertRaises(ValueError, setattr, a, "tail",
4209                           _str('ha\u1234\x02ho'))
4210
4211         self.assertRaises(ValueError, Element,
4212                           _str('ha\u1234\x07ho'))
4213         self.assertRaises(ValueError, Element,
4214                           _str('ha\u1234\x02ho'))
4215
4216     def test_encoding_tostring_utf16(self):
4217         # ElementTree fails to serialize this
4218         tostring = self.etree.tostring
4219         Element = self.etree.Element
4220         SubElement = self.etree.SubElement
4221         
4222         a = Element('a')
4223         b = SubElement(a, 'b')
4224         c = SubElement(a, 'c')
4225
4226         result = tostring(a, encoding='UTF-16')
4227         self.assertEqual(_bytes('<a><b></b><c></c></a>'),
4228                           canonicalize(result))
4229
4230     def test_tostring_none(self):
4231         # ElementTree raises an AssertionError here
4232         tostring = self.etree.tostring
4233         self.assertRaises(TypeError, self.etree.tostring, None)
4234
4235     def test_tostring_pretty(self):
4236         tostring = self.etree.tostring
4237         Element = self.etree.Element
4238         SubElement = self.etree.SubElement
4239
4240         a = Element('a')
4241         b = SubElement(a, 'b')
4242         c = SubElement(a, 'c')
4243
4244         result = tostring(a)
4245         self.assertEqual(result, _bytes("<a><b/><c/></a>"))
4246
4247         result = tostring(a, pretty_print=False)
4248         self.assertEqual(result, _bytes("<a><b/><c/></a>"))
4249
4250         result = tostring(a, pretty_print=True)
4251         self.assertEqual(result, _bytes("<a>\n  <b/>\n  <c/>\n</a>\n"))
4252
4253     def test_tostring_with_tail(self):
4254         tostring = self.etree.tostring
4255         Element = self.etree.Element
4256         SubElement = self.etree.SubElement
4257
4258         a = Element('a')
4259         a.tail = "aTAIL"
4260         b = SubElement(a, 'b')
4261         b.tail = "bTAIL"
4262         c = SubElement(a, 'c')
4263
4264         result = tostring(a)
4265         self.assertEqual(result, _bytes("<a><b/>bTAIL<c/></a>aTAIL"))
4266
4267         result = tostring(a, with_tail=False)
4268         self.assertEqual(result, _bytes("<a><b/>bTAIL<c/></a>"))
4269
4270         result = tostring(a, with_tail=True)
4271         self.assertEqual(result, _bytes("<a><b/>bTAIL<c/></a>aTAIL"))
4272
4273     def test_tostring_method_html_with_tail(self):
4274         tostring = self.etree.tostring
4275         html = self.etree.fromstring(
4276             '<html><body>'
4277             '<div><p>Some text<i>\r\n</i></p></div>\r\n'
4278             '</body></html>',
4279             parser=self.etree.HTMLParser())
4280         self.assertEqual(html.tag, 'html')
4281         div = html.find('.//div')
4282         self.assertEqual(div.tail, '\r\n')
4283         result = tostring(div, method='html')
4284         self.assertEqual(
4285             result,
4286             _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
4287         result = tostring(div, method='html', with_tail=True)
4288         self.assertEqual(
4289             result,
4290             _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
4291         result = tostring(div, method='html', with_tail=False)
4292         self.assertEqual(
4293             result,
4294             _bytes("<div><p>Some text<i>\r\n</i></p></div>"))
4295
4296     def test_standalone(self):
4297         tostring = self.etree.tostring
4298         XML = self.etree.XML
4299         ElementTree = self.etree.ElementTree
4300         Element = self.etree.Element
4301
4302         tree = Element("root").getroottree()
4303         self.assertEqual(None, tree.docinfo.standalone)
4304
4305         tree = XML(_bytes("<root/>")).getroottree()
4306         self.assertEqual(None, tree.docinfo.standalone)
4307
4308         tree = XML(_bytes(
4309             "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"
4310             )).getroottree()
4311         self.assertEqual(True, tree.docinfo.standalone)
4312
4313         tree = XML(_bytes(
4314             "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"
4315             )).getroottree()
4316         self.assertEqual(False, tree.docinfo.standalone)
4317
4318     def test_tostring_standalone(self):
4319         tostring = self.etree.tostring
4320         XML = self.etree.XML
4321         ElementTree = self.etree.ElementTree
4322
4323         root = XML(_bytes("<root/>"))
4324
4325         tree = ElementTree(root)
4326         self.assertEqual(None, tree.docinfo.standalone)
4327
4328         result = tostring(root, xml_declaration=True, encoding="ASCII")
4329         self.assertEqual(result, _bytes(
4330             "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
4331
4332         result = tostring(root, xml_declaration=True, encoding="ASCII",
4333                           standalone=True)
4334         self.assertEqual(result, _bytes(
4335             "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
4336
4337         tree = ElementTree(XML(result))
4338         self.assertEqual(True, tree.docinfo.standalone)
4339
4340         result = tostring(root, xml_declaration=True, encoding="ASCII",
4341                           standalone=False)
4342         self.assertEqual(result, _bytes(
4343             "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"))
4344
4345         tree = ElementTree(XML(result))
4346         self.assertEqual(False, tree.docinfo.standalone)
4347
4348     def test_tostring_standalone_in_out(self):
4349         tostring = self.etree.tostring
4350         XML = self.etree.XML
4351         ElementTree = self.etree.ElementTree
4352
4353         root = XML(_bytes(
4354             "<?xml version='1.0' encoding='UTF-8' standalone='yes'?>\n<root/>"))
4355
4356         tree = ElementTree(root)
4357         self.assertEqual(True, tree.docinfo.standalone)
4358
4359         result = tostring(root, xml_declaration=True, encoding="ASCII")
4360         self.assertEqual(result, _bytes(
4361             "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
4362
4363         result = tostring(root, xml_declaration=True, encoding="ASCII",
4364                           standalone=True)
4365         self.assertEqual(result, _bytes(
4366             "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
4367
4368     def test_tostring_method_text_encoding(self):
4369         tostring = self.etree.tostring
4370         Element = self.etree.Element
4371         SubElement = self.etree.SubElement
4372         
4373         a = Element('a')
4374         a.text = "A"
4375         a.tail = "tail"
4376         b = SubElement(a, 'b')
4377         b.text = "B"
4378         b.tail = _str("Søk pÃ¥ nettet")
4379         c = SubElement(a, 'c')
4380         c.text = "C"
4381
4382         result = tostring(a, method="text", encoding="UTF-16")
4383
4384         self.assertEqual(_str('ABSøk pÃ¥ nettetCtail').encode("UTF-16"),
4385                           result)
4386
4387     def test_tostring_method_text_unicode(self):
4388         tostring = self.etree.tostring
4389         Element = self.etree.Element
4390         SubElement = self.etree.SubElement
4391         
4392         a = Element('a')
4393         a.text = _str('Søk pÃ¥ nettetA')
4394         a.tail = "tail"
4395         b = SubElement(a, 'b')
4396         b.text = "B"
4397         b.tail = _str('Søk pÃ¥ nettetB')
4398         c = SubElement(a, 'c')
4399         c.text = "C"
4400         
4401         self.assertRaises(UnicodeEncodeError,
4402                           tostring, a, method="text")
4403         
4404         self.assertEqual(
4405             _str('Søk pÃ¥ nettetABSøk pÃ¥ nettetBCtail').encode('utf-8'),
4406             tostring(a, encoding="UTF-8", method="text"))
4407
4408     def test_tounicode(self):
4409         tounicode = self.etree.tounicode
4410         Element = self.etree.Element
4411         SubElement = self.etree.SubElement
4412         
4413         a = Element('a')
4414         b = SubElement(a, 'b')
4415         c = SubElement(a, 'c')
4416         
4417         self.assertTrue(isinstance(tounicode(a), _unicode))
4418         self.assertEqual(_bytes('<a><b></b><c></c></a>'),
4419                           canonicalize(tounicode(a)))
4420
4421     def test_tounicode_element(self):
4422         tounicode = self.etree.tounicode
4423         Element = self.etree.Element
4424         SubElement = self.etree.SubElement
4425         
4426         a = Element('a')
4427         b = SubElement(a, 'b')
4428         c = SubElement(a, 'c')
4429         d = SubElement(c, 'd')
4430         self.assertTrue(isinstance(tounicode(b), _unicode))
4431         self.assertTrue(isinstance(tounicode(c), _unicode))
4432         self.assertEqual(_bytes('<b></b>'),
4433                           canonicalize(tounicode(b)))
4434         self.assertEqual(_bytes('<c><d></d></c>'),
4435                           canonicalize(tounicode(c)))
4436
4437     def test_tounicode_none(self):
4438         tounicode = self.etree.tounicode
4439         self.assertRaises(TypeError, self.etree.tounicode, None)
4440
4441     def test_tounicode_element_tail(self):
4442         tounicode = self.etree.tounicode
4443         Element = self.etree.Element
4444         SubElement = self.etree.SubElement
4445         
4446         a = Element('a')
4447         b = SubElement(a, 'b')
4448         c = SubElement(a, 'c')
4449         d = SubElement(c, 'd')
4450         b.tail = 'Foo'
4451
4452         self.assertTrue(isinstance(tounicode(b), _unicode))
4453         self.assertTrue(tounicode(b) == '<b/>Foo' or
4454                      tounicode(b) == '<b />Foo')
4455
4456     def test_tounicode_pretty(self):
4457         tounicode = self.etree.tounicode
4458         Element = self.etree.Element
4459         SubElement = self.etree.SubElement
4460
4461         a = Element('a')
4462         b = SubElement(a, 'b')
4463         c = SubElement(a, 'c')
4464
4465         result = tounicode(a)
4466         self.assertEqual(result, "<a><b/><c/></a>")
4467
4468         result = tounicode(a, pretty_print=False)
4469         self.assertEqual(result, "<a><b/><c/></a>")
4470
4471         result = tounicode(a, pretty_print=True)
4472         self.assertEqual(result, "<a>\n  <b/>\n  <c/>\n</a>\n")
4473
4474     def test_tostring_unicode(self):
4475         tostring = self.etree.tostring
4476         Element = self.etree.Element
4477         SubElement = self.etree.SubElement
4478         
4479         a = Element('a')
4480         b = SubElement(a, 'b')
4481         c = SubElement(a, 'c')
4482         
4483         self.assertTrue(isinstance(tostring(a, encoding=_unicode), _unicode))
4484         self.assertEqual(_bytes('<a><b></b><c></c></a>'),
4485                           canonicalize(tostring(a, encoding=_unicode)))
4486
4487     def test_tostring_unicode_element(self):
4488         tostring = self.etree.tostring
4489         Element = self.etree.Element
4490         SubElement = self.etree.SubElement
4491         
4492         a = Element('a')
4493         b = SubElement(a, 'b')
4494         c = SubElement(a, 'c')
4495         d = SubElement(c, 'd')
4496         self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
4497         self.assertTrue(isinstance(tostring(c, encoding=_unicode), _unicode))
4498         self.assertEqual(_bytes('<b></b>'),
4499                           canonicalize(tostring(b, encoding=_unicode)))
4500         self.assertEqual(_bytes('<c><d></d></c>'),
4501                           canonicalize(tostring(c, encoding=_unicode)))
4502
4503     def test_tostring_unicode_none(self):
4504         tostring = self.etree.tostring
4505         self.assertRaises(TypeError, self.etree.tostring,
4506                           None, encoding=_unicode)
4507
4508     def test_tostring_unicode_element_tail(self):
4509         tostring = self.etree.tostring
4510         Element = self.etree.Element
4511         SubElement = self.etree.SubElement
4512         
4513         a = Element('a')
4514         b = SubElement(a, 'b')
4515         c = SubElement(a, 'c')
4516         d = SubElement(c, 'd')
4517         b.tail = 'Foo'
4518
4519         self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
4520         self.assertTrue(tostring(b, encoding=_unicode) == '<b/>Foo' or
4521                      tostring(b, encoding=_unicode) == '<b />Foo')
4522
4523     def test_tostring_unicode_pretty(self):
4524         tostring = self.etree.tostring
4525         Element = self.etree.Element
4526         SubElement = self.etree.SubElement
4527
4528         a = Element('a')
4529         b = SubElement(a, 'b')
4530         c = SubElement(a, 'c')
4531
4532         result = tostring(a, encoding=_unicode)
4533         self.assertEqual(result, "<a><b/><c/></a>")
4534
4535         result = tostring(a, encoding=_unicode, pretty_print=False)
4536         self.assertEqual(result, "<a><b/><c/></a>")
4537
4538         result = tostring(a, encoding=_unicode, pretty_print=True)
4539         self.assertEqual(result, "<a>\n  <b/>\n  <c/>\n</a>\n")
4540
4541     def test_pypy_proxy_collect(self):
4542         root = etree.Element('parent')
4543         etree.SubElement(root, 'child')
4544
4545         self.assertEqual(len(root), 1)
4546         self.assertEqual(root[0].tag, 'child')
4547
4548         # in PyPy, GC used to kill the Python proxy instance without cleanup
4549         gc.collect()
4550         self.assertEqual(len(root), 1)
4551         self.assertEqual(root[0].tag, 'child')
4552
4553     def test_element_refcycle(self):
4554         class SubEl(etree.ElementBase):
4555             pass
4556
4557         el1 = SubEl()
4558         el2 = SubEl()
4559         self.assertEqual('SubEl', el1.tag)
4560         self.assertEqual('SubEl', el2.tag)
4561         el1.other = el2
4562         el2.other = el1
4563
4564         del el1, el2
4565         gc.collect()
4566         # not really testing anything here, but it shouldn't crash
4567
4568     def test_proxy_collect_siblings(self):
4569         root = etree.Element('parent')
4570         c1 = etree.SubElement(root, 'child1')
4571         c2 = etree.SubElement(root, 'child2')
4572
4573         root.remove(c1)
4574         root.remove(c2)
4575         c1.addnext(c2)
4576         del c1
4577         # trigger deallocation attempt of c1
4578         c2.getprevious()
4579         # make sure it wasn't deallocated
4580         self.assertEqual('child1', c2.getprevious().tag)
4581
4582     def test_proxy_collect_siblings_text(self):
4583         root = etree.Element('parent')
4584         c1 = etree.SubElement(root, 'child1')
4585         c2 = etree.SubElement(root, 'child2')
4586
4587         root.remove(c1)
4588         root.remove(c2)
4589         c1.addnext(c2)
4590         c1.tail = 'abc'
4591         c2.tail = 'xyz'
4592         del c1
4593         # trigger deallocation attempt of c1
4594         c2.getprevious()
4595         # make sure it wasn't deallocated
4596         self.assertEqual('child1', c2.getprevious().tag)
4597         self.assertEqual('abc', c2.getprevious().tail)
4598
4599     # helper methods
4600
4601     def _writeElement(self, element, encoding='us-ascii', compression=0):
4602         """Write out element for comparison.
4603         """
4604         ElementTree = self.etree.ElementTree
4605         f = BytesIO()
4606         tree = ElementTree(element=element)
4607         tree.write(f, encoding=encoding, compression=compression)
4608         data = f.getvalue()
4609         if compression:
4610             data = zlib.decompress(data)
4611         return canonicalize(data)
4612
4613
class _XIncludeTestCase(HelperTestCase):
    """XInclude tests that call ``self.include(tree)``.

    ``include`` is not defined here; it has to be provided by subclasses.
    """

    def test_xinclude_text(self):
        # a parse="text" include is merged into the text of the root
        included_path = fileInTestDir('test_broken.xml')
        root = etree.XML(_bytes('''\
        <doc xmlns:xi="http://www.w3.org/2001/XInclude">
          <xi:include href="%s" parse="text"/>
        </doc>
        ''' % path2url(included_path)))
        text_before = root.text
        included_text = read_file(included_path)
        tail_before = root[0].tail

        self.include(etree.ElementTree(root))
        self.assertEqual(text_before + included_text + tail_before,
                         root.text)

    def test_xinclude(self):
        document = etree.parse(fileInTestDir('include/test_xinclude.xml'))
        # before processing, the second child is not yet the included <a>
        self.assertNotEqual('a', document.getroot()[1].tag)
        # process xincludes
        self.include(document)
        # check whether we find it replaced with included data
        self.assertEqual('a', document.getroot()[1].tag)

    def test_xinclude_resolver(self):
        # a custom resolver gets to see the DTD, the input document and
        # the included resource
        class TrackingResolver(etree.Resolver):
            include_text = read_file(fileInTestDir('test.xml'))
            called = {}

            def resolve(self, url, id, context):
                if url.endswith(".dtd"):
                    self.called["dtd"] = True
                    return self.resolve_filename(
                        fileInTestDir('test.dtd'), context)
                if url.endswith("test_xinclude.xml"):
                    self.called["input"] = True
                    return None  # delegate to default resolver
                self.called["include"] = True
                return self.resolve_string(self.include_text, context)

        resolver = TrackingResolver()
        parser = etree.XMLParser(load_dtd=True)
        parser.resolvers.add(resolver)

        document = etree.parse(
            fileInTestDir('include/test_xinclude.xml'), parser=parser)
        self.include(document)

        self.assertEqual(
            [("dtd", True), ("include", True), ("input", True)],
            sorted(resolver.called.items()))

    def test_xinclude_resolver_recursive(self):
        # every resolved document includes yet another one until
        # 'test5.xml' is requested, which ends the chain
        data = textwrap.dedent('''
        <doc xmlns:xi="http://www.w3.org/2001/XInclude">
        <foo/>
        <xi:include href="./test.xml" />
        </doc>
        ''')

        class Resolver(etree.Resolver):
            called = {}

            def resolve(self, url, id, context):
                seen = self.called
                if url.endswith("test_xinclude.xml"):
                    assert not seen.get("input")
                    seen["input"] = True
                    return None  # delegate to default resolver
                if url.endswith('/test5.xml'):
                    assert not seen.get("DONE")
                    seen["DONE"] = True
                    return self.resolve_string('<DONE/>', context)

                _, filename = url.rsplit('/', 1)
                assert not seen.get(filename)
                seen[filename] = True
                # point the next include at a new file name derived from
                # the number of URLs resolved so far
                next_data = data.replace(
                    'test.xml', 'test%d.xml' % len(seen))
                return self.resolve_string(next_data, context)

        resolver = Resolver()
        parser = etree.XMLParser(load_dtd=True)
        parser.resolvers.add(resolver)

        document = etree.parse(
            fileInTestDir('include/test_xinclude.xml'), parser=parser)
        self.include(document)

        self.assertEqual(
            [("DONE", True), ("input", True), ("test.xml", True),
             ("test2.xml", True), ("test3.xml", True), ("test4.xml", True)],
            sorted(resolver.called.items()))
4716
4717
class ETreeXIncludeTestCase(_XIncludeTestCase):
    """Runs the _XIncludeTestCase tests through ``tree.xinclude()``."""

    def include(self, tree):
        # hook called by the inherited tests to process the includes
        tree.xinclude()
4721
4722
class ElementIncludeTestCase(_XIncludeTestCase):
    """Runs the _XIncludeTestCase tests through ``ElementInclude.include()``
    and adds tests for recursive and repeated includes that are served from
    the in-memory ``XINCLUDE`` documents.
    """
    from lxml import ElementInclude

    def include(self, tree, loader=None, max_depth=None):
        # hook called by the tests to process the includes of 'tree'
        root = tree.getroot()
        self.ElementInclude.include(root, loader=loader, max_depth=max_depth)

    # In-memory documents, keyed by the href that xinclude_loader() receives.
    # Recursive1 -> Recursive2 -> Recursive3 -> Recursive1 form a cycle;
    # the NonRecursive* documents include the same files repeatedly
    # without forming a cycle.
    XINCLUDE = {
        "Recursive1.xml": """\
    <?xml version='1.0'?>
    <document xmlns:xi="http://www.w3.org/2001/XInclude">
      <p>The following is the source code of Recursive2.xml:</p>
      <xi:include href="Recursive2.xml"/>
    </document>
    """,

        "Recursive2.xml": """\
    <?xml version='1.0'?>
    <document xmlns:xi="http://www.w3.org/2001/XInclude">
      <p>The following is the source code of Recursive3.xml:</p>
      <xi:include href="Recursive3.xml"/>
    </document>
    """,

        "Recursive3.xml": """\
    <?xml version='1.0'?>
    <document xmlns:xi="http://www.w3.org/2001/XInclude">
      <p>The following is the source code of Recursive1.xml:</p>
      <xi:include href="Recursive1.xml"/>
    </document>
    """,

        "NonRecursive1.xml": """\
    <?xml version='1.0'?>
    <document xmlns:xi="http://www.w3.org/2001/XInclude">
      <p>The following is multiple times the source code of NonRecursive3.xml:</p>
      <xi:include href="NonRecursive3.xml"/>
      <xi:include href="NonRecursive3.xml"/>
      <p>The following is multiple times the source code of Leaf.xml:</p>
      <xi:include href="Leaf.xml"/>
      <xi:include href="Leaf.xml"/>
      <xi:include href="Leaf.xml"/>
      <p>One more time the source code of NonRecursive3.xml:</p>
      <xi:include href="NonRecursive3.xml"/>
    </document>
    """,

        "NonRecursive2.xml": """\
    <?xml version='1.0'?>
    <document xmlns:xi="http://www.w3.org/2001/XInclude">
      <p>The following is multiple times the source code of NonRecursive3.xml:</p>
      <xi:include href="NonRecursive3.xml"/>
      <xi:include href="NonRecursive3.xml"/>
    </document>
    """,

        "NonRecursive3.xml": """\
    <?xml version='1.0'?>
    <document xmlns:xi="http://www.w3.org/2001/XInclude">
      <p>The following is multiple times the source code of Leaf.xml:</p>
      <xi:include href="Leaf.xml"/>
      <xi:include href="Leaf.xml"/>
    </document>
    """,

        "Leaf.xml": """\
    <?xml version='1.0'?>
    <document xmlns:xi="http://www.w3.org/2001/XInclude">
      <p>No further includes</p>
    </document>
    """,
    }

    def xinclude_loader(self, href, parse="xml", encoding=None):
        """Loader callback that serves the documents in ``XINCLUDE``.

        Returns the parsed root element for ``parse == "xml"`` and the
        dedented source text otherwise.  Raises OSError for an unknown
        ``href``.  ``encoding`` is accepted but not used.
        """
        try:
            raw = self.XINCLUDE[href]
        except KeyError:
            raise OSError("resource not found")
        text = textwrap.dedent(raw)
        if parse != "xml":
            return text
        return etree.fromstring(text)

    def test_xinclude_failures(self):
        errors = self.ElementInclude
        # (max_depth, expected exception type, expected message)
        cases = [
            # Test infinitely recursive includes.
            (None, errors.FatalIncludeError,
             "recursive include of 'Recursive2.xml' detected"),
            # Test 'max_depth' limitation.
            (None, errors.FatalIncludeError,
             "recursive include of 'Recursive2.xml' detected"),
            (0, errors.LimitedRecursiveIncludeError,
             "maximum xinclude depth reached when including file Recursive2.xml"),
            (1, errors.LimitedRecursiveIncludeError,
             "maximum xinclude depth reached when including file Recursive3.xml"),
            (2, errors.LimitedRecursiveIncludeError,
             "maximum xinclude depth reached when including file Recursive1.xml"),
            (3, errors.FatalIncludeError,
             "recursive include of 'Recursive2.xml' detected"),
        ]

        for max_depth, error_type, message in cases:
            # every case starts from a freshly parsed document
            document = self.xinclude_loader("Recursive1.xml").getroottree()
            with self.assertRaises(error_type) as cm:
                self.include(document, self.xinclude_loader,
                             max_depth=max_depth)
            self.assertEqual(str(cm.exception), message)

    def test_multiple_include_of_same_file(self):
        # Test that including the same file multiple times, but on the same
        # level is not detected as recursive include; then the same for more
        # than one level; then the same with no Leaf.xml in the top-level
        # file.
        for href in ("NonRecursive3.xml",
                     "NonRecursive1.xml",
                     "NonRecursive2.xml"):
            document = self.xinclude_loader(href).getroottree()
            self.include(document, self.xinclude_loader)
4856
4857
class ETreeC14NTestCase(HelperTestCase):
    """Canonical XML output via write_c14n(), write() and tostring()."""

    # Input with a default namespace plus the 'y' and 'z' prefixes; shared by
    # the exclusive-canonicalisation tests below.
    _NS_SOURCE = (
        '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>')
    # Output expected for _NS_SOURCE by default and with exclusive=False.
    _NS_ALL_DECLARED = (
        '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>')
    # Output expected for _NS_SOURCE with exclusive=True.
    _NS_EXCLUSIVE = '<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'

    @staticmethod
    def _capture(write, **options):
        """Call ``write(stream, **options)`` on a fresh in-memory stream.

        Returns the bytes that ended up in the stream.
        """
        stream = BytesIO()
        write(stream, **options)
        return stream.getvalue()

    def test_c14n(self):
        """write_c14n() expands the empty <b/> element."""
        doc = self.parse(_bytes('<a><b/></a>'))
        output = self._capture(doc.write_c14n)
        self.assertEqual(_bytes('<a><b></b></a>'), output)

    def test_c14n_gzip(self):
        """write_c14n(compression=9) output is readable through gzip."""
        doc = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
        packed = self._capture(doc.write_c14n, compression=9)
        with gzip.GzipFile(fileobj=BytesIO(packed)) as gz:
            unpacked = gz.read()
        self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'), unpacked)

    def test_c14n_file(self):
        """write_c14n() accepts a file name as target."""
        doc = self.parse(_bytes('<a><b/></a>'))
        with tmpfile() as path:
            doc.write_c14n(path)
            content = read_file(path, 'rb')
        self.assertEqual(_bytes('<a><b></b></a>'), content)

    def test_c14n_file_gzip(self):
        """write_c14n() to a file name with compression=9 writes gzip data."""
        doc = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
        with tmpfile() as path:
            doc.write_c14n(path, compression=9)
            with gzip.open(path, 'rb') as gz:
                content = gz.read()
        self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'), content)

    def test_c14n2_file_gzip(self):
        """write(method='c14n2') to a file name with compression=9 writes gzip data."""
        doc = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
        with tmpfile() as path:
            doc.write(path, method='c14n2', compression=9)
            with gzip.open(path, 'rb') as gz:
                content = gz.read()
        self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'), content)

    def test_c14n2_with_text(self):
        """c14n2 output keeps text whitespace unless strip_text=True is passed."""
        doc = self.parse(
            b'<?xml version="1.0"?>    <a> abc \n <b>  btext </b> btail <c/>    ctail </a>     ')

        untouched = self._capture(doc.write, method='c14n2')
        self.assertEqual(b'<a> abc \n <b>  btext </b> btail <c></c>    ctail </a>',
                         untouched)

        stripped = self._capture(doc.write, method='c14n2', strip_text=True)
        self.assertEqual(b'<a>abc<b>btext</b>btail<c></c>ctail</a>',
                         stripped)

    def test_c14n_with_comments(self):
        """write_c14n() keeps comments by default and with with_comments=True."""
        doc = self.parse(_bytes('<!--hi--><a><!--ho--><b/></a><!--hu-->'))
        with_comments = '<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'
        without_comments = '<a><b></b></a>'
        for options, expected in (
                ({}, with_comments),
                ({'with_comments': True}, with_comments),
                ({'with_comments': False}, without_comments)):
            output = self._capture(doc.write_c14n, **options)
            self.assertEqual(_bytes(expected), output)

    def test_c14n2_with_comments(self):
        """tostring(method='c14n2') drops comments only for with_comments=False."""
        doc = self.parse(b'<!--hi--> <a> <!-- ho --> <b/> </a> <!-- hu -->')
        with_comments = b'<!--hi-->\n<a> <!-- ho --> <b></b> </a>\n<!-- hu -->'
        # With the comment removed, the whitespace on both sides of it remains.
        without_comments = b'<a>  <b></b> </a>'
        for options, expected in (
                ({}, with_comments),
                ({'with_comments': True}, with_comments),
                ({'with_comments': False}, without_comments)):
            self.assertEqual(
                expected,
                etree.tostring(doc, method='c14n2', **options))

    def test_c14n2_with_comments_strip_text(self):
        """strip_text=True combined with both with_comments settings."""
        doc = self.parse(b'<!--hi--> <a> <!-- ho --> <b/> </a> <!-- hu -->')
        for keep_comments, expected in (
                (True, b'<!--hi-->\n<a><!-- ho --><b></b></a>\n<!-- hu -->'),
                (False, b'<a><b></b></a>')):
            self.assertEqual(
                expected,
                etree.tostring(doc, method='c14n2',
                               with_comments=keep_comments, strip_text=True))

    def test_c14n_tostring_with_comments(self):
        """tostring(tree, method='c14n') with the three with_comments settings."""
        doc = self.parse(_bytes('<!--hi--><a><!--ho--><b/></a><!--hu-->'))
        with_comments = '<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'
        without_comments = '<a><b></b></a>'
        for options, expected in (
                ({}, with_comments),
                ({'with_comments': True}, with_comments),
                ({'with_comments': False}, without_comments)):
            output = etree.tostring(doc, method='c14n', **options)
            self.assertEqual(_bytes(expected), output)

    def test_c14n2_tostring_with_comments(self):
        """tostring(tree, method='c14n2') with the three with_comments settings."""
        doc = self.parse(b'<!--hi--><a><!--ho--><b/></a><!--hu-->')
        with_comments = b'<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'
        without_comments = b'<a><b></b></a>'
        for options, expected in (
                ({}, with_comments),
                ({'with_comments': True}, with_comments),
                ({'with_comments': False}, without_comments)):
            output = etree.tostring(doc, method='c14n2', **options)
            self.assertEqual(expected, output)

    def test_c14n_element_tostring_with_comments(self):
        """Serialising the root element leaves out the top-level comments."""
        doc = self.parse(_bytes('<!--hi--><a><!--ho--><b/></a><!--hu-->'))
        with_comments = '<a><!--ho--><b></b></a>'
        without_comments = '<a><b></b></a>'
        for options, expected in (
                ({}, with_comments),
                ({'with_comments': True}, with_comments),
                ({'with_comments': False}, without_comments)):
            output = etree.tostring(doc.getroot(), method='c14n', **options)
            self.assertEqual(_bytes(expected), output)

    def test_c14n_exclusive(self):
        """write_c14n() with the exclusive / inclusive_ns_prefixes options."""
        doc = self.parse(_bytes(self._NS_SOURCE))
        for options, expected in (
                ({}, self._NS_ALL_DECLARED),
                ({'exclusive': False}, self._NS_ALL_DECLARED),
                ({'exclusive': True}, self._NS_EXCLUSIVE)):
            output = self._capture(doc.write_c14n, **options)
            self.assertEqual(_bytes(expected), output)

        output = self._capture(
            doc.write_c14n, exclusive=True, inclusive_ns_prefixes=['z'])
        self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:z="http://cde"><z:b></z:b></a>'),
                         output)

    def test_c14n_tostring_exclusive(self):
        """tostring(tree, method='c14n') with the exclusive options."""
        doc = self.parse(_bytes(self._NS_SOURCE))
        for options, expected in (
                ({}, self._NS_ALL_DECLARED),
                ({'exclusive': False}, self._NS_ALL_DECLARED),
                ({'exclusive': True}, self._NS_EXCLUSIVE)):
            output = etree.tostring(doc, method='c14n', **options)
            self.assertEqual(_bytes(expected), output)

        output = etree.tostring(
            doc, method='c14n', exclusive=True, inclusive_ns_prefixes=['y'])
        self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd"><z:b xmlns:z="http://cde"></z:b></a>'),
                         output)

    def test_c14n_element_tostring_exclusive(self):
        """tostring() of the root and of its first child with the exclusive options."""
        doc = self.parse(_bytes(self._NS_SOURCE))

        # Root element: same expectations as for the whole tree.
        for options, expected in (
                ({}, self._NS_ALL_DECLARED),
                ({'exclusive': False}, self._NS_ALL_DECLARED),
                ({'exclusive': True}, self._NS_EXCLUSIVE)):
            output = etree.tostring(doc.getroot(), method='c14n', **options)
            self.assertEqual(_bytes(expected), output)

        # First child (<z:b>) serialised on its own.
        for options, expected in (
                ({'exclusive': False},
                 '<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
                ({'exclusive': True},
                 '<z:b xmlns:z="http://cde"></z:b>'),
                ({'exclusive': True, 'inclusive_ns_prefixes': ['y']},
                 '<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>')):
            output = etree.tostring(doc.getroot()[0], method='c14n', **options)
            self.assertEqual(_bytes(expected), output)

    def test_c14n_tostring_inclusive_ns_prefixes(self):
        """Regression test for memory allocation issues (3+ inclusive NS prefixes)."""
        doc = self.parse(_bytes(
            '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))

        output = etree.tostring(
            doc, method='c14n', exclusive=True,
            inclusive_ns_prefixes=['x', 'y', 'z'])
        self.assertEqual(_bytes('<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
                         output)
5068
5069
class ETreeWriteTestCase(HelperTestCase):
    """Tests for ElementTree.write() to streams, file names and file URLs."""

    @staticmethod
    def _capture(write, **options):
        """Call ``write(stream, **options)`` on a fresh in-memory stream.

        Returns the bytes that ended up in the stream.
        """
        stream = BytesIO()
        write(stream, **options)
        return stream.getvalue()

    def test_write(self):
        """write() to a stream reproduces the parsed document."""
        doc = self.parse(_bytes('<a><b/></a>'))
        output = self._capture(doc.write)
        self.assertEqual(_bytes('<a><b/></a>'), output)

    def test_write_doctype(self):
        """The doctype string is written on its own line before the root."""
        doc = self.parse(_bytes('<a><b/></a>'))
        output = self._capture(doc.write, doctype='HUHU')
        self.assertEqual(_bytes('HUHU\n<a><b/></a>'), output)

    def test_write_gzip(self):
        """write(compression=9) output is readable through gzip."""
        doc = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
        packed = self._capture(doc.write, compression=9)
        with gzip.GzipFile(fileobj=BytesIO(packed)) as gz:
            unpacked = gz.read()
        self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'), unpacked)

    def test_write_gzip_doctype(self):
        """Compression and doctype can be combined in one write() call."""
        doc = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
        packed = self._capture(doc.write, compression=9, doctype='<!DOCTYPE a>')
        with gzip.GzipFile(fileobj=BytesIO(packed)) as gz:
            unpacked = gz.read()
        self.assertEqual(_bytes('<!DOCTYPE a>\n<a>'+'<b/>'*200+'</a>'), unpacked)

    def test_write_gzip_level(self):
        """Compare the output of compression levels 0 (and default), 1 and 9."""
        doc = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))

        # Level 0 must give the same bytes as not passing 'compression' at all.
        plain = self._capture(doc.write, compression=0)
        self.assertEqual(self._capture(doc.write), plain)

        unpacked = []
        for level in (1, 9):
            packed = self._capture(doc.write, compression=level)
            self.assertTrue(len(packed) <= len(plain))
            with gzip.GzipFile(fileobj=BytesIO(packed)) as gz:
                unpacked.append(gz.read())

        # Level 0 output and both decompressed outputs equal the input.
        for content in [plain] + unpacked:
            self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'), content)

    def test_write_file(self):
        """write() accepts a file name as target."""
        doc = self.parse(_bytes('<a><b/></a>'))
        with tmpfile() as path:
            doc.write(path)
            content = read_file(path, 'rb')
        self.assertEqual(_bytes('<a><b/></a>'), content)

    def test_write_file_gzip(self):
        """write() to a file name with compression=9 writes gzip data."""
        doc = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
        with tmpfile() as path:
            doc.write(path, compression=9)
            with gzip.open(path, 'rb') as gz:
                content = gz.read()
        self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'), content)

    def test_write_file_gzip_parse(self):
        """A compressed file can be parsed back directly by file name."""
        doc = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
        with tmpfile() as path:
            doc.write(path, compression=9)
            reparsed = etree.parse(path)
            content = etree.tostring(reparsed)
        self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'), content)

    def test_write_file_gzipfile_parse(self):
        """A compressed file can be parsed back through a GzipFile object."""
        doc = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
        with tmpfile() as path:
            doc.write(path, compression=9)
            with gzip.GzipFile(path) as gz:
                reparsed = etree.parse(gz)
                content = etree.tostring(reparsed)
        self.assertEqual(_bytes('<a>'+'<b/>'*200+'</a>'), content)

    def test_write_file_url(self):
        """write() accepts a file:// URL; the file name contains '+' and '%20'."""
        xml = _bytes('<a>'+'<b/>'*200+'</a>')
        doc = self.parse(xml)
        with tmpfile(prefix="p+%20", suffix=".xml") as filename:
            if sys.platform != 'win32':
                url_path = filename
            else:
                # Turn the Windows path into a URL path with a leading slash.
                url_path = '/' + filename.replace('\\', '/')
            doc.write('file://' + url_path)
            written = read_file(filename, 'rb')
            # Newlines are ignored in the comparison.
            data = written.replace(_bytes('\n'), _bytes(''))
        self.assertEqual(data, xml)
5179
5180
class ETreeErrorLogTest(HelperTestCase):
    """Tests for the error log exposed by parser exceptions."""
    etree = etree

    def test_parse_error_logging(self):
        """Parsing a document with a mismatched tag fills ``error_log``.

        The log of the raised exception must contain an entry mentioning the
        mismatch, the PARSER domain, the ERR_TAG_NAME_MISMATCH type, line 1
        and column 15.
        """
        parse = self.etree.parse
        # NOTE(review): BytesIO is the helper imported from common_imports;
        # presumably it accepts a text literal here - confirm.
        f = BytesIO('<a><b></c></b></a>')
        self.etree.clear_error_log()
        try:
            # assertRaises makes an unexpectedly successful parse a regular
            # test failure instead of a TypeError from iterating None below.
            with self.assertRaises(SyntaxError) as cm:
                parse(f)
        finally:
            # Close the stream even if some other exception escapes.
            f.close()

        entries = list(cm.exception.error_log)
        self.assertTrue(any('mismatch' in log.message for log in entries))
        self.assertTrue(any('PARSER' in log.domain_name for log in entries))
        self.assertTrue(any('ERR_TAG_NAME_MISMATCH' in log.type_name
                            for log in entries))
        self.assertTrue(any(1 == log.line for log in entries))
        self.assertTrue(any(15 == log.column for log in entries))

    def _test_python_error_logging(self):
        """This can't really be tested as long as there isn't a way to
        reset the logging setup ...

        The leading underscore keeps the method out of the set of methods
        collected with the default 'test' prefix.
        """
        parse = self.etree.parse

        messages = []

        class Logger(self.etree.PyErrorLog):
            # Record every formatted message that reaches the logger.
            def log(self, entry, message, *args):
                messages.append(message)

        self.etree.use_global_python_log(Logger())
        f = BytesIO('<a><b></c></b></a>')
        try:
            parse(f)
        except SyntaxError:
            # The parse error itself is expected; only the logged messages
            # are inspected below.
            pass
        finally:
            f.close()

        self.assertTrue(any('mismatch' in message for message in messages))
        self.assertTrue(any(':PARSER:' in message for message in messages))
        self.assertTrue(any(':ERR_TAG_NAME_MISMATCH:' in message
                            for message in messages))
        self.assertTrue(any(':1:15:' in message for message in messages))
5233
5234
class XMLPullParserTest(unittest.TestCase):
    """Tests for XMLPullParser used together with parser targets."""
    etree = etree

    class _EchoTarget(object):
        """Parser target whose callbacks return a string naming the call."""

        def start(self, tag, attrib):
            return 'start(%s)' % tag

        def end(self, tag):
            return 'end(%s)' % tag

        def close(self):
            return 'close()'

    def assert_event_tags(self, events, expected):
        """Assert that *events* yields the given (action, tag) pairs."""
        observed = [(action, elem.tag) for action, elem in events]
        self.assertEqual(observed, expected)

    def test_pull_from_simple_target(self):
        """Without an event list, the test expects 'end' events only.

        Each event carries the value returned by the target's callback.
        """
        pull = self.etree.XMLPullParser(target=self._EchoTarget())
        pending = pull.read_events()

        # No element has been closed yet; reading twice stays empty.
        pull.feed('<root><element>')
        self.assertFalse(list(pending))
        self.assertFalse(list(pending))

        for chunk, expected in (
                ('</element><child>', [('end', 'end(element)')]),
                ('</child>', [('end', 'end(child)')]),
                ('</root>', [('end', 'end(root)')])):
            pull.feed(chunk)
            self.assertEqual(expected, list(pending))

        self.assertFalse(list(pending))
        self.assertEqual('close()', pull.close())

    def test_pull_from_simple_target_start_end(self):
        """With ['start', 'end'] both callback results show up as events."""
        pull = self.etree.XMLPullParser(
            ['start', 'end'], target=self._EchoTarget())
        pending = pull.read_events()

        pull.feed('<root><element>')
        self.assertEqual(
            [('start', 'start(root)'), ('start', 'start(element)')],
            list(pending))
        self.assertFalse(list(pending))

        for chunk, expected in (
                ('</element><child>',
                 [('end', 'end(element)'), ('start', 'start(child)')]),
                ('</child>', [('end', 'end(child)')]),
                ('</root>', [('end', 'end(root)')])):
            pull.feed(chunk)
            self.assertEqual(expected, list(pending))

        self.assertFalse(list(pending))
        self.assertEqual('close()', pull.close())

    def test_pull_from_tree_builder(self):
        """Events from a TreeBuilder target carry elements with plain tags."""
        pull = self.etree.XMLPullParser(
            ['start', 'end'], target=etree.TreeBuilder())
        pending = pull.read_events()

        pull.feed('<root><element>')
        self.assert_event_tags(
            pending, [('start', 'root'), ('start', 'element')])
        self.assertFalse(list(pending))

        for chunk, expected in (
                ('</element><child>', [('end', 'element'), ('start', 'child')]),
                ('</child>', [('end', 'child')]),
                ('</root>', [('end', 'root')])):
            pull.feed(chunk)
            self.assert_event_tags(pending, expected)

        self.assertFalse(list(pending))
        built_root = pull.close()
        self.assertEqual('root', built_root.tag)

    def test_pull_from_tree_builder_subclass(self):
        """A TreeBuilder subclass may rename elements in its end() callback."""
        class SuffixingBuilder(etree.TreeBuilder):
            def end(self, tag):
                # Append a marker to the tag of every element that is closed.
                closed = super(SuffixingBuilder, self).end(tag)
                closed.tag += '-huhu'
                return closed

        pull = self.etree.XMLPullParser(
            ['start', 'end'], target=SuffixingBuilder())
        pending = pull.read_events()

        pull.feed('<root><element>')
        self.assert_event_tags(
            pending, [('start', 'root'), ('start', 'element')])
        self.assertFalse(list(pending))

        for chunk, expected in (
                ('</element><child>',
                 [('end', 'element-huhu'), ('start', 'child')]),
                ('</child>', [('end', 'child-huhu')]),
                ('</root>', [('end', 'root-huhu')])):
            pull.feed(chunk)
            self.assert_event_tags(pending, expected)

        self.assertFalse(list(pending))
        built_root = pull.close()
        self.assertEqual('root-huhu', built_root.tag)
5348
5349
def test_suite():
    """Build and return the test suite for this module.

    The suite holds the test case classes of this file, the doctests of the
    ``selftest``/``selftest2`` modules and of ``etree``, and the doctests
    made from the listed documentation files.
    """
    # unittest.makeSuite() is deprecated since Python 3.11 and removed in
    # 3.13; the default loader provides the same collection.
    load_case = unittest.defaultTestLoader.loadTestsFromTestCase

    suite = unittest.TestSuite()
    test_cases = (
        ETreeOnlyTestCase,
        ETreeXIncludeTestCase,
        ElementIncludeTestCase,
        ETreeC14NTestCase,
        ETreeWriteTestCase,
        ETreeErrorLogTest,
        XMLPullParserTest,
    )
    for test_case in test_cases:
        suite.addTests([load_case(test_case)])

    # add original doctests from ElementTree selftest modules
    from . import selftest, selftest2
    suite.addTests(doctest.DocTestSuite(selftest))
    suite.addTests(doctest.DocTestSuite(selftest2))

    # add doctests
    suite.addTests(doctest.DocTestSuite(etree))
    doc_files = (
        '../../../doc/tutorial.txt',
        '../../../doc/api.txt',
        '../../../doc/FAQ.txt',
        '../../../doc/parsing.txt',
        '../../../doc/resolvers.txt',
    )
    for doc_file in doc_files:
        suite.addTests([make_doctest(doc_file)])
    return suite
5378
5379
if __name__ == '__main__':
    # Running this file directly only prints a hint on how to run the tests.
    hint = 'to test use test.py %s' % __file__
    print(hint)