Imported Upstream version 2.3.5
[platform/upstream/python-lxml.git] / src / lxml / tests / test_etree.py
1 # -*- coding: utf-8 -*-
2
3 """
4 Tests specific to the extended etree API
5
6 Tests that apply to the general ElementTree API should go into
7 test_elementtree
8 """
9
10 import os.path
11 import unittest
12 import copy
13 import sys
14 import re
15 import operator
16 import tempfile
17 import gzip
18
# Make sure the directory holding this test module is importable, so that
# the "common_imports" helper module imported below can be found.
this_dir = os.path.dirname(__file__)
if this_dir not in sys.path:
    sys.path.insert(0, this_dir) # needed for Py3
22
23 from common_imports import etree, StringIO, BytesIO, HelperTestCase, fileInTestDir, read_file
24 from common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest
25 from common_imports import canonicalize, sorted, _str, _bytes
26
# Print a banner with the Python, lxml, libxml2 and libxslt versions in
# use.  This runs as a side effect of importing the test module.
print("")
print("TESTED VERSION: %s" % etree.__version__)
print("    Python:           " + repr(sys.version_info))
print("    lxml.etree:       " + repr(etree.LXML_VERSION))
print("    libxml used:      " + repr(etree.LIBXML_VERSION))
print("    libxml compiled:  " + repr(etree.LIBXML_COMPILED_VERSION))
print("    libxslt used:     " + repr(etree.LIBXSLT_VERSION))
print("    libxslt compiled: " + repr(etree.LIBXSLT_COMPILED_VERSION))
print("")
36
# _unicode names the text string type: the "unicode" builtin where it
# exists, "str" otherwise.
try:
    _unicode = unicode
except NameError:
    # Python 3
    _unicode = str
42
43 class ETreeOnlyTestCase(HelperTestCase):
44     """Tests only for etree, not ElementTree"""
45     etree = etree
46
47     def test_version(self):
48         self.assert_(isinstance(etree.__version__, _unicode))
49         self.assert_(isinstance(etree.LXML_VERSION, tuple))
50         self.assertEqual(len(etree.LXML_VERSION), 4)
51         self.assert_(isinstance(etree.LXML_VERSION[0], int))
52         self.assert_(isinstance(etree.LXML_VERSION[1], int))
53         self.assert_(isinstance(etree.LXML_VERSION[2], int))
54         self.assert_(isinstance(etree.LXML_VERSION[3], int))
55         self.assert_(etree.__version__.startswith(
56             str(etree.LXML_VERSION[0])))
57
58     def test_c_api(self):
59         if hasattr(self.etree, '__pyx_capi__'):
60             # newer Pyrex compatible C-API
61             self.assert_(isinstance(self.etree.__pyx_capi__, dict))
62             self.assert_(len(self.etree.__pyx_capi__) > 0)
63         else:
64             # older C-API mechanism
65             self.assert_(hasattr(self.etree, '_import_c_api'))
66
67     def test_element_names(self):
68         Element = self.etree.Element
69         el = Element('name')
70         self.assertEquals(el.tag, 'name')
71         el = Element('{}name')
72         self.assertEquals(el.tag, 'name')
73
74     def test_element_name_empty(self):
75         Element = self.etree.Element
76         el = Element('name')
77         self.assertRaises(ValueError, Element, '{}')
78         self.assertRaises(ValueError, setattr, el, 'tag', '{}')
79
80         self.assertRaises(ValueError, Element, '{test}')
81         self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
82
83     def test_element_name_colon(self):
84         Element = self.etree.Element
85         self.assertRaises(ValueError, Element, 'p:name')
86         self.assertRaises(ValueError, Element, '{test}p:name')
87
88         el = Element('name')
89         self.assertRaises(ValueError, setattr, el, 'tag', 'p:name')
90
91     def test_element_name_quote(self):
92         Element = self.etree.Element
93         self.assertRaises(ValueError, Element, "p'name")
94         self.assertRaises(ValueError, Element, 'p"name')
95
96         self.assertRaises(ValueError, Element, "{test}p'name")
97         self.assertRaises(ValueError, Element, '{test}p"name')
98
99         el = Element('name')
100         self.assertRaises(ValueError, setattr, el, 'tag', "p'name")
101         self.assertRaises(ValueError, setattr, el, 'tag', 'p"name')
102
103     def test_element_name_space(self):
104         Element = self.etree.Element
105         self.assertRaises(ValueError, Element, ' name ')
106         self.assertRaises(ValueError, Element, 'na me')
107         self.assertRaises(ValueError, Element, '{test} name')
108
109         el = Element('name')
110         self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
111
112     def test_subelement_name_empty(self):
113         Element = self.etree.Element
114         SubElement = self.etree.SubElement
115
116         el = Element('name')
117         self.assertRaises(ValueError, SubElement, el, '{}')
118         self.assertRaises(ValueError, SubElement, el, '{test}')
119
120     def test_subelement_name_colon(self):
121         Element = self.etree.Element
122         SubElement = self.etree.SubElement
123
124         el = Element('name')
125         self.assertRaises(ValueError, SubElement, el, 'p:name')
126         self.assertRaises(ValueError, SubElement, el, '{test}p:name')
127
128     def test_subelement_name_quote(self):
129         Element = self.etree.Element
130         SubElement = self.etree.SubElement
131
132         el = Element('name')
133         self.assertRaises(ValueError, SubElement, el, "p'name")
134         self.assertRaises(ValueError, SubElement, el, "{test}p'name")
135
136         self.assertRaises(ValueError, SubElement, el, 'p"name')
137         self.assertRaises(ValueError, SubElement, el, '{test}p"name')
138
139     def test_subelement_name_space(self):
140         Element = self.etree.Element
141         SubElement = self.etree.SubElement
142
143         el = Element('name')
144         self.assertRaises(ValueError, SubElement, el, ' name ')
145         self.assertRaises(ValueError, SubElement, el, 'na me')
146         self.assertRaises(ValueError, SubElement, el, '{test} name')
147
148     def test_subelement_attribute_invalid(self):
149         Element = self.etree.Element
150         SubElement = self.etree.SubElement
151
152         el = Element('name')
153         self.assertRaises(ValueError, SubElement, el, 'name', {'a b c' : 'abc'})
154         self.assertRaises(ValueError, SubElement, el, 'name', {'a' : 'a\0\n'})
155         self.assertEquals(0, len(el))
156
157     def test_qname_empty(self):
158         QName = self.etree.QName
159         self.assertRaises(ValueError, QName, '')
160         self.assertRaises(ValueError, QName, 'test', '')
161
162     def test_qname_colon(self):
163         QName = self.etree.QName
164         self.assertRaises(ValueError, QName, 'p:name')
165         self.assertRaises(ValueError, QName, 'test', 'p:name')
166
167     def test_qname_space(self):
168         QName = self.etree.QName
169         self.assertRaises(ValueError, QName, ' name ')
170         self.assertRaises(ValueError, QName, 'na me')
171         self.assertRaises(ValueError, QName, 'test', ' name')
172
173     def test_qname_namespace_localname(self):
174         # ET doesn't have namespace/localname properties on QNames
175         QName = self.etree.QName
176         namespace, localname = 'http://myns', 'a'
177         qname = QName(namespace, localname)
178         self.assertEquals(namespace, qname.namespace)
179         self.assertEquals(localname, qname.localname)
180
181     def test_qname_element(self):
182         # ET doesn't have namespace/localname properties on QNames
183         QName = self.etree.QName
184         qname1 = QName('http://myns', 'a')
185         a = self.etree.Element(qname1, nsmap={'p' : 'http://myns'})
186
187         qname2 = QName(a)
188         self.assertEquals(a.tag, qname1.text)
189         self.assertEquals(qname1.text, qname2.text)
190         self.assertEquals(qname1, qname2)
191
192     def test_qname_text_resolve(self):
193         # ET doesn't resove QNames as text values
194         etree = self.etree
195         qname = etree.QName('http://myns', 'a')
196         a = etree.Element(qname, nsmap={'p' : 'http://myns'})
197         a.text = qname
198
199         self.assertEquals("p:a", a.text)
200
201     def test_nsmap_prefix_invalid(self):
202         etree = self.etree
203         self.assertRaises(ValueError,
204                           etree.Element, "root", nsmap={'"' : 'testns'})
205         self.assertRaises(ValueError,
206                           etree.Element, "root", nsmap={'&' : 'testns'})
207         self.assertRaises(ValueError,
208                           etree.Element, "root", nsmap={'a:b' : 'testns'})
209
210     def test_attribute_has_key(self):
211         # ET in Py 3.x has no "attrib.has_key()" method
212         XML = self.etree.XML
213
214         root = XML(_bytes('<foo bar="Bar" xmlns:ns="http://ns.codespeak.net/test" ns:baz="Baz" />'))
215         self.assertEquals(
216             True, root.attrib.has_key('bar'))
217         self.assertEquals(
218             False, root.attrib.has_key('baz'))
219         self.assertEquals(
220             False, root.attrib.has_key('hah'))
221         self.assertEquals(
222             True,
223             root.attrib.has_key('{http://ns.codespeak.net/test}baz'))
224
225     def test_attribute_set(self):
226         Element = self.etree.Element
227         root = Element("root")
228         root.set("attr", "TEST")
229         self.assertEquals("TEST", root.get("attr"))
230
231     def test_attribute_set_invalid(self):
232         # ElementTree accepts arbitrary attribute values
233         # lxml.etree allows only strings
234         Element = self.etree.Element
235         root = Element("root")
236         self.assertRaises(TypeError, root.set, "newattr", 5)
237         self.assertRaises(TypeError, root.set, "newattr", None)
238
239     def test_strip_attributes(self):
240         XML = self.etree.XML
241         xml = _bytes('<test a="5" b="10" c="20"><x a="4" b="2"/></test>')
242
243         root = XML(xml)
244         self.etree.strip_attributes(root, 'a')
245         self.assertEquals(_bytes('<test b="10" c="20"><x b="2"></x></test>'),
246                           self._writeElement(root))
247
248         root = XML(xml)
249         self.etree.strip_attributes(root, 'b', 'c')
250         self.assertEquals(_bytes('<test a="5"><x a="4"></x></test>'),
251                           self._writeElement(root))
252
253     def test_strip_attributes_ns(self):
254         XML = self.etree.XML
255         xml = _bytes('<test xmlns:n="http://test/ns" a="6" b="10" c="20" n:a="5"><x a="4" n:b="2"/></test>')
256
257         root = XML(xml)
258         self.etree.strip_attributes(root, 'a')
259         self.assertEquals(
260             _bytes('<test xmlns:n="http://test/ns" b="10" c="20" n:a="5"><x n:b="2"></x></test>'),
261             self._writeElement(root))
262
263         root = XML(xml)
264         self.etree.strip_attributes(root, '{http://test/ns}a', 'c')
265         self.assertEquals(
266             _bytes('<test xmlns:n="http://test/ns" a="6" b="10"><x a="4" n:b="2"></x></test>'),
267             self._writeElement(root))
268
269         root = XML(xml)
270         self.etree.strip_attributes(root, '{http://test/ns}*')
271         self.assertEquals(
272             _bytes('<test xmlns:n="http://test/ns" a="6" b="10" c="20"><x a="4"></x></test>'),
273             self._writeElement(root))
274
275     def test_strip_elements(self):
276         XML = self.etree.XML
277         xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
278
279         root = XML(xml)
280         self.etree.strip_elements(root, 'a')
281         self.assertEquals(_bytes('<test><x></x></test>'),
282                           self._writeElement(root))
283
284         root = XML(xml)
285         self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
286         self.assertEquals(_bytes('<test><a></a><x><a></a></x></test>'),
287                           self._writeElement(root))
288
289         root = XML(xml)
290         self.etree.strip_elements(root, 'c')
291         self.assertEquals(_bytes('<test><a><b></b></a><x><a><b></b></a></x></test>'),
292                           self._writeElement(root))
293
294     def test_strip_elements_ns(self):
295         XML = self.etree.XML
296         xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
297
298         root = XML(xml)
299         self.etree.strip_elements(root, 'a')
300         self.assertEquals(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
301                           self._writeElement(root))
302
303         root = XML(xml)
304         self.etree.strip_elements(root, '{urn:a}b', 'c')
305         self.assertEquals(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
306                           self._writeElement(root))
307
308         root = XML(xml)
309         self.etree.strip_elements(root, '{urn:a}*', 'c')
310         self.assertEquals(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
311                           self._writeElement(root))
312
313         root = XML(xml)
314         self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
315         self.assertEquals(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
316                           self._writeElement(root))
317
318     def test_strip_tags(self):
319         XML = self.etree.XML
320         xml = _bytes('<test>TEST<a>A<b>B<c/>CT</b>BT</a>AT<x>X<a>A<b/>BT<c/>CT</a>AT</x>XT</test>')
321
322         root = XML(xml)
323         self.etree.strip_tags(root, 'a')
324         self.assertEquals(_bytes('<test>TESTA<b>B<c></c>CT</b>BTAT<x>XA<b></b>BT<c></c>CTAT</x>XT</test>'),
325                           self._writeElement(root))
326
327         root = XML(xml)
328         self.etree.strip_tags(root, 'b', 'c', 'X', 'Y', 'Z')
329         self.assertEquals(_bytes('<test>TEST<a>ABCTBT</a>AT<x>X<a>ABTCT</a>AT</x>XT</test>'),
330                           self._writeElement(root))
331
332         root = XML(xml)
333         self.etree.strip_tags(root, 'c')
334         self.assertEquals(_bytes('<test>TEST<a>A<b>BCT</b>BT</a>AT<x>X<a>A<b></b>BTCT</a>AT</x>XT</test>'),
335                           self._writeElement(root))
336
337     def test_strip_tags_pi_comment(self):
338         XML = self.etree.XML
339         PI = self.etree.ProcessingInstruction
340         Comment = self.etree.Comment
341         xml = _bytes('<!--comment1-->\n<?PI1?>\n<test>TEST<!--comment2-->XT<?PI2?></test>\n<!--comment3-->\n<?PI1?>')
342
343         root = XML(xml)
344         self.etree.strip_tags(root, PI)
345         self.assertEquals(_bytes('<!--comment1-->\n<?PI1?>\n<test>TEST<!--comment2-->XT</test>\n<!--comment3-->\n<?PI1?>'),
346                           self._writeElement(root))
347
348         root = XML(xml)
349         self.etree.strip_tags(root, Comment)
350         self.assertEquals(_bytes('<!--comment1-->\n<?PI1?>\n<test>TESTXT<?PI2?></test>\n<!--comment3-->\n<?PI1?>'),
351                           self._writeElement(root))
352
353         root = XML(xml)
354         self.etree.strip_tags(root, PI, Comment)
355         self.assertEquals(_bytes('<!--comment1-->\n<?PI1?>\n<test>TESTXT</test>\n<!--comment3-->\n<?PI1?>'),
356                           self._writeElement(root))
357
358         root = XML(xml)
359         self.etree.strip_tags(root, Comment, PI)
360         self.assertEquals(_bytes('<!--comment1-->\n<?PI1?>\n<test>TESTXT</test>\n<!--comment3-->\n<?PI1?>'),
361                           self._writeElement(root))
362
363     def test_strip_tags_pi_comment_all(self):
364         XML = self.etree.XML
365         ElementTree = self.etree.ElementTree
366         PI = self.etree.ProcessingInstruction
367         Comment = self.etree.Comment
368         xml = _bytes('<!--comment1-->\n<?PI1?>\n<test>TEST<!--comment2-->XT<?PI2?></test>\n<!--comment3-->\n<?PI1?>')
369
370         root = XML(xml)
371         self.etree.strip_tags(ElementTree(root), PI)
372         self.assertEquals(_bytes('<!--comment1-->\n<test>TEST<!--comment2-->XT</test>\n<!--comment3-->'),
373                           self._writeElement(root))
374
375         root = XML(xml)
376         self.etree.strip_tags(ElementTree(root), Comment)
377         self.assertEquals(_bytes('<?PI1?>\n<test>TESTXT<?PI2?></test>\n<?PI1?>'),
378                           self._writeElement(root))
379
380         root = XML(xml)
381         self.etree.strip_tags(ElementTree(root), PI, Comment)
382         self.assertEquals(_bytes('<test>TESTXT</test>'),
383                           self._writeElement(root))
384
385         root = XML(xml)
386         self.etree.strip_tags(ElementTree(root), Comment, PI)
387         self.assertEquals(_bytes('<test>TESTXT</test>'),
388                           self._writeElement(root))
389
390     def test_strip_tags_doc_style(self):
391         XML = self.etree.XML
392         xml = _bytes('''
393         <div>
394             <div>
395                 I like <strong>sheep</strong>.
396                 <br/>
397                 I like lots of <strong>sheep</strong>.
398                 <br/>
399                 Click <a href="http://www.sheep.com">here</a> for <a href="http://www.sheep.com">those</a> sheep.
400                 <br/>
401             </div>
402         </div>
403         '''.strip())
404
405         root = XML(xml)
406         self.etree.strip_tags(root, 'a')
407         self.assertEquals(re.sub(_bytes('</?a[^>]*>'), _bytes(''), xml).replace(_bytes('<br/>'), _bytes('<br></br>')),
408                           self._writeElement(root))
409
410         root = XML(xml)
411         self.etree.strip_tags(root, 'a', 'br')
412         self.assertEquals(re.sub(_bytes('</?a[^>]*>'), _bytes(''),
413                                  re.sub(_bytes('<br[^>]*>'), _bytes(''), xml)),
414                           self._writeElement(root))
415
416     def test_strip_tags_ns(self):
417         XML = self.etree.XML
418         xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>CT</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
419
420         root = XML(xml)
421         self.etree.strip_tags(root, 'a')
422         self.assertEquals(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>CT</b>BT</n:a>AT<x>XA<b xmlns="urn:a"></b>BT<c xmlns="urn:x"></c>CTAT</x>XT</test>'),
423                           self._writeElement(root))
424
425         root = XML(xml)
426         self.etree.strip_tags(root, '{urn:a}b', 'c')
427         self.assertEquals(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>CT</b>BT</n:a>AT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
428                           self._writeElement(root))
429
430         root = XML(xml)
431         self.etree.strip_tags(root, '{urn:a}*', 'c')
432         self.assertEquals(_bytes('<test>TESTA<b>B<c xmlns="urn:c"></c>CT</b>BTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
433                           self._writeElement(root))
434
435     def test_strip_tags_and_remove(self):
436         # previously crashed
437         HTML = self.etree.HTML
438         root = HTML(_bytes('<div><h1>title</h1> <b>foo</b> <p>boo</p></div>'))[0][0]
439         self.assertEquals(_bytes('<div><h1>title</h1> <b>foo</b> <p>boo</p></div>'),
440                           self.etree.tostring(root))
441         self.etree.strip_tags(root, 'b')
442         self.assertEquals(_bytes('<div><h1>title</h1> foo <p>boo</p></div>'),
443                           self.etree.tostring(root))
444         root.remove(root[0])
445         self.assertEquals(_bytes('<div><p>boo</p></div>'),
446                           self.etree.tostring(root))
447
448     def test_pi(self):
449         # lxml.etree separates target and text
450         Element = self.etree.Element
451         SubElement = self.etree.SubElement
452         ProcessingInstruction = self.etree.ProcessingInstruction
453
454         a = Element('a')
455         a.append(ProcessingInstruction('foo', 'some more text'))
456         self.assertEquals(a[0].target, 'foo')
457         self.assertEquals(a[0].text, 'some more text')
458
459     def test_pi_parse(self):
460         XML = self.etree.XML
461         root = XML(_bytes("<test><?mypi my test ?></test>"))
462         self.assertEquals(root[0].target, "mypi")
463         self.assertEquals(root[0].text, "my test ")
464
465     def test_pi_pseudo_attributes_get(self):
466         XML = self.etree.XML
467         root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
468         self.assertEquals(root[0].target, "mypi")
469         self.assertEquals(root[0].get('my'), "1")
470         self.assertEquals(root[0].get('test'), " abc ")
471         self.assertEquals(root[0].get('quotes'), "' '")
472         self.assertEquals(root[0].get('only'), None)
473         self.assertEquals(root[0].get('names'), None)
474         self.assertEquals(root[0].get('nope'), None)
475
476     def test_pi_pseudo_attributes_attrib(self):
477         XML = self.etree.XML
478         root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
479         self.assertEquals(root[0].target, "mypi")
480         self.assertEquals(root[0].attrib['my'], "1")
481         self.assertEquals(root[0].attrib['test'], " abc ")
482         self.assertEquals(root[0].attrib['quotes'], "' '")
483         self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
484         self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
485         self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
486
487     def test_deepcopy_pi(self):
488         # previously caused a crash
489         ProcessingInstruction = self.etree.ProcessingInstruction
490         
491         a = ProcessingInstruction("PI", "ONE")
492         b = copy.deepcopy(a)
493         b.text = "ANOTHER"
494
495         self.assertEquals('ONE',     a.text)
496         self.assertEquals('ANOTHER', b.text)
497
498     def test_deepcopy_elementtree_pi(self):
499         XML = self.etree.XML
500         tostring = self.etree.tostring
501         root = XML(_bytes("<?mypi my test ?><test/><!--comment -->"))
502         tree1 = self.etree.ElementTree(root)
503         self.assertEquals(_bytes("<?mypi my test ?><test/><!--comment -->"),
504                           tostring(tree1))
505
506         tree2 = copy.deepcopy(tree1)
507         self.assertEquals(_bytes("<?mypi my test ?><test/><!--comment -->"),
508                           tostring(tree2))
509
510         root2 = copy.deepcopy(tree1.getroot())
511         self.assertEquals(_bytes("<test/>"),
512                           tostring(root2))
513
514     def test_deepcopy_elementtree_dtd(self):
515         XML = self.etree.XML
516         tostring = self.etree.tostring
517         xml = _bytes('<!DOCTYPE test [\n<!ENTITY entity "tasty">\n]>\n<test/>')
518         root = XML(xml)
519         tree1 = self.etree.ElementTree(root)
520         self.assertEquals(xml, tostring(tree1))
521
522         tree2 = copy.deepcopy(tree1)
523         self.assertEquals(xml, tostring(tree2))
524
525         root2 = copy.deepcopy(tree1.getroot())
526         self.assertEquals(_bytes("<test/>"),
527                           tostring(root2))
528
529     def test_attribute_set(self):
530         # ElementTree accepts arbitrary attribute values
531         # lxml.etree allows only strings
532         Element = self.etree.Element
533
534         root = Element("root")
535         root.set("attr", "TEST")
536         self.assertEquals("TEST", root.get("attr"))
537         self.assertRaises(TypeError, root.set, "newattr", 5)
538
539     def test_parse_remove_comments(self):
540         fromstring = self.etree.fromstring
541         tostring = self.etree.tostring
542         XMLParser = self.etree.XMLParser
543
544         xml = _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
545         parser = XMLParser(remove_comments=True)
546         root = fromstring(xml, parser)
547         self.assertEquals(
548             _bytes('<a><b><c/></b></a>'),
549             tostring(root))
550
551     def test_parse_remove_pis(self):
552         parse = self.etree.parse
553         tostring = self.etree.tostring
554         XMLParser = self.etree.XMLParser
555
556         xml = _bytes('<?test?><a><?A?><b><?B?><c/></b><?C?></a><?tail?>')
557
558         f = BytesIO(xml)
559         tree = parse(f)
560         self.assertEquals(
561             xml,
562             tostring(tree))
563
564         parser = XMLParser(remove_pis=True)
565         tree = parse(f, parser)
566         self.assertEquals(
567             _bytes('<a><b><c/></b></a>'),
568             tostring(tree))
569
570     def test_parse_parser_type_error(self):
571         # ET raises IOError only
572         parse = self.etree.parse
573         self.assertRaises(TypeError, parse, 'notthere.xml', object())
574
575     def test_iterparse_tree_comments(self):
576         # ET removes comments
577         iterparse = self.etree.iterparse
578         tostring = self.etree.tostring
579
580         f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
581         events = list(iterparse(f))
582         root = events[-1][1]
583         self.assertEquals(3, len(events))
584         self.assertEquals(
585             _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
586             tostring(root))
587
588     def test_iterparse_comments(self):
589         # ET removes comments
590         iterparse = self.etree.iterparse
591         tostring = self.etree.tostring
592
593         def name(event, el):
594             if event == 'comment':
595                 return el.text
596             else:
597                 return el.tag
598
599         f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
600         events = list(iterparse(f, events=('end', 'comment')))
601         root = events[-1][1]
602         self.assertEquals(6, len(events))
603         self.assertEquals(['A', ' B ', 'c', 'b', 'C', 'a'],
604                           [ name(*item) for item in events ])
605         self.assertEquals(
606             _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
607             tostring(root))
608
609     def test_iterparse_pis(self):
610         # ET removes pis
611         iterparse = self.etree.iterparse
612         tostring = self.etree.tostring
613         ElementTree = self.etree.ElementTree
614
615         def name(event, el):
616             if event == 'pi':
617                 return (el.target, el.text)
618             else:
619                 return el.tag
620
621         f = BytesIO('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
622         events = list(iterparse(f, events=('end', 'pi')))
623         root = events[-2][1]
624         self.assertEquals(8, len(events))
625         self.assertEquals([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
626                            ('pid','d'), 'a', ('pie','e')],
627                           [ name(*item) for item in events ])
628         self.assertEquals(
629             _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>'),
630             tostring(ElementTree(root)))
631
632     def test_iterparse_remove_comments(self):
633         iterparse = self.etree.iterparse
634         tostring = self.etree.tostring
635
636         f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
637         events = list(iterparse(f, remove_comments=True,
638                                 events=('end', 'comment')))
639         root = events[-1][1]
640         self.assertEquals(3, len(events))
641         self.assertEquals(['c', 'b', 'a'],
642                           [ el.tag for (event, el) in events ])
643         self.assertEquals(
644             _bytes('<a><b><c/></b></a>'),
645             tostring(root))
646
647     def test_iterparse_broken(self):
648         iterparse = self.etree.iterparse
649         f = BytesIO('<a><b><c/></a>')
650         # ET raises ExpatError, lxml raises XMLSyntaxError
651         self.assertRaises(self.etree.XMLSyntaxError, list, iterparse(f))
652
653     def test_iterparse_strip(self):
654         iterparse = self.etree.iterparse
655         f = BytesIO("""
656                <a>  \n \n  <b> b test </b>  \n
657
658                \n\t <c> \n </c> </a>  \n """)
659         iterator = iterparse(f, remove_blank_text=True)
660         text = [ (element.text, element.tail)
661                  for event, element in iterator ]
662         self.assertEquals(
663             [(" b test ", None), (" \n ", None), (None, None)],
664             text)
665
666     def test_iterparse_tag(self):
667         iterparse = self.etree.iterparse
668         f = BytesIO('<a><b><d/></b><c/></a>')
669
670         iterator = iterparse(f, tag="b", events=('start', 'end'))
671         events = list(iterator)
672         root = iterator.root
673         self.assertEquals(
674             [('start', root[0]), ('end', root[0])],
675             events)
676
677     def test_iterparse_tag_all(self):
678         iterparse = self.etree.iterparse
679         f = BytesIO('<a><b><d/></b><c/></a>')
680
681         iterator = iterparse(f, tag="*", events=('start', 'end'))
682         events = list(iterator)
683         self.assertEquals(
684             8,
685             len(events))
686
687     def test_iterparse_tag_ns(self):
688         iterparse = self.etree.iterparse
689         f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
690
691         iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end'))
692         events = list(iterator)
693         root = iterator.root
694         self.assertEquals(
695             [('start', root[0]), ('end', root[0])],
696             events)
697
698     def test_iterparse_tag_ns_all(self):
699         iterparse = self.etree.iterparse
700         f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
701
702         iterator = iterparse(f, tag="{urn:test:1}*", events=('start', 'end'))
703         events = list(iterator)
704         self.assertEquals(
705             8,
706             len(events))
707
708     def test_iterparse_encoding_error(self):
709         text = _str('Søk pÃ¥ nettet')
710         wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
711         xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
712                       ).encode('iso-8859-1')
713
714         self.assertRaises(self.etree.ParseError,
715                           list, self.etree.iterparse(BytesIO(xml_latin1)))
716
717     def test_iterparse_encoding_8bit_override(self):
718         text = _str('Søk pÃ¥ nettet', encoding="UTF-8")
719         wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
720         xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
721                       ).encode('iso-8859-1')
722
723         iterator = self.etree.iterparse(BytesIO(xml_latin1),
724                                         encoding="iso-8859-1")
725         self.assertEquals(1, len(list(iterator)))
726
727         a = iterator.root
728         self.assertEquals(a.text, text)
729
730     def test_iterparse_keep_cdata(self):
731         tostring = self.etree.tostring
732         f = BytesIO('<root><![CDATA[test]]></root>')
733         context = self.etree.iterparse(f, strip_cdata=False)
734         content = [ el.text for event,el in context ]
735
736         self.assertEquals(['test'], content)
737         self.assertEquals(_bytes('<root><![CDATA[test]]></root>'),
738                           tostring(context.root))
739
740     def test_parser_encoding_unknown(self):
741         self.assertRaises(
742             LookupError, self.etree.XMLParser, encoding="hopefully unknown")
743
744     def test_parser_encoding(self):
745         self.etree.XMLParser(encoding="ascii")
746         self.etree.XMLParser(encoding="utf-8")
747         self.etree.XMLParser(encoding="iso-8859-1")
748
749     def test_feed_parser_recover(self):
750         parser = self.etree.XMLParser(recover=True)
751
752         parser.feed('<?xml version=')
753         parser.feed('"1.0"?><ro')
754         parser.feed('ot><')
755         parser.feed('a test="works"')
756         parser.feed('><othertag/></root') # <a> not closed!
757         parser.feed('>')
758
759         root = parser.close()
760
761         self.assertEquals(root.tag, "root")
762         self.assertEquals(len(root), 1)
763         self.assertEquals(root[0].tag, "a")
764         self.assertEquals(root[0].get("test"), "works")
765         self.assertEquals(len(root[0]), 1)
766         self.assertEquals(root[0][0].tag, "othertag")
767         # FIXME: would be nice to get some errors logged ...
768         #self.assert_(len(parser.error_log) > 0, "error log is empty")
769
770     def test_elementtree_parser_target_type_error(self):
771         assertEquals = self.assertEquals
772         assertFalse  = self.assertFalse
773
774         events = []
775         class Target(object):
776             def start(self, tag, attrib):
777                 events.append("start")
778                 assertFalse(attrib)
779                 assertEquals("TAG", tag)
780             def end(self, tag):
781                 events.append("end")
782                 assertEquals("TAG", tag)
783             def close(self):
784                 return "DONE" # no Element!
785
786         parser = self.etree.XMLParser(target=Target())
787         tree = self.etree.ElementTree()
788
789         self.assertRaises(TypeError,
790                           tree.parse, BytesIO("<TAG/>"), parser=parser)
791         self.assertEquals(["start", "end"], events)
792
793     def test_parser_target_feed_exception(self):
794         # ET doesn't call .close() on errors
795         events = []
796         class Target(object):
797             def start(self, tag, attrib):
798                 events.append("start-" + tag)
799             def end(self, tag):
800                 events.append("end-" + tag)
801                 if tag == 'a':
802                     raise ValueError("dead and gone")
803             def data(self, data):
804                 events.append("data-" + data)
805             def close(self):
806                 events.append("close")
807                 return "DONE"
808
809         parser = self.etree.XMLParser(target=Target())
810
811         try:
812             parser.feed(_bytes('<root>A<a>ca</a>B</root>'))
813             done = parser.close()
814             self.fail("error expected, but parsing succeeded")
815         except ValueError:
816             done = 'value error received as expected'
817
818         self.assertEquals(["start-root", "data-A", "start-a",
819                            "data-ca", "end-a", "close"],
820                           events)
821
822     def test_parser_target_fromstring_exception(self):
823         # ET doesn't call .close() on errors
824         events = []
825         class Target(object):
826             def start(self, tag, attrib):
827                 events.append("start-" + tag)
828             def end(self, tag):
829                 events.append("end-" + tag)
830                 if tag == 'a':
831                     raise ValueError("dead and gone")
832             def data(self, data):
833                 events.append("data-" + data)
834             def close(self):
835                 events.append("close")
836                 return "DONE"
837
838         parser = self.etree.XMLParser(target=Target())
839
840         try:
841             done = self.etree.fromstring(_bytes('<root>A<a>ca</a>B</root>'),
842                                          parser=parser)
843             self.fail("error expected, but parsing succeeded")
844         except ValueError:
845             done = 'value error received as expected'
846
847         self.assertEquals(["start-root", "data-A", "start-a",
848                            "data-ca", "end-a", "close"],
849                           events)
850
851     def test_parser_target_comment(self):
852         events = []
853         class Target(object):
854             def start(self, tag, attrib):
855                 events.append("start-" + tag)
856             def end(self, tag):
857                 events.append("end-" + tag)
858             def data(self, data):
859                 events.append("data-" + data)
860             def comment(self, text):
861                 events.append("comment-" + text)
862             def close(self):
863                 return "DONE"
864
865         parser = self.etree.XMLParser(target=Target())
866
867         parser.feed(_bytes('<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->'))
868         done = parser.close()
869
870         self.assertEquals("DONE", done)
871         self.assertEquals(["comment-a", "start-root", "data-A", "comment-b",
872                            "start-sub", "end-sub", "comment-c", "data-B",
873                            "end-root", "comment-d"],
874                           events)
875
876     def test_parser_target_pi(self):
877         events = []
878         class Target(object):
879             def start(self, tag, attrib):
880                 events.append("start-" + tag)
881             def end(self, tag):
882                 events.append("end-" + tag)
883             def data(self, data):
884                 events.append("data-" + data)
885             def pi(self, target, data):
886                 events.append("pi-" + target + "-" + data)
887             def close(self):
888                 return "DONE"
889
890         parser = self.etree.XMLParser(target=Target())
891
892         parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
893         done = parser.close()
894
895         self.assertEquals("DONE", done)
896         self.assertEquals(["pi-test-a", "start-root", "data-A", "pi-test-b",
897                            "data-B", "end-root", "pi-test-c"],
898                           events)
899
900     def test_parser_target_cdata(self):
901         events = []
902         class Target(object):
903             def start(self, tag, attrib):
904                 events.append("start-" + tag)
905             def end(self, tag):
906                 events.append("end-" + tag)
907             def data(self, data):
908                 events.append("data-" + data)
909             def close(self):
910                 return "DONE"
911
912         parser = self.etree.XMLParser(target=Target(),
913                                       strip_cdata=False)
914
915         parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
916         done = parser.close()
917
918         self.assertEquals("DONE", done)
919         self.assertEquals(["start-root", "data-A", "start-a",
920                            "data-ca", "end-a", "data-B", "end-root"],
921                           events)
922
923     def test_parser_target_recover(self):
924         events = []
925         class Target(object):
926             def start(self, tag, attrib):
927                 events.append("start-" + tag)
928             def end(self, tag):
929                 events.append("end-" + tag)
930             def data(self, data):
931                 events.append("data-" + data)
932             def close(self):
933                 events.append("close")
934                 return "DONE"
935
936         parser = self.etree.XMLParser(target=Target(),
937                                       recover=True)
938
939         parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
940         done = parser.close()
941
942         self.assertEquals("DONE", done)
943         self.assertEquals(["start-root", "data-A", "start-a",
944                            "data-ca", "end-a", "data-B",
945                            "end-root", "close"],
946                           events)
947
948     def test_iterwalk_tag(self):
949         iterwalk = self.etree.iterwalk
950         root = self.etree.XML(_bytes('<a><b><d/></b><c/></a>'))
951
952         iterator = iterwalk(root, tag="b", events=('start', 'end'))
953         events = list(iterator)
954         self.assertEquals(
955             [('start', root[0]), ('end', root[0])],
956             events)
957
958     def test_iterwalk_tag_all(self):
959         iterwalk = self.etree.iterwalk
960         root = self.etree.XML(_bytes('<a><b><d/></b><c/></a>'))
961
962         iterator = iterwalk(root, tag="*", events=('start', 'end'))
963         events = list(iterator)
964         self.assertEquals(
965             8,
966             len(events))
967
968     def test_iterwalk(self):
969         iterwalk = self.etree.iterwalk
970         root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
971
972         events = list(iterwalk(root))
973         self.assertEquals(
974             [('end', root[0]), ('end', root[1]), ('end', root)],
975             events)
976
977     def test_iterwalk_start(self):
978         iterwalk = self.etree.iterwalk
979         root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
980
981         iterator = iterwalk(root, events=('start',))
982         events = list(iterator)
983         self.assertEquals(
984             [('start', root), ('start', root[0]), ('start', root[1])],
985             events)
986
987     def test_iterwalk_start_end(self):
988         iterwalk = self.etree.iterwalk
989         root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
990
991         iterator = iterwalk(root, events=('start','end'))
992         events = list(iterator)
993         self.assertEquals(
994             [('start', root), ('start', root[0]), ('end', root[0]),
995              ('start', root[1]), ('end', root[1]), ('end', root)],
996             events)
997
998     def test_iterwalk_clear(self):
999         iterwalk = self.etree.iterwalk
1000         root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1001
1002         iterator = iterwalk(root)
1003         for event, elem in iterator:
1004             elem.clear()
1005
1006         self.assertEquals(0,
1007                           len(root))
1008
1009     def test_iterwalk_attrib_ns(self):
1010         iterwalk = self.etree.iterwalk
1011         root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))
1012
1013         attr_name = '{testns}bla'
1014         events = []
1015         iterator = iterwalk(root, events=('start','end','start-ns','end-ns'))
1016         for event, elem in iterator:
1017             events.append(event)
1018             if event == 'start':
1019                 if elem.tag != '{ns1}a':
1020                     elem.set(attr_name, 'value')
1021
1022         self.assertEquals(
1023             ['start-ns', 'start', 'start', 'start-ns', 'start',
1024              'end', 'end-ns', 'end', 'end', 'end-ns'],
1025             events)
1026
1027         self.assertEquals(
1028             None,
1029             root.get(attr_name))
1030         self.assertEquals(
1031             'value',
1032             root[0].get(attr_name))
1033
1034     def test_iterwalk_getiterator(self):
1035         iterwalk = self.etree.iterwalk
1036         root = self.etree.XML(_bytes('<a><b><d/></b><c/></a>'))
1037
1038         counts = []
1039         for event, elem in iterwalk(root):
1040             counts.append(len(list(elem.getiterator())))
1041         self.assertEquals(
1042             [1,2,1,4],
1043             counts)
1044
1045     def test_resolve_string_dtd(self):
1046         parse = self.etree.parse
1047         parser = self.etree.XMLParser(dtd_validation=True)
1048         assertEqual = self.assertEqual
1049         test_url = _str("__nosuch.dtd")
1050
1051         class MyResolver(self.etree.Resolver):
1052             def resolve(self, url, id, context):
1053                 assertEqual(url, test_url)
1054                 return self.resolve_string(
1055                     _str('''<!ENTITY myentity "%s">
1056                         <!ELEMENT doc ANY>''') % url, context)
1057
1058         parser.resolvers.add(MyResolver())
1059
1060         xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1061         tree = parse(StringIO(xml), parser)
1062         root = tree.getroot()
1063         self.assertEquals(root.text, test_url)
1064
1065     def test_resolve_bytes_dtd(self):
1066         parse = self.etree.parse
1067         parser = self.etree.XMLParser(dtd_validation=True)
1068         assertEqual = self.assertEqual
1069         test_url = _str("__nosuch.dtd")
1070
1071         class MyResolver(self.etree.Resolver):
1072             def resolve(self, url, id, context):
1073                 assertEqual(url, test_url)
1074                 return self.resolve_string(
1075                     (_str('''<!ENTITY myentity "%s">
1076                              <!ELEMENT doc ANY>''') % url).encode('utf-8'),
1077                     context)
1078
1079         parser.resolvers.add(MyResolver())
1080
1081         xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1082         tree = parse(StringIO(xml), parser)
1083         root = tree.getroot()
1084         self.assertEquals(root.text, test_url)
1085
1086     def test_resolve_filelike_dtd(self):
1087         parse = self.etree.parse
1088         parser = self.etree.XMLParser(dtd_validation=True)
1089         assertEqual = self.assertEqual
1090         test_url = _str("__nosuch.dtd")
1091
1092         class MyResolver(self.etree.Resolver):
1093             def resolve(self, url, id, context):
1094                 assertEqual(url, test_url)
1095                 return self.resolve_file(
1096                     SillyFileLike(
1097                         _str('''<!ENTITY myentity "%s">
1098                         <!ELEMENT doc ANY>''') % url), context)
1099
1100         parser.resolvers.add(MyResolver())
1101
1102         xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1103         tree = parse(StringIO(xml), parser)
1104         root = tree.getroot()
1105         self.assertEquals(root.text, test_url)
1106
1107     def test_resolve_filename_dtd(self):
1108         parse = self.etree.parse
1109         parser = self.etree.XMLParser(attribute_defaults=True)
1110         assertEqual = self.assertEqual
1111         test_url = _str("__nosuch.dtd")
1112
1113         class MyResolver(self.etree.Resolver):
1114             def resolve(self, url, id, context):
1115                 assertEqual(url, test_url)
1116                 return self.resolve_filename(
1117                     fileInTestDir('test.dtd'), context)
1118
1119         parser.resolvers.add(MyResolver())
1120
1121         xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1122         tree = parse(StringIO(xml), parser)
1123         root = tree.getroot()
1124         self.assertEquals(
1125             root.attrib,    {'default': 'valueA'})
1126         self.assertEquals(
1127             root[0].attrib, {'default': 'valueB'})
1128
1129     def test_resolve_filename_dtd_relative(self):
1130         parse = self.etree.parse
1131         parser = self.etree.XMLParser(attribute_defaults=True)
1132         assertEqual = self.assertEqual
1133         test_url = _str("__nosuch.dtd")
1134
1135         class MyResolver(self.etree.Resolver):
1136             def resolve(self, url, id, context):
1137                 assertEqual(url, fileInTestDir(test_url))
1138                 return self.resolve_filename(
1139                     fileInTestDir('test.dtd'), context)
1140
1141         parser.resolvers.add(MyResolver())
1142
1143         xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1144         tree = parse(StringIO(xml), parser,
1145                      base_url=fileInTestDir('__test.xml'))
1146         root = tree.getroot()
1147         self.assertEquals(
1148             root.attrib,    {'default': 'valueA'})
1149         self.assertEquals(
1150             root[0].attrib, {'default': 'valueB'})
1151
1152     def test_resolve_file_dtd(self):
1153         parse = self.etree.parse
1154         parser = self.etree.XMLParser(attribute_defaults=True)
1155         assertEqual = self.assertEqual
1156         test_url = _str("__nosuch.dtd")
1157
1158         class MyResolver(self.etree.Resolver):
1159             def resolve(self, url, id, context):
1160                 assertEqual(url, test_url)
1161                 return self.resolve_file(
1162                     open(fileInTestDir('test.dtd'), 'rb'), context)
1163
1164         parser.resolvers.add(MyResolver())
1165
1166         xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1167         tree = parse(StringIO(xml), parser)
1168         root = tree.getroot()
1169         self.assertEquals(
1170             root.attrib,    {'default': 'valueA'})
1171         self.assertEquals(
1172             root[0].attrib, {'default': 'valueB'})
1173
1174     def test_resolve_empty(self):
1175         parse = self.etree.parse
1176         parser = self.etree.XMLParser(load_dtd=True)
1177         assertEqual = self.assertEqual
1178         test_url = _str("__nosuch.dtd")
1179
1180         class check(object):
1181             resolved = False
1182
1183         class MyResolver(self.etree.Resolver):
1184             def resolve(self, url, id, context):
1185                 assertEqual(url, test_url)
1186                 check.resolved = True
1187                 return self.resolve_empty(context)
1188
1189         parser.resolvers.add(MyResolver())
1190
1191         xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1192         self.assertRaises(etree.XMLSyntaxError, parse, StringIO(xml), parser)
1193         self.assert_(check.resolved)
1194
1195     def test_resolve_error(self):
1196         parse = self.etree.parse
1197         parser = self.etree.XMLParser(dtd_validation=True)
1198
1199         class _LocalException(Exception):
1200             pass
1201
1202         class MyResolver(self.etree.Resolver):
1203             def resolve(self, url, id, context):
1204                 raise _LocalException
1205
1206         parser.resolvers.add(MyResolver())
1207
1208         xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
1209         self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
1210
1211     if etree.LIBXML_VERSION > (2,6,20):
1212         def test_entity_parse(self):
1213             parse = self.etree.parse
1214             tostring = self.etree.tostring
1215             parser = self.etree.XMLParser(resolve_entities=False)
1216             Entity = self.etree.Entity
1217
1218             xml = _bytes('<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>')
1219             tree = parse(BytesIO(xml), parser)
1220             root = tree.getroot()
1221             self.assertEquals(root[0].tag, Entity)
1222             self.assertEquals(root[0].text, "&myentity;")
1223             self.assertEquals(root[0].tail, None)
1224             self.assertEquals(root[0].name, "myentity")
1225
1226             self.assertEquals(_bytes('<doc>&myentity;</doc>'),
1227                               tostring(root))
1228
1229         def test_entity_restructure(self):
1230             xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp "&#160;"> ]>
1231                 <root>
1232                   <child1/>
1233                   <child2/>
1234                   <child3>&nbsp;</child3>
1235                 </root>''')
1236
1237             parser = self.etree.XMLParser(resolve_entities=False)
1238             root = etree.fromstring(xml, parser)
1239             self.assertEquals([ el.tag for el in root ],
1240                               ['child1', 'child2', 'child3'])
1241
1242             root[0] = root[-1]
1243             self.assertEquals([ el.tag for el in root ],
1244                               ['child3', 'child2'])
1245             self.assertEquals(root[0][0].text, '&nbsp;')
1246             self.assertEquals(root[0][0].name, 'nbsp')
1247
1248     def test_entity_append(self):
1249         Entity = self.etree.Entity
1250         Element = self.etree.Element
1251         tostring = self.etree.tostring
1252
1253         root = Element("root")
1254         root.append( Entity("test") )
1255
1256         self.assertEquals(root[0].tag, Entity)
1257         self.assertEquals(root[0].text, "&test;")
1258         self.assertEquals(root[0].tail, None)
1259         self.assertEquals(root[0].name, "test")
1260
1261         self.assertEquals(_bytes('<root>&test;</root>'),
1262                           tostring(root))
1263
1264     def test_entity_values(self):
1265         Entity = self.etree.Entity
1266         self.assertEquals(Entity("test").text, '&test;')
1267         self.assertEquals(Entity("#17683").text, '&#17683;')
1268         self.assertEquals(Entity("#x1768").text, '&#x1768;')
1269         self.assertEquals(Entity("#x98AF").text, '&#x98AF;')
1270
1271     def test_entity_error(self):
1272         Entity = self.etree.Entity
1273         self.assertRaises(ValueError, Entity, 'a b c')
1274         self.assertRaises(ValueError, Entity, 'a,b')
1275         self.assertRaises(ValueError, Entity, 'a\0b')
1276         self.assertRaises(ValueError, Entity, '#abc')
1277         self.assertRaises(ValueError, Entity, '#xxyz')
1278
1279     def test_cdata(self):
1280         CDATA = self.etree.CDATA
1281         Element = self.etree.Element
1282         tostring = self.etree.tostring
1283
1284         root = Element("root")
1285         root.text = CDATA('test')
1286
1287         self.assertEquals('test',
1288                           root.text)
1289         self.assertEquals(_bytes('<root><![CDATA[test]]></root>'),
1290                           tostring(root))
1291
1292     def test_cdata_type(self):
1293         CDATA = self.etree.CDATA
1294         Element = self.etree.Element
1295         root = Element("root")
1296
1297         root.text = CDATA("test")
1298         self.assertEquals('test', root.text)
1299
1300         root.text = CDATA(_str("test"))
1301         self.assertEquals('test', root.text)
1302
1303         self.assertRaises(TypeError, CDATA, 1)
1304
1305     def test_cdata_errors(self):
1306         CDATA = self.etree.CDATA
1307         Element = self.etree.Element
1308
1309         root = Element("root")
1310         cdata = CDATA('test')
1311         
1312         self.assertRaises(TypeError,
1313                           setattr, root, 'tail', cdata)
1314         self.assertRaises(TypeError,
1315                           root.set, 'attr', cdata)
1316         self.assertRaises(TypeError,
1317                           operator.setitem, root.attrib, 'attr', cdata)
1318
1319     def test_cdata_parser(self):
1320         tostring = self.etree.tostring
1321         parser = self.etree.XMLParser(strip_cdata=False)
1322         root = self.etree.XML(_bytes('<root><![CDATA[test]]></root>'), parser)
1323
1324         self.assertEquals('test', root.text)
1325         self.assertEquals(_bytes('<root><![CDATA[test]]></root>'),
1326                           tostring(root))
1327
1328     def test_cdata_xpath(self):
1329         tostring = self.etree.tostring
1330         parser = self.etree.XMLParser(strip_cdata=False)
1331         root = self.etree.XML(_bytes('<root><![CDATA[test]]></root>'), parser)
1332         self.assertEquals(_bytes('<root><![CDATA[test]]></root>'),
1333                           tostring(root))
1334
1335         self.assertEquals(['test'], root.xpath('//text()'))
1336
1337     # TypeError in etree, AssertionError in ElementTree;
1338     def test_setitem_assert(self):
1339         Element = self.etree.Element
1340         SubElement = self.etree.SubElement
1341
1342         a = Element('a')
1343         b = SubElement(a, 'b')
1344         
1345         self.assertRaises(TypeError,
1346                           a.__setitem__, 0, 'foo')
1347
1348     def test_append_error(self):
1349         Element = self.etree.Element
1350         root = Element('root')
1351         # raises AssertionError in ElementTree
1352         self.assertRaises(TypeError, root.append, None)
1353         self.assertRaises(TypeError, root.extend, [None])
1354         self.assertRaises(TypeError, root.extend, [Element('one'), None])
1355         self.assertEquals('one', root[0].tag)
1356
1357     def test_addnext(self):
1358         Element = self.etree.Element
1359         SubElement = self.etree.SubElement
1360         root = Element('root')
1361         SubElement(root, 'a')
1362         SubElement(root, 'b')
1363
1364         self.assertEquals(['a', 'b'],
1365                           [c.tag for c in root])
1366         root[1].addnext(root[0])
1367         self.assertEquals(['b', 'a'],
1368                           [c.tag for c in root])
1369
1370     def test_addprevious(self):
1371         Element = self.etree.Element
1372         SubElement = self.etree.SubElement
1373         root = Element('root')
1374         SubElement(root, 'a')
1375         SubElement(root, 'b')
1376
1377         self.assertEquals(['a', 'b'],
1378                           [c.tag for c in root])
1379         root[0].addprevious(root[1])
1380         self.assertEquals(['b', 'a'],
1381                           [c.tag for c in root])
1382
1383     def test_addnext_root(self):
1384         Element = self.etree.Element
1385         a = Element('a')
1386         b = Element('b')
1387         self.assertRaises(TypeError, a.addnext, b)
1388
1389     def test_addnext_root(self):
1390         Element = self.etree.Element
1391         a = Element('a')
1392         b = Element('b')
1393         self.assertRaises(TypeError, a.addnext, b)
1394
1395     def test_addprevious_pi(self):
1396         Element = self.etree.Element
1397         SubElement = self.etree.SubElement
1398         PI = self.etree.PI
1399         root = Element('root')
1400         SubElement(root, 'a')
1401         pi = PI('TARGET', 'TEXT')
1402         pi.tail = "TAIL"
1403
1404         self.assertEquals(_bytes('<root><a></a></root>'),
1405                           self._writeElement(root))
1406         root[0].addprevious(pi)
1407         self.assertEquals(_bytes('<root><?TARGET TEXT?>TAIL<a></a></root>'),
1408                           self._writeElement(root))
1409
1410     def test_addprevious_root_pi(self):
1411         Element = self.etree.Element
1412         PI = self.etree.PI
1413         root = Element('root')
1414         pi = PI('TARGET', 'TEXT')
1415         pi.tail = "TAIL"
1416
1417         self.assertEquals(_bytes('<root></root>'),
1418                           self._writeElement(root))
1419         root.addprevious(pi)
1420         self.assertEquals(_bytes('<?TARGET TEXT?>\n<root></root>'),
1421                           self._writeElement(root))
1422
1423     def test_addnext_pi(self):
1424         Element = self.etree.Element
1425         SubElement = self.etree.SubElement
1426         PI = self.etree.PI
1427         root = Element('root')
1428         SubElement(root, 'a')
1429         pi = PI('TARGET', 'TEXT')
1430         pi.tail = "TAIL"
1431
1432         self.assertEquals(_bytes('<root><a></a></root>'),
1433                           self._writeElement(root))
1434         root[0].addnext(pi)
1435         self.assertEquals(_bytes('<root><a></a><?TARGET TEXT?>TAIL</root>'),
1436                           self._writeElement(root))
1437
1438     def test_addnext_root_pi(self):
1439         Element = self.etree.Element
1440         PI = self.etree.PI
1441         root = Element('root')
1442         pi = PI('TARGET', 'TEXT')
1443         pi.tail = "TAIL"
1444
1445         self.assertEquals(_bytes('<root></root>'),
1446                           self._writeElement(root))
1447         root.addnext(pi)
1448         self.assertEquals(_bytes('<root></root>\n<?TARGET TEXT?>'),
1449                           self._writeElement(root))
1450
1451     def test_addnext_comment(self):
1452         Element = self.etree.Element
1453         SubElement = self.etree.SubElement
1454         Comment = self.etree.Comment
1455         root = Element('root')
1456         SubElement(root, 'a')
1457         comment = Comment('TEXT ')
1458         comment.tail = "TAIL"
1459
1460         self.assertEquals(_bytes('<root><a></a></root>'),
1461                           self._writeElement(root))
1462         root[0].addnext(comment)
1463         self.assertEquals(_bytes('<root><a></a><!--TEXT -->TAIL</root>'),
1464                           self._writeElement(root))
1465
1466     def test_addnext_root_comment(self):
1467         Element = self.etree.Element
1468         Comment = self.etree.Comment
1469         root = Element('root')
1470         comment = Comment('TEXT ')
1471         comment.tail = "TAIL"
1472
1473         self.assertEquals(_bytes('<root></root>'),
1474                           self._writeElement(root))
1475         root.addnext(comment)
1476         self.assertEquals(_bytes('<root></root>\n<!--TEXT -->'),
1477                           self._writeElement(root))
1478
1479     def test_addprevious_comment(self):
1480         Element = self.etree.Element
1481         SubElement = self.etree.SubElement
1482         Comment = self.etree.Comment
1483         root = Element('root')
1484         SubElement(root, 'a')
1485         comment = Comment('TEXT ')
1486         comment.tail = "TAIL"
1487
1488         self.assertEquals(_bytes('<root><a></a></root>'),
1489                           self._writeElement(root))
1490         root[0].addprevious(comment)
1491         self.assertEquals(_bytes('<root><!--TEXT -->TAIL<a></a></root>'),
1492                           self._writeElement(root))
1493
1494     def test_addprevious_root_comment(self):
1495         Element = self.etree.Element
1496         Comment = self.etree.Comment
1497         root = Element('root')
1498         comment = Comment('TEXT ')
1499         comment.tail = "TAIL"
1500
1501         self.assertEquals(_bytes('<root></root>'),
1502                           self._writeElement(root))
1503         root.addprevious(comment)
1504         self.assertEquals(_bytes('<!--TEXT -->\n<root></root>'),
1505                           self._writeElement(root))
1506
    # ET's Elements have items() and keys(), but not values()
1508     def test_attribute_values(self):
1509         XML = self.etree.XML
1510         
1511         root = XML(_bytes('<doc alpha="Alpha" beta="Beta" gamma="Gamma"/>'))
1512         values = root.values()
1513         values.sort()
1514         self.assertEquals(['Alpha', 'Beta', 'Gamma'], values)
1515
1516     # gives error in ElementTree
1517     def test_comment_empty(self):
1518         Element = self.etree.Element
1519         Comment = self.etree.Comment
1520
1521         a = Element('a')
1522         a.append(Comment())
1523         self.assertEquals(
1524             _bytes('<a><!----></a>'),
1525             self._writeElement(a))
1526
1527     # ElementTree ignores comments
1528     def test_comment_parse_empty(self):
1529         ElementTree = self.etree.ElementTree
1530         tostring = self.etree.tostring
1531
1532         xml = _bytes('<a><b/><!----><c/></a>')
1533         f = BytesIO(xml)
1534         doc = ElementTree(file=f)
1535         a = doc.getroot()
1536         self.assertEquals(
1537             '',
1538             a[1].text)
1539         self.assertEquals(
1540             xml,
1541             tostring(a))
1542
1543     # ElementTree ignores comments
1544     def test_comment_no_proxy_yet(self):
1545         ElementTree = self.etree.ElementTree
1546         
1547         f = BytesIO('<a><b></b><!-- hoi --><c></c></a>')
1548         doc = ElementTree(file=f)
1549         a = doc.getroot()
1550         self.assertEquals(
1551             ' hoi ',
1552             a[1].text)
1553
1554     # does not raise an exception in ElementTree
1555     def test_comment_immutable(self):
1556         Element = self.etree.Element
1557         Comment = self.etree.Comment
1558
1559         c = Comment()
1560         el = Element('myel')
1561
1562         self.assertRaises(TypeError, c.append, el)
1563         self.assertRaises(TypeError, c.insert, 0, el)
1564         self.assertRaises(TypeError, c.set, "myattr", "test")
1565
1566     # test passing 'None' to dump
1567     def test_dump_none(self):
1568         self.assertRaises(TypeError, self.etree.dump, None)
1569
1570     def test_prefix(self):
1571         ElementTree = self.etree.ElementTree
1572         
1573         f = BytesIO('<a xmlns:foo="http://www.infrae.com/ns/1"><foo:b/></a>')
1574         doc = ElementTree(file=f)
1575         a = doc.getroot()
1576         self.assertEquals(
1577             None,
1578             a.prefix)
1579         self.assertEquals(
1580             'foo',
1581             a[0].prefix)
1582
1583     def test_prefix_default_ns(self):
1584         ElementTree = self.etree.ElementTree
1585         
1586         f = BytesIO('<a xmlns="http://www.infrae.com/ns/1"><b/></a>')
1587         doc = ElementTree(file=f)
1588         a = doc.getroot()
1589         self.assertEquals(
1590             None,
1591             a.prefix)
1592         self.assertEquals(
1593             None,
1594             a[0].prefix)
1595
1596     def test_getparent(self):
1597         Element = self.etree.Element
1598         SubElement = self.etree.SubElement
1599
1600         a = Element('a')
1601         b = SubElement(a, 'b')
1602         c = SubElement(a, 'c')
1603         d = SubElement(b, 'd')
1604         self.assertEquals(
1605             None,
1606             a.getparent())
1607         self.assertEquals(
1608             a,
1609             b.getparent())
1610         self.assertEquals(
1611             b.getparent(),
1612             c.getparent())
1613         self.assertEquals(
1614             b,
1615             d.getparent())
1616
1617     def test_iterchildren(self):
1618         XML = self.etree.XML
1619         
1620         root = XML(_bytes('<doc><one/><two>Two</two>Hm<three/></doc>'))
1621         result = []
1622         for el in root.iterchildren():
1623             result.append(el.tag)
1624         self.assertEquals(['one', 'two', 'three'], result)
1625
1626     def test_iterchildren_reversed(self):
1627         XML = self.etree.XML
1628         
1629         root = XML(_bytes('<doc><one/><two>Two</two>Hm<three/></doc>'))
1630         result = []
1631         for el in root.iterchildren(reversed=True):
1632             result.append(el.tag)
1633         self.assertEquals(['three', 'two', 'one'], result)
1634
1635     def test_iterchildren_tag(self):
1636         XML = self.etree.XML
1637         
1638         root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two></doc>'))
1639         result = []
1640         for el in root.iterchildren(tag='two'):
1641             result.append(el.text)
1642         self.assertEquals(['Two', 'Bla'], result)
1643
1644     def test_iterchildren_tag_reversed(self):
1645         XML = self.etree.XML
1646         
1647         root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two></doc>'))
1648         result = []
1649         for el in root.iterchildren(reversed=True, tag='two'):
1650             result.append(el.text)
1651         self.assertEquals(['Bla', 'Two'], result)
1652
1653     def test_iterancestors(self):
1654         Element    = self.etree.Element
1655         SubElement = self.etree.SubElement
1656
1657         a = Element('a')
1658         b = SubElement(a, 'b')
1659         c = SubElement(a, 'c')
1660         d = SubElement(b, 'd')
1661         self.assertEquals(
1662             [],
1663             list(a.iterancestors()))
1664         self.assertEquals(
1665             [a],
1666             list(b.iterancestors()))
1667         self.assertEquals(
1668             [a],
1669             list(c.iterancestors()))
1670         self.assertEquals(
1671             [b, a],
1672             list(d.iterancestors()))
1673
1674     def test_iterancestors_tag(self):
1675         Element    = self.etree.Element
1676         SubElement = self.etree.SubElement
1677
1678         a = Element('a')
1679         b = SubElement(a, 'b')
1680         c = SubElement(a, 'c')
1681         d = SubElement(b, 'd')
1682         self.assertEquals(
1683             [a],
1684             list(d.iterancestors(tag='a')))
1685
1686     def test_iterdescendants(self):
1687         Element = self.etree.Element
1688         SubElement = self.etree.SubElement
1689
1690         a = Element('a')
1691         b = SubElement(a, 'b')
1692         c = SubElement(a, 'c')
1693         d = SubElement(b, 'd')
1694         e = SubElement(c, 'e')
1695
1696         self.assertEquals(
1697             [b, d, c, e],
1698             list(a.iterdescendants()))
1699         self.assertEquals(
1700             [],
1701             list(d.iterdescendants()))
1702
1703     def test_iterdescendants_tag(self):
1704         Element = self.etree.Element
1705         SubElement = self.etree.SubElement
1706
1707         a = Element('a')
1708         b = SubElement(a, 'b')
1709         c = SubElement(a, 'c')
1710         d = SubElement(b, 'd')
1711         e = SubElement(c, 'e')
1712
1713         self.assertEquals(
1714             [],
1715             list(a.iterdescendants('a')))
1716         a2 = SubElement(e, 'a')
1717         self.assertEquals(
1718             [a2],
1719             list(a.iterdescendants('a')))
1720         self.assertEquals(
1721             [a2],
1722             list(c.iterdescendants('a')))
1723
1724     def test_getroottree(self):
1725         Element = self.etree.Element
1726         SubElement = self.etree.SubElement
1727
1728         a = Element('a')
1729         b = SubElement(a, 'b')
1730         c = SubElement(a, 'c')
1731         d = SubElement(b, 'd')
1732         self.assertEquals(
1733             a,
1734             a.getroottree().getroot())
1735         self.assertEquals(
1736             a,
1737             b.getroottree().getroot())
1738         self.assertEquals(
1739             a,
1740             d.getroottree().getroot())
1741
1742     def test_getnext(self):
1743         Element    = self.etree.Element
1744         SubElement = self.etree.SubElement
1745
1746         a = Element('a')
1747         b = SubElement(a, 'b')
1748         c = SubElement(a, 'c')
1749         self.assertEquals(
1750             None,
1751             a.getnext())
1752         self.assertEquals(
1753             c,
1754             b.getnext())
1755         self.assertEquals(
1756             None,
1757             c.getnext())
1758
1759     def test_getprevious(self):
1760         Element    = self.etree.Element
1761         SubElement = self.etree.SubElement
1762
1763         a = Element('a')
1764         b = SubElement(a, 'b')
1765         c = SubElement(a, 'c')
1766         d = SubElement(b, 'd')
1767         self.assertEquals(
1768             None,
1769             a.getprevious())
1770         self.assertEquals(
1771             b,
1772             c.getprevious())
1773         self.assertEquals(
1774             None,
1775             b.getprevious())
1776
1777     def test_itersiblings(self):
1778         Element    = self.etree.Element
1779         SubElement = self.etree.SubElement
1780
1781         a = Element('a')
1782         b = SubElement(a, 'b')
1783         c = SubElement(a, 'c')
1784         d = SubElement(b, 'd')
1785         self.assertEquals(
1786             [],
1787             list(a.itersiblings()))
1788         self.assertEquals(
1789             [c],
1790             list(b.itersiblings()))
1791         self.assertEquals(
1792             [],
1793             list(c.itersiblings()))
1794         self.assertEquals(
1795             [b],
1796             list(c.itersiblings(preceding=True)))
1797         self.assertEquals(
1798             [],
1799             list(b.itersiblings(preceding=True)))
1800
1801     def test_itersiblings_tag(self):
1802         Element    = self.etree.Element
1803         SubElement = self.etree.SubElement
1804
1805         a = Element('a')
1806         b = SubElement(a, 'b')
1807         c = SubElement(a, 'c')
1808         d = SubElement(b, 'd')
1809         self.assertEquals(
1810             [],
1811             list(a.itersiblings(tag='XXX')))
1812         self.assertEquals(
1813             [c],
1814             list(b.itersiblings(tag='c')))
1815         self.assertEquals(
1816             [b],
1817             list(c.itersiblings(preceding=True, tag='b')))
1818         self.assertEquals(
1819             [],
1820             list(c.itersiblings(preceding=True, tag='c')))
1821
1822     def test_parseid(self):
1823         parseid = self.etree.parseid
1824         XML     = self.etree.XML
1825         xml_text = _bytes('''
1826         <!DOCTYPE document [
1827         <!ELEMENT document (h1,p)*>
1828         <!ELEMENT h1 (#PCDATA)>
1829         <!ATTLIST h1 myid ID #REQUIRED>
1830         <!ELEMENT p  (#PCDATA)>
1831         <!ATTLIST p  someid ID #REQUIRED>
1832         ]>
1833         <document>
1834           <h1 myid="chapter1">...</h1>
1835           <p id="note1" class="note">...</p>
1836           <p>Regular paragraph.</p>
1837           <p xml:id="xmlid">XML:ID paragraph.</p>
1838           <p someid="warn1" class="warning">...</p>
1839         </document>
1840         ''')
1841
1842         tree, dic = parseid(BytesIO(xml_text))
1843         root = tree.getroot()
1844         root2 = XML(xml_text)
1845         self.assertEquals(self._writeElement(root),
1846                           self._writeElement(root2))
1847         expected = {
1848             "chapter1" : root[0],
1849             "xmlid"    : root[3],
1850             "warn1"    : root[4]
1851             }
1852         self.assert_("chapter1" in dic)
1853         self.assert_("warn1" in dic)
1854         self.assert_("xmlid" in dic)
1855         self._checkIDDict(dic, expected)
1856
1857     def test_XMLDTDID(self):
1858         XMLDTDID = self.etree.XMLDTDID
1859         XML      = self.etree.XML
1860         xml_text = _bytes('''
1861         <!DOCTYPE document [
1862         <!ELEMENT document (h1,p)*>
1863         <!ELEMENT h1 (#PCDATA)>
1864         <!ATTLIST h1 myid ID #REQUIRED>
1865         <!ELEMENT p  (#PCDATA)>
1866         <!ATTLIST p  someid ID #REQUIRED>
1867         ]>
1868         <document>
1869           <h1 myid="chapter1">...</h1>
1870           <p id="note1" class="note">...</p>
1871           <p>Regular paragraph.</p>
1872           <p xml:id="xmlid">XML:ID paragraph.</p>
1873           <p someid="warn1" class="warning">...</p>
1874         </document>
1875         ''')
1876
1877         root, dic = XMLDTDID(xml_text)
1878         root2 = XML(xml_text)
1879         self.assertEquals(self._writeElement(root),
1880                           self._writeElement(root2))
1881         expected = {
1882             "chapter1" : root[0],
1883             "xmlid"    : root[3],
1884             "warn1"    : root[4]
1885             }
1886         self.assert_("chapter1" in dic)
1887         self.assert_("warn1" in dic)
1888         self.assert_("xmlid" in dic)
1889         self._checkIDDict(dic, expected)
1890
1891     def test_XMLDTDID_empty(self):
1892         XMLDTDID = self.etree.XMLDTDID
1893         XML      = self.etree.XML
1894         xml_text = _bytes('''
1895         <document>
1896           <h1 myid="chapter1">...</h1>
1897           <p id="note1" class="note">...</p>
1898           <p>Regular paragraph.</p>
1899           <p someid="warn1" class="warning">...</p>
1900         </document>
1901         ''')
1902
1903         root, dic = XMLDTDID(xml_text)
1904         root2 = XML(xml_text)
1905         self.assertEquals(self._writeElement(root),
1906                           self._writeElement(root2))
1907         expected = {}
1908         self._checkIDDict(dic, expected)
1909
1910     def _checkIDDict(self, dic, expected):
1911         self.assertEquals(len(dic),
1912                           len(expected))
1913         self.assertEquals(sorted(dic.items()),
1914                           sorted(expected.items()))
1915         if sys.version_info < (3,):
1916             self.assertEquals(sorted(dic.iteritems()),
1917                               sorted(expected.iteritems()))
1918         self.assertEquals(sorted(dic.keys()),
1919                           sorted(expected.keys()))
1920         if sys.version_info < (3,):
1921             self.assertEquals(sorted(dic.iterkeys()),
1922                               sorted(expected.iterkeys()))
1923         if sys.version_info < (3,):
1924             self.assertEquals(sorted(dic.values()),
1925                               sorted(expected.values()))
1926             self.assertEquals(sorted(dic.itervalues()),
1927                               sorted(expected.itervalues()))
1928
1929     def test_namespaces(self):
1930         etree = self.etree
1931
1932         r = {'foo': 'http://ns.infrae.com/foo'}
1933         e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
1934         self.assertEquals(
1935             'foo',
1936             e.prefix)
1937         self.assertEquals(
1938             _bytes('<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>'),
1939             self._writeElement(e))
1940         
1941     def test_namespaces_default(self):
1942         etree = self.etree
1943
1944         r = {None: 'http://ns.infrae.com/foo'}
1945         e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
1946         self.assertEquals(
1947             None,
1948             e.prefix)
1949         self.assertEquals(
1950             '{http://ns.infrae.com/foo}bar',
1951             e.tag)
1952         self.assertEquals(
1953             _bytes('<bar xmlns="http://ns.infrae.com/foo"></bar>'),
1954             self._writeElement(e))
1955
1956     def test_namespaces_default_and_attr(self):
1957         etree = self.etree
1958
1959         r = {None: 'http://ns.infrae.com/foo',
1960              'hoi': 'http://ns.infrae.com/hoi'}
1961         e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
1962         e.set('{http://ns.infrae.com/hoi}test', 'value')
1963         self.assertEquals(
1964             _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>'),
1965             self._writeElement(e))
1966
1967     def test_namespaces_elementtree(self):
1968         etree = self.etree
1969         r = {None: 'http://ns.infrae.com/foo',
1970              'hoi': 'http://ns.infrae.com/hoi'} 
1971         e = etree.Element('{http://ns.infrae.com/foo}z', nsmap=r)
1972         tree = etree.ElementTree(element=e)
1973         etree.SubElement(e, '{http://ns.infrae.com/hoi}x')
1974         self.assertEquals(
1975             _bytes('<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>'),
1976             self._writeElement(e))
1977
1978     def test_namespaces_default_copy_element(self):
1979         etree = self.etree
1980
1981         r = {None: 'http://ns.infrae.com/foo'}
1982         e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
1983         e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
1984
1985         e1.append(e2)
1986
1987         self.assertEquals(
1988             None,
1989             e1.prefix)
1990         self.assertEquals(
1991             None,
1992             e1[0].prefix)
1993         self.assertEquals(
1994             '{http://ns.infrae.com/foo}bar',
1995             e1.tag)
1996         self.assertEquals(
1997             '{http://ns.infrae.com/foo}bar',
1998             e1[0].tag)
1999
2000     def test_namespaces_copy_element(self):
2001         etree = self.etree
2002
2003         r = {None: 'http://ns.infrae.com/BAR'}
2004         e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
2005         e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2006
2007         e1.append(e2)
2008
2009         self.assertEquals(
2010             None,
2011             e1.prefix)
2012         self.assertNotEquals(
2013             None,
2014             e2.prefix)
2015         self.assertEquals(
2016             '{http://ns.infrae.com/BAR}bar',
2017             e1.tag)
2018         self.assertEquals(
2019             '{http://ns.infrae.com/foo}bar',
2020             e2.tag)
2021
2022     def test_namespaces_reuse_after_move(self):
2023         ns_href = "http://a.b.c"
2024         one = self.etree.fromstring(
2025             _bytes('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
2026         baz = one[0][0]
2027
2028         two = self.etree.fromstring(
2029             _bytes('<root xmlns:ns="%s"/>' % ns_href))
2030         two.append(baz)
2031         del one # make sure the source document is deallocated
2032
2033         self.assertEquals('{%s}baz' % ns_href, baz.tag)
2034         self.assertEquals(
2035             _bytes('<root xmlns:ns="%s"><ns:baz/></root>' % ns_href),
2036             self.etree.tostring(two))
2037
2038     def test_namespace_cleanup(self):
2039         xml = _bytes('<foo xmlns="F" xmlns:x="x"><bar xmlns:ns="NS" xmlns:b="b" xmlns="B"><ns:baz/></bar></foo>')
2040         root = self.etree.fromstring(xml)
2041         self.assertEquals(xml,
2042                           self.etree.tostring(root))
2043         self.etree.cleanup_namespaces(root)
2044         self.assertEquals(
2045             _bytes('<foo xmlns="F"><bar xmlns:ns="NS" xmlns="B"><ns:baz/></bar></foo>'),
2046             self.etree.tostring(root))
2047
2048     def test_element_nsmap(self):
2049         etree = self.etree
2050
2051         r = {None: 'http://ns.infrae.com/foo',
2052              'hoi': 'http://ns.infrae.com/hoi'}
2053         e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2054         self.assertEquals(
2055             r,
2056             e.nsmap)
2057
2058     def test_subelement_nsmap(self):
2059         etree = self.etree
2060
2061         re = {None: 'http://ns.infrae.com/foo',
2062              'hoi': 'http://ns.infrae.com/hoi'}
2063         e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=re)
2064
2065         rs = {None: 'http://ns.infrae.com/honk',
2066              'top': 'http://ns.infrae.com/top'}
2067         s = etree.SubElement(e, '{http://ns.infrae.com/honk}bar', nsmap=rs)
2068
2069         r = re.copy()
2070         r.update(rs)
2071         self.assertEquals(re, e.nsmap)
2072         self.assertEquals(r,  s.nsmap)
2073
2074     def test_html_prefix_nsmap(self):
2075         etree = self.etree
2076         el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
2077         self.assertEquals({'hha': None}, el.nsmap)
2078
2079     def test_getiterator_filter_namespace(self):
2080         Element = self.etree.Element
2081         SubElement = self.etree.SubElement
2082
2083         a = Element('{a}a')
2084         b = SubElement(a, '{a}b')
2085         c = SubElement(a, '{a}c')
2086         d = SubElement(b, '{b}d')
2087         e = SubElement(c, '{a}e')
2088         f = SubElement(c, '{b}f')
2089
2090         self.assertEquals(
2091             [a],
2092             list(a.getiterator('{a}a')))
2093         self.assertEquals(
2094             [],
2095             list(a.getiterator('{b}a')))
2096         self.assertEquals(
2097             [],
2098             list(a.getiterator('a')))
2099         self.assertEquals(
2100             [f],
2101             list(c.getiterator('{b}*')))
2102         self.assertEquals(
2103             [d, f],
2104             list(a.getiterator('{b}*')))
2105
2106     def test_getiterator_filter_entities(self):
2107         Element = self.etree.Element
2108         Entity = self.etree.Entity
2109         SubElement = self.etree.SubElement
2110
2111         a = Element('a')
2112         b = SubElement(a, 'b')
2113         entity_b = Entity("TEST-b")
2114         b.append(entity_b)
2115
2116         self.assertEquals(
2117             [entity_b],
2118             list(a.getiterator(Entity)))
2119
2120         entity_a = Entity("TEST-a")
2121         a.append(entity_a)
2122
2123         self.assertEquals(
2124             [entity_b, entity_a],
2125             list(a.getiterator(Entity)))
2126
2127         self.assertEquals(
2128             [entity_b],
2129             list(b.getiterator(Entity)))
2130
2131     def test_getiterator_filter_element(self):
2132         Element = self.etree.Element
2133         Comment = self.etree.Comment
2134         PI = self.etree.PI
2135         SubElement = self.etree.SubElement
2136
2137         a = Element('a')
2138         b = SubElement(a, 'b')
2139         a.append(Comment("test"))
2140         a.append(PI("pi", "content"))
2141         c = SubElement(a, 'c')
2142
2143         self.assertEquals(
2144             [a, b, c],
2145             list(a.getiterator(Element)))
2146
2147     def test_getiterator_filter_all_comment_pi(self):
2148         # ElementTree iterates over everything here
2149         Element = self.etree.Element
2150         Comment = self.etree.Comment
2151         PI = self.etree.PI
2152         SubElement = self.etree.SubElement
2153
2154         a = Element('a')
2155         b = SubElement(a, 'b')
2156         a.append(Comment("test"))
2157         a.append(PI("pi", "content"))
2158         c = SubElement(a, 'c')
2159
2160         self.assertEquals(
2161             [a, b, c],
2162             list(a.getiterator('*')))
2163
2164     def test_elementtree_find_qname(self):
2165         XML = self.etree.XML
2166         ElementTree = self.etree.ElementTree
2167         QName = self.etree.QName
2168         tree = ElementTree(XML(_bytes('<a><b><c/></b><b/><c><b/></c></a>')))
2169         self.assertEquals(tree.find(QName("c")), tree.getroot()[2])
2170
2171     def test_elementtree_findall_qname(self):
2172         XML = self.etree.XML
2173         ElementTree = self.etree.ElementTree
2174         QName = self.etree.QName
2175         tree = ElementTree(XML(_bytes('<a><b><c/></b><b/><c><b/></c></a>')))
2176         self.assertEquals(len(list(tree.findall(QName("c")))), 1)
2177
2178     def test_elementtree_findall_ns_qname(self):
2179         XML = self.etree.XML
2180         ElementTree = self.etree.ElementTree
2181         QName = self.etree.QName
2182         tree = ElementTree(XML(
2183                 _bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>')))
2184         self.assertEquals(len(list(tree.findall(QName("b")))), 2)
2185         self.assertEquals(len(list(tree.findall(QName("X", "b")))), 1)
2186
2187     def test_findall_ns(self):
2188         XML = self.etree.XML
2189         root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>'))
2190         self.assertEquals(len(root.findall(".//{X}b")), 2)
2191         self.assertEquals(len(root.findall(".//{X}*")), 2)
2192         self.assertEquals(len(root.findall(".//b")), 3)
2193
2194     def test_index(self):
2195         etree = self.etree
2196         e = etree.Element('foo')
2197         for i in range(10):
2198             etree.SubElement(e, 'a%s' % i)
2199         for i in range(10):
2200             self.assertEquals(
2201                 i,
2202                 e.index(e[i]))
2203         self.assertEquals(
2204             3, e.index(e[3], 3))
2205         self.assertRaises(
2206             ValueError, e.index, e[3], 4)
2207         self.assertRaises(
2208             ValueError, e.index, e[3], 0, 2)
2209         self.assertRaises(
2210             ValueError, e.index, e[8], 0, -3)
2211         self.assertRaises(
2212             ValueError, e.index, e[8], -5, -3)
2213         self.assertEquals(
2214             8, e.index(e[8], 0, -1))
2215         self.assertEquals(
2216             8, e.index(e[8], -12, -1))
2217         self.assertEquals(
2218             0, e.index(e[0], -12, -1))
2219
2220     def test_replace(self):
2221         etree = self.etree
2222         e = etree.Element('foo')
2223         for i in range(10):
2224             el = etree.SubElement(e, 'a%s' % i)
2225             el.text = "text%d" % i
2226             el.tail = "tail%d" % i
2227
2228         child0 = e[0]
2229         child1 = e[1]
2230         child2 = e[2]
2231
2232         e.replace(e[0], e[1])
2233         self.assertEquals(
2234             9, len(e))
2235         self.assertEquals(
2236             child1, e[0])
2237         self.assertEquals(
2238             child1.text, "text1")
2239         self.assertEquals(
2240             child1.tail, "tail1")
2241         self.assertEquals(
2242             child0.tail, "tail0")
2243         self.assertEquals(
2244             child2, e[1])
2245
2246         e.replace(e[-1], e[0])
2247         self.assertEquals(
2248             child1, e[-1])
2249         self.assertEquals(
2250             child1.text, "text1")
2251         self.assertEquals(
2252             child1.tail, "tail1")
2253         self.assertEquals(
2254             child2, e[0])
2255
2256     def test_replace_new(self):
2257         etree = self.etree
2258         e = etree.Element('foo')
2259         for i in range(10):
2260             etree.SubElement(e, 'a%s' % i)
2261
2262         new_element = etree.Element("test")
2263         new_element.text = "TESTTEXT"
2264         new_element.tail = "TESTTAIL"
2265         child1 = e[1]
2266         e.replace(e[0], new_element)
2267         self.assertEquals(
2268             new_element, e[0])
2269         self.assertEquals(
2270             "TESTTEXT",
2271             e[0].text)
2272         self.assertEquals(
2273             "TESTTAIL",
2274             e[0].tail)
2275         self.assertEquals(
2276             child1, e[1])
2277
2278     def test_setslice_all_empty_reversed(self):
2279         Element = self.etree.Element
2280         SubElement = self.etree.SubElement
2281
2282         a = Element('a')
2283
2284         e = Element('e')
2285         f = Element('f')
2286         g = Element('g')
2287
2288         s = [e, f, g]
2289         a[::-1] = s
2290         self.assertEquals(
2291             [g, f, e],
2292             list(a))
2293
2294     def test_setslice_step(self):
2295         Element = self.etree.Element
2296         SubElement = self.etree.SubElement
2297
2298         a = Element('a')
2299         b = SubElement(a, 'b')
2300         c = SubElement(a, 'c')
2301         d = SubElement(a, 'd')
2302         e = SubElement(a, 'e')
2303
2304         x = Element('x')
2305         y = Element('y')
2306
2307         a[1::2] = [x, y]
2308         self.assertEquals(
2309             [b, x, d, y],
2310             list(a))
2311
2312     def test_setslice_step_negative(self):
2313         Element = self.etree.Element
2314         SubElement = self.etree.SubElement
2315
2316         a = Element('a')
2317         b = SubElement(a, 'b')
2318         c = SubElement(a, 'c')
2319         d = SubElement(a, 'd')
2320         e = SubElement(a, 'e')
2321
2322         x = Element('x')
2323         y = Element('y')
2324
2325         a[1::-1] = [x, y]
2326         self.assertEquals(
2327             [y, x, d, e],
2328             list(a))
2329
2330     def test_setslice_step_negative2(self):
2331         Element = self.etree.Element
2332         SubElement = self.etree.SubElement
2333
2334         a = Element('a')
2335         b = SubElement(a, 'b')
2336         c = SubElement(a, 'c')
2337         d = SubElement(a, 'd')
2338         e = SubElement(a, 'e')
2339
2340         x = Element('x')
2341         y = Element('y')
2342
2343         a[::-2] = [x, y]
2344         self.assertEquals(
2345             [b, y, d, x],
2346             list(a))
2347
2348     def test_setslice_step_overrun(self):
2349         Element = self.etree.Element
2350         SubElement = self.etree.SubElement
2351         try:
2352             slice
2353         except NameError:
2354             print("slice() not found")
2355             return
2356
2357         a = Element('a')
2358         b = SubElement(a, 'b')
2359         c = SubElement(a, 'c')
2360         d = SubElement(a, 'd')
2361         e = SubElement(a, 'e')
2362
2363         x = Element('x')
2364         y = Element('y')
2365         z = Element('z')
2366
2367         self.assertRaises(
2368             ValueError,
2369             operator.setitem, a, slice(1,None,2), [x, y, z])
2370
2371         self.assertEquals(
2372             [b, c, d, e],
2373             list(a))
2374
2375     def test_sourceline_XML(self):
2376         XML = self.etree.XML
2377         root = XML(_bytes('''<?xml version="1.0"?>
2378         <root><test>
2379
2380         <bla/></test>
2381         </root>
2382         '''))
2383
2384         self.assertEquals(
2385             [2, 2, 4],
2386             [ el.sourceline for el in root.getiterator() ])
2387
2388     def test_sourceline_parse(self):
2389         parse = self.etree.parse
2390         tree = parse(fileInTestDir('include/test_xinclude.xml'))
2391
2392         self.assertEquals(
2393             [1, 2, 3],
2394             [ el.sourceline for el in tree.getiterator() ])
2395
2396     def test_sourceline_iterparse_end(self):
2397         iterparse = self.etree.iterparse
2398         lines = [ el.sourceline for (event, el) in 
2399                   iterparse(fileInTestDir('include/test_xinclude.xml')) ]
2400
2401         self.assertEquals(
2402             [2, 3, 1],
2403             lines)
2404
2405     def test_sourceline_iterparse_start(self):
2406         iterparse = self.etree.iterparse
2407         lines = [ el.sourceline for (event, el) in 
2408                   iterparse(fileInTestDir('include/test_xinclude.xml'),
2409                             events=("start",)) ]
2410
2411         self.assertEquals(
2412             [1, 2, 3],
2413             lines)
2414
2415     def test_sourceline_element(self):
2416         Element = self.etree.Element
2417         SubElement = self.etree.SubElement
2418         el = Element("test")
2419         self.assertEquals(None, el.sourceline)
2420
2421         child = SubElement(el, "test")
2422         self.assertEquals(None, el.sourceline)
2423         self.assertEquals(None, child.sourceline)
2424
2425     def test_XML_base_url_docinfo(self):
2426         etree = self.etree
2427         root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
2428         docinfo = root.getroottree().docinfo
2429         self.assertEquals(docinfo.URL, "http://no/such/url")
2430
2431     def test_XML_set_base_url_docinfo(self):
2432         etree = self.etree
2433         root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
2434         docinfo = root.getroottree().docinfo
2435         self.assertEquals(docinfo.URL, "http://no/such/url")
2436         docinfo.URL = "https://secret/url"
2437         self.assertEquals(docinfo.URL, "https://secret/url")
2438
2439     def test_parse_stringio_base_url(self):
2440         etree = self.etree
2441         tree = etree.parse(BytesIO("<root/>"), base_url="http://no/such/url")
2442         docinfo = tree.docinfo
2443         self.assertEquals(docinfo.URL, "http://no/such/url")
2444
2445     def test_parse_base_url_docinfo(self):
2446         etree = self.etree
2447         tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
2448                            base_url="http://no/such/url")
2449         docinfo = tree.docinfo
2450         self.assertEquals(docinfo.URL, "http://no/such/url")
2451
2452     def test_HTML_base_url_docinfo(self):
2453         etree = self.etree
2454         root = etree.HTML(_bytes("<html/>"), base_url="http://no/such/url")
2455         docinfo = root.getroottree().docinfo
2456         self.assertEquals(docinfo.URL, "http://no/such/url")
2457
2458     def test_docinfo_public(self):
2459         etree = self.etree
2460         xml_header = '<?xml version="1.0" encoding="ascii"?>'
2461         pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
2462         sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
2463         doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id)
2464
2465         xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')
2466
2467         tree = etree.parse(BytesIO(xml))
2468         docinfo = tree.docinfo
2469         self.assertEquals(docinfo.encoding,    "ascii")
2470         self.assertEquals(docinfo.xml_version, "1.0")
2471         self.assertEquals(docinfo.public_id,   pub_id)
2472         self.assertEquals(docinfo.system_url,  sys_id)
2473         self.assertEquals(docinfo.root_name,   'html')
2474         self.assertEquals(docinfo.doctype, doctype_string)
2475
2476     def test_docinfo_system(self):
2477         etree = self.etree
2478         xml_header = '<?xml version="1.0" encoding="UTF-8"?>'
2479         sys_id = "some.dtd"
2480         doctype_string = '<!DOCTYPE html SYSTEM "%s">' % sys_id
2481         xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')
2482
2483         tree = etree.parse(BytesIO(xml))
2484         docinfo = tree.docinfo
2485         self.assertEquals(docinfo.encoding,    "UTF-8")
2486         self.assertEquals(docinfo.xml_version, "1.0")
2487         self.assertEquals(docinfo.public_id,   None)
2488         self.assertEquals(docinfo.system_url,  sys_id)
2489         self.assertEquals(docinfo.root_name,   'html')
2490         self.assertEquals(docinfo.doctype, doctype_string)
2491
2492     def test_docinfo_empty(self):
2493         etree = self.etree
2494         xml = _bytes('<html><body></body></html>')
2495         tree = etree.parse(BytesIO(xml))
2496         docinfo = tree.docinfo
2497         self.assertEquals(docinfo.encoding,    "UTF-8")
2498         self.assertEquals(docinfo.xml_version, "1.0")
2499         self.assertEquals(docinfo.public_id,   None)
2500         self.assertEquals(docinfo.system_url,  None)
2501         self.assertEquals(docinfo.root_name,   'html')
2502         self.assertEquals(docinfo.doctype, '')
2503
2504     def test_docinfo_name_only(self):
2505         etree = self.etree
2506         xml = _bytes('<!DOCTYPE root><root></root>')
2507         tree = etree.parse(BytesIO(xml))
2508         docinfo = tree.docinfo
2509         self.assertEquals(docinfo.encoding,    "UTF-8")
2510         self.assertEquals(docinfo.xml_version, "1.0")
2511         self.assertEquals(docinfo.public_id,   None)
2512         self.assertEquals(docinfo.system_url,  None)
2513         self.assertEquals(docinfo.root_name,   'root')
2514         self.assertEquals(docinfo.doctype, '<!DOCTYPE root>')
2515
2516     def test_doctype_name_only_roundtrip(self):
2517         etree = self.etree
2518         xml = _bytes('<!DOCTYPE root>\n<root/>')
2519         tree = etree.parse(BytesIO(xml))
2520         self.assertEquals(xml, etree.tostring(tree))
2521
2522     def test_doctype_output_override(self):
2523         etree = self.etree
2524         pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
2525         sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
2526         doctype_string = _bytes('<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id))
2527
2528         xml = _bytes('<!DOCTYPE root>\n<root/>')
2529         tree = etree.parse(BytesIO(xml))
2530         self.assertEquals(xml.replace(_bytes('<!DOCTYPE root>'), doctype_string),
2531                           etree.tostring(tree, doctype=doctype_string))
2532
2533     def test_xml_base(self):
2534         etree = self.etree
2535         root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
2536         self.assertEquals(root.base, "http://no/such/url")
2537         self.assertEquals(
2538             root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
2539         root.base = "https://secret/url"
2540         self.assertEquals(root.base, "https://secret/url")
2541         self.assertEquals(
2542             root.get('{http://www.w3.org/XML/1998/namespace}base'),
2543             "https://secret/url")
2544
2545     def test_xml_base_attribute(self):
2546         etree = self.etree
2547         root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
2548         self.assertEquals(root.base, "http://no/such/url")
2549         self.assertEquals(
2550             root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
2551         root.set('{http://www.w3.org/XML/1998/namespace}base',
2552                  "https://secret/url")
2553         self.assertEquals(root.base, "https://secret/url")
2554         self.assertEquals(
2555             root.get('{http://www.w3.org/XML/1998/namespace}base'),
2556             "https://secret/url")
2557
2558     def test_html_base(self):
2559         etree = self.etree
2560         root = etree.HTML(_bytes("<html><body></body></html>"),
2561                           base_url="http://no/such/url")
2562         self.assertEquals(root.base, "http://no/such/url")
2563
2564     def test_html_base_tag(self):
2565         etree = self.etree
2566         root = etree.HTML(_bytes('<html><head><base href="http://no/such/url"></head></html>'))
2567         self.assertEquals(root.base, "http://no/such/url")
2568
2569     def test_parse_fileobject_unicode(self):
2570         # parse from a file object that returns unicode strings
2571         f = LargeFileLikeUnicode()
2572         tree = self.etree.parse(f)
2573         root = tree.getroot()
2574         self.assert_(root.tag.endswith('root'))
2575
2576     def test_dtd_io(self):
2577         # check that DTDs that go in also go back out
2578         xml = _bytes('''\
2579         <!DOCTYPE test SYSTEM "test.dtd" [
2580           <!ENTITY entity "tasty">
2581           <!ELEMENT test (a)>
2582           <!ELEMENT a (#PCDATA)>
2583         ]>
2584         <test><a>test-test</a></test>\
2585         ''')
2586         tree = self.etree.parse(BytesIO(xml))
2587         self.assertEqual(self.etree.tostring(tree).replace(_bytes(" "), _bytes("")),
2588                          xml.replace(_bytes(" "), _bytes("")))
2589
2590     def test_byte_zero(self):
2591         Element = self.etree.Element
2592
2593         a = Element('a')
2594         self.assertRaises(ValueError, setattr, a, "text", 'ha\0ho')
2595         self.assertRaises(ValueError, setattr, a, "tail", 'ha\0ho')
2596
2597         self.assertRaises(ValueError, Element, 'ha\0ho')
2598
2599     def test_unicode_byte_zero(self):
2600         Element = self.etree.Element
2601
2602         a = Element('a')
2603         self.assertRaises(ValueError, setattr, a, "text",
2604                           _str('ha\0ho'))
2605         self.assertRaises(ValueError, setattr, a, "tail",
2606                           _str('ha\0ho'))
2607
2608         self.assertRaises(ValueError, Element,
2609                           _str('ha\0ho'))
2610
2611     def test_byte_invalid(self):
2612         Element = self.etree.Element
2613
2614         a = Element('a')
2615         self.assertRaises(ValueError, setattr, a, "text", 'ha\x07ho')
2616         self.assertRaises(ValueError, setattr, a, "text", 'ha\x02ho')
2617
2618         self.assertRaises(ValueError, setattr, a, "tail", 'ha\x07ho')
2619         self.assertRaises(ValueError, setattr, a, "tail", 'ha\x02ho')
2620
2621         self.assertRaises(ValueError, Element, 'ha\x07ho')
2622         self.assertRaises(ValueError, Element, 'ha\x02ho')
2623
2624     def test_unicode_byte_invalid(self):
2625         Element = self.etree.Element
2626
2627         a = Element('a')
2628         self.assertRaises(ValueError, setattr, a, "text",
2629                           _str('ha\x07ho'))
2630         self.assertRaises(ValueError, setattr, a, "text",
2631                           _str('ha\x02ho'))
2632
2633         self.assertRaises(ValueError, setattr, a, "tail",
2634                           _str('ha\x07ho'))
2635         self.assertRaises(ValueError, setattr, a, "tail",
2636                           _str('ha\x02ho'))
2637
2638         self.assertRaises(ValueError, Element,
2639                           _str('ha\x07ho'))
2640         self.assertRaises(ValueError, Element,
2641                           _str('ha\x02ho'))
2642
2643     def test_unicode_byte_invalid_sequence(self):
2644         Element = self.etree.Element
2645
2646         a = Element('a')
2647         self.assertRaises(ValueError, setattr, a, "text",
2648                           _str('ha\u1234\x07ho'))
2649         self.assertRaises(ValueError, setattr, a, "text",
2650                           _str('ha\u1234\x02ho'))
2651
2652         self.assertRaises(ValueError, setattr, a, "tail",
2653                           _str('ha\u1234\x07ho'))
2654         self.assertRaises(ValueError, setattr, a, "tail",
2655                           _str('ha\u1234\x02ho'))
2656
2657         self.assertRaises(ValueError, Element,
2658                           _str('ha\u1234\x07ho'))
2659         self.assertRaises(ValueError, Element,
2660                           _str('ha\u1234\x02ho'))
2661
2662     def test_encoding_tostring_utf16(self):
2663         # ElementTree fails to serialize this
2664         tostring = self.etree.tostring
2665         Element = self.etree.Element
2666         SubElement = self.etree.SubElement
2667         
2668         a = Element('a')
2669         b = SubElement(a, 'b')
2670         c = SubElement(a, 'c')
2671
2672         result = tostring(a, encoding='UTF-16')
2673         self.assertEquals(_bytes('<a><b></b><c></c></a>'),
2674                           canonicalize(result))
2675
2676     def test_tostring_none(self):
2677         # ElementTree raises an AssertionError here
2678         tostring = self.etree.tostring
2679         self.assertRaises(TypeError, self.etree.tostring, None)
2680
2681     def test_tostring_pretty(self):
2682         tostring = self.etree.tostring
2683         Element = self.etree.Element
2684         SubElement = self.etree.SubElement
2685
2686         a = Element('a')
2687         b = SubElement(a, 'b')
2688         c = SubElement(a, 'c')
2689
2690         result = tostring(a)
2691         self.assertEquals(result, _bytes("<a><b/><c/></a>"))
2692
2693         result = tostring(a, pretty_print=False)
2694         self.assertEquals(result, _bytes("<a><b/><c/></a>"))
2695
2696         result = tostring(a, pretty_print=True)
2697         self.assertEquals(result, _bytes("<a>\n  <b/>\n  <c/>\n</a>\n"))
2698
2699     def test_tostring_with_tail(self):
2700         tostring = self.etree.tostring
2701         Element = self.etree.Element
2702         SubElement = self.etree.SubElement
2703
2704         a = Element('a')
2705         a.tail = "aTAIL"
2706         b = SubElement(a, 'b')
2707         b.tail = "bTAIL"
2708         c = SubElement(a, 'c')
2709
2710         result = tostring(a)
2711         self.assertEquals(result, _bytes("<a><b/>bTAIL<c/></a>aTAIL"))
2712
2713         result = tostring(a, with_tail=False)
2714         self.assertEquals(result, _bytes("<a><b/>bTAIL<c/></a>"))
2715
2716         result = tostring(a, with_tail=True)
2717         self.assertEquals(result, _bytes("<a><b/>bTAIL<c/></a>aTAIL"))
2718
2719     def test_standalone(self):
2720         tostring = self.etree.tostring
2721         XML = self.etree.XML
2722         ElementTree = self.etree.ElementTree
2723         Element = self.etree.Element
2724
2725         tree = Element("root").getroottree()
2726         self.assertEquals(None, tree.docinfo.standalone)
2727
2728         tree = XML(_bytes("<root/>")).getroottree()
2729         self.assertEquals(None, tree.docinfo.standalone)
2730
2731         tree = XML(_bytes(
2732             "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"
2733             )).getroottree()
2734         self.assertEquals(True, tree.docinfo.standalone)
2735
2736         tree = XML(_bytes(
2737             "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"
2738             )).getroottree()
2739         self.assertEquals(False, tree.docinfo.standalone)
2740
2741     def test_tostring_standalone(self):
2742         tostring = self.etree.tostring
2743         XML = self.etree.XML
2744         ElementTree = self.etree.ElementTree
2745
2746         root = XML(_bytes("<root/>"))
2747
2748         tree = ElementTree(root)
2749         self.assertEquals(None, tree.docinfo.standalone)
2750
2751         result = tostring(root, xml_declaration=True, encoding="ASCII")
2752         self.assertEquals(result, _bytes(
2753             "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
2754
2755         result = tostring(root, xml_declaration=True, encoding="ASCII",
2756                           standalone=True)
2757         self.assertEquals(result, _bytes(
2758             "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
2759
2760         tree = ElementTree(XML(result))
2761         self.assertEquals(True, tree.docinfo.standalone)
2762
2763         result = tostring(root, xml_declaration=True, encoding="ASCII",
2764                           standalone=False)
2765         self.assertEquals(result, _bytes(
2766             "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"))
2767
2768         tree = ElementTree(XML(result))
2769         self.assertEquals(False, tree.docinfo.standalone)
2770
2771     def test_tostring_standalone_in_out(self):
2772         tostring = self.etree.tostring
2773         XML = self.etree.XML
2774         ElementTree = self.etree.ElementTree
2775
2776         root = XML(_bytes(
2777             "<?xml version='1.0' encoding='UTF-8' standalone='yes'?>\n<root/>"))
2778
2779         tree = ElementTree(root)
2780         self.assertEquals(True, tree.docinfo.standalone)
2781
2782         result = tostring(root, xml_declaration=True, encoding="ASCII")
2783         self.assertEquals(result, _bytes(
2784             "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
2785
2786         result = tostring(root, xml_declaration=True, encoding="ASCII",
2787                           standalone=True)
2788         self.assertEquals(result, _bytes(
2789             "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
2790
2791     def test_tostring_method_text_encoding(self):
2792         tostring = self.etree.tostring
2793         Element = self.etree.Element
2794         SubElement = self.etree.SubElement
2795         
2796         a = Element('a')
2797         a.text = "A"
2798         a.tail = "tail"
2799         b = SubElement(a, 'b')
2800         b.text = "B"
2801         b.tail = _str("Søk pÃ¥ nettet")
2802         c = SubElement(a, 'c')
2803         c.text = "C"
2804
2805         result = tostring(a, method="text", encoding="UTF-16")
2806
2807         self.assertEquals(_str('ABSøk pÃ¥ nettetCtail').encode("UTF-16"),
2808                           result)
2809
2810     def test_tostring_method_text_unicode(self):
2811         tostring = self.etree.tostring
2812         Element = self.etree.Element
2813         SubElement = self.etree.SubElement
2814         
2815         a = Element('a')
2816         a.text = _str('Søk pÃ¥ nettetA')
2817         a.tail = "tail"
2818         b = SubElement(a, 'b')
2819         b.text = "B"
2820         b.tail = _str('Søk pÃ¥ nettetB')
2821         c = SubElement(a, 'c')
2822         c.text = "C"
2823         
2824         self.assertRaises(UnicodeEncodeError,
2825                           tostring, a, method="text")
2826         
2827         self.assertEquals(
2828             _str('Søk pÃ¥ nettetABSøk pÃ¥ nettetBCtail').encode('utf-8'),
2829             tostring(a, encoding="UTF-8", method="text"))
2830
2831     def test_tounicode(self):
2832         tounicode = self.etree.tounicode
2833         Element = self.etree.Element
2834         SubElement = self.etree.SubElement
2835         
2836         a = Element('a')
2837         b = SubElement(a, 'b')
2838         c = SubElement(a, 'c')
2839         
2840         self.assert_(isinstance(tounicode(a), _unicode))
2841         self.assertEquals(_bytes('<a><b></b><c></c></a>'),
2842                           canonicalize(tounicode(a)))
2843
2844     def test_tounicode_element(self):
2845         tounicode = self.etree.tounicode
2846         Element = self.etree.Element
2847         SubElement = self.etree.SubElement
2848         
2849         a = Element('a')
2850         b = SubElement(a, 'b')
2851         c = SubElement(a, 'c')
2852         d = SubElement(c, 'd')
2853         self.assert_(isinstance(tounicode(b), _unicode))
2854         self.assert_(isinstance(tounicode(c), _unicode))
2855         self.assertEquals(_bytes('<b></b>'),
2856                           canonicalize(tounicode(b)))
2857         self.assertEquals(_bytes('<c><d></d></c>'),
2858                           canonicalize(tounicode(c)))
2859
2860     def test_tounicode_none(self):
2861         tounicode = self.etree.tounicode
2862         self.assertRaises(TypeError, self.etree.tounicode, None)
2863
2864     def test_tounicode_element_tail(self):
2865         tounicode = self.etree.tounicode
2866         Element = self.etree.Element
2867         SubElement = self.etree.SubElement
2868         
2869         a = Element('a')
2870         b = SubElement(a, 'b')
2871         c = SubElement(a, 'c')
2872         d = SubElement(c, 'd')
2873         b.tail = 'Foo'
2874
2875         self.assert_(isinstance(tounicode(b), _unicode))
2876         self.assert_(tounicode(b) == '<b/>Foo' or
2877                      tounicode(b) == '<b />Foo')
2878
2879     def test_tounicode_pretty(self):
2880         tounicode = self.etree.tounicode
2881         Element = self.etree.Element
2882         SubElement = self.etree.SubElement
2883
2884         a = Element('a')
2885         b = SubElement(a, 'b')
2886         c = SubElement(a, 'c')
2887
2888         result = tounicode(a)
2889         self.assertEquals(result, "<a><b/><c/></a>")
2890
2891         result = tounicode(a, pretty_print=False)
2892         self.assertEquals(result, "<a><b/><c/></a>")
2893
2894         result = tounicode(a, pretty_print=True)
2895         self.assertEquals(result, "<a>\n  <b/>\n  <c/>\n</a>\n")
2896
2897     def test_tostring_unicode(self):
2898         tostring = self.etree.tostring
2899         Element = self.etree.Element
2900         SubElement = self.etree.SubElement
2901         
2902         a = Element('a')
2903         b = SubElement(a, 'b')
2904         c = SubElement(a, 'c')
2905         
2906         self.assert_(isinstance(tostring(a, encoding=_unicode), _unicode))
2907         self.assertEquals(_bytes('<a><b></b><c></c></a>'),
2908                           canonicalize(tostring(a, encoding=_unicode)))
2909
2910     def test_tostring_unicode_element(self):
2911         tostring = self.etree.tostring
2912         Element = self.etree.Element
2913         SubElement = self.etree.SubElement
2914         
2915         a = Element('a')
2916         b = SubElement(a, 'b')
2917         c = SubElement(a, 'c')
2918         d = SubElement(c, 'd')
2919         self.assert_(isinstance(tostring(b, encoding=_unicode), _unicode))
2920         self.assert_(isinstance(tostring(c, encoding=_unicode), _unicode))
2921         self.assertEquals(_bytes('<b></b>'),
2922                           canonicalize(tostring(b, encoding=_unicode)))
2923         self.assertEquals(_bytes('<c><d></d></c>'),
2924                           canonicalize(tostring(c, encoding=_unicode)))
2925
2926     def test_tostring_unicode_none(self):
2927         tostring = self.etree.tostring
2928         self.assertRaises(TypeError, self.etree.tostring,
2929                           None, encoding=_unicode)
2930
2931     def test_tostring_unicode_element_tail(self):
2932         tostring = self.etree.tostring
2933         Element = self.etree.Element
2934         SubElement = self.etree.SubElement
2935         
2936         a = Element('a')
2937         b = SubElement(a, 'b')
2938         c = SubElement(a, 'c')
2939         d = SubElement(c, 'd')
2940         b.tail = 'Foo'
2941
2942         self.assert_(isinstance(tostring(b, encoding=_unicode), _unicode))
2943         self.assert_(tostring(b, encoding=_unicode) == '<b/>Foo' or
2944                      tostring(b, encoding=_unicode) == '<b />Foo')
2945
2946     def test_tostring_unicode_pretty(self):
2947         tostring = self.etree.tostring
2948         Element = self.etree.Element
2949         SubElement = self.etree.SubElement
2950
2951         a = Element('a')
2952         b = SubElement(a, 'b')
2953         c = SubElement(a, 'c')
2954
2955         result = tostring(a, encoding=_unicode)
2956         self.assertEquals(result, "<a><b/><c/></a>")
2957
2958         result = tostring(a, encoding=_unicode, pretty_print=False)
2959         self.assertEquals(result, "<a><b/><c/></a>")
2960
2961         result = tostring(a, encoding=_unicode, pretty_print=True)
2962         self.assertEquals(result, "<a>\n  <b/>\n  <c/>\n</a>\n")
2963
2964     # helper methods
2965
2966     def _writeElement(self, element, encoding='us-ascii', compression=0):
2967         """Write out element for comparison.
2968         """
2969         ElementTree = self.etree.ElementTree
2970         f = BytesIO()
2971         tree = ElementTree(element=element)
2972         tree.write(f, encoding=encoding, compression=compression)
2973         data = f.getvalue()
2974         if compression:
2975             data = zlib.decompress(data)
2976         return canonicalize(data)
2977
2978
class XIncludeTestCase(HelperTestCase):
    # Shared XInclude tests.  Every test calls self.include(tree), which
    # this class does not define itself.

    def test_xinclude_text(self):
        # A parse="text" include replaces the xi:include element with the
        # raw file content, which ends up merged into the root's text.
        filename = fileInTestDir('test_broken.xml')
        root = etree.XML(_bytes('''\
        <doc xmlns:xi="http://www.w3.org/2001/XInclude">
          <xi:include href="%s" parse="text"/>
        </doc>
        ''' % filename))
        old_text = root.text
        content = read_file(filename)
        old_tail = root[0].tail

        self.include( etree.ElementTree(root) )
        self.assertEqual(old_text + content + old_tail,
                         root.text)

    def test_xinclude(self):
        tree = etree.parse(fileInTestDir('include/test_xinclude.xml'))
        self.assertNotEqual(
            'a',
            tree.getroot()[1].tag)
        # process xincludes
        self.include( tree )
        # check whether we find it replaced with included data
        self.assertEqual(
            'a',
            tree.getroot()[1].tag)

    def test_xinclude_resolver(self):
        # A custom resolver registered on the parser is consulted for the
        # DTD, for the input document and for the included resource.
        class res(etree.Resolver):
            include_text = read_file(fileInTestDir('test.xml'))
            # Records which kinds of URL were passed to resolve().
            called = {}
            def resolve(self, url, id, context):
                if url.endswith(".dtd"):
                    self.called["dtd"] = True
                    return self.resolve_filename(
                        fileInTestDir('test.dtd'), context)
                elif url.endswith("test_xinclude.xml"):
                    self.called["input"] = True
                    return None # delegate to default resolver
                else:
                    self.called["include"] = True
                    return self.resolve_string(self.include_text, context)

        res_instance = res()
        parser = etree.XMLParser(load_dtd = True)
        parser.resolvers.add(res_instance)

        tree = etree.parse(fileInTestDir('include/test_xinclude.xml'),
                           parser = parser)

        self.include(tree)

        called = sorted(res_instance.called.items())
        self.assertEqual(
            [("dtd", True), ("include", True), ("input", True)],
            called)
3037
class ETreeXIncludeTestCase(XIncludeTestCase):
    # Runs the inherited XInclude tests through ElementTree.xinclude().
    def include(self, tree):
        # Process the includes by calling xinclude() on the tree itself.
        tree.xinclude()
3041
3042
class ElementIncludeTestCase(XIncludeTestCase):
    # Runs the inherited XInclude tests through lxml.ElementInclude.
    # The module is imported in the class body, which makes it available
    # as the class attribute self.ElementInclude.
    from lxml import ElementInclude

    def include(self, tree):
        # ElementInclude.include() is handed the root element, not the tree.
        root = tree.getroot()
        self.ElementInclude.include(root)
3047
3048
class ETreeC14NTestCase(HelperTestCase):
    # Tests for C14N output through ElementTree.write_c14n() and through
    # tostring(method='c14n').

    def test_c14n(self):
        # C14N output expands the empty element into a start/end tag pair.
        tree = self.parse(_bytes('<a><b/></a>'))
        f = BytesIO()
        tree.write_c14n(f)
        s = f.getvalue()
        self.assertEqual(_bytes('<a><b></b></a>'),
                         s)

    def test_c14n_gzip(self):
        # With compression, the data written to a file object can be read
        # back through gzip.GzipFile.
        tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
        f = BytesIO()
        tree.write_c14n(f, compression=9)
        gzfile = gzip.GzipFile(fileobj=BytesIO(f.getvalue()))
        try:
            s = gzfile.read()
        finally:
            gzfile.close()
        self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
                         s)

    def test_c14n_file(self):
        # write_c14n() also accepts a file name as its target.
        tree = self.parse(_bytes('<a><b/></a>'))
        handle, filename = tempfile.mkstemp()
        try:
            tree.write_c14n(filename)
            data = read_file(filename, 'rb')
        finally:
            # always release the descriptor and remove the temporary file
            os.close(handle)
            os.remove(filename)
        self.assertEqual(_bytes('<a><b></b></a>'),
                         data)

    def test_c14n_file_gzip(self):
        # Compressed output written to a file name can be read back with
        # gzip.open().
        tree = self.parse(_bytes('<a>'+'<b/>'*200+'</a>'))
        handle, filename = tempfile.mkstemp()
        try:
            tree.write_c14n(filename, compression=9)
            f = gzip.open(filename, 'rb')
            try:
                data = f.read()
            finally:
                f.close()
        finally:
            # always release the descriptor and remove the temporary file
            os.close(handle)
            os.remove(filename)
        self.assertEqual(_bytes('<a>'+'<b></b>'*200+'</a>'),
                         data)

    def test_c14n_with_comments(self):
        # Comments are kept by default and with with_comments=True, and
        # dropped with with_comments=False.
        tree = self.parse(_bytes('<!--hi--><a><!--ho--><b/></a><!--hu-->'))
        f = BytesIO()
        tree.write_c14n(f)
        s = f.getvalue()
        self.assertEqual(_bytes('<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
                         s)
        f = BytesIO()
        tree.write_c14n(f, with_comments=True)
        s = f.getvalue()
        self.assertEqual(_bytes('<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
                         s)
        f = BytesIO()
        tree.write_c14n(f, with_comments=False)
        s = f.getvalue()
        self.assertEqual(_bytes('<a><b></b></a>'),
                         s)

    def test_c14n_tostring_with_comments(self):
        # Same comment handling through tostring(method='c14n') on a tree.
        tree = self.parse(_bytes('<!--hi--><a><!--ho--><b/></a><!--hu-->'))
        s = etree.tostring(tree, method='c14n')
        self.assertEqual(_bytes('<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
                         s)
        s = etree.tostring(tree, method='c14n', with_comments=True)
        self.assertEqual(_bytes('<!--hi-->\n<a><!--ho--><b></b></a>\n<!--hu-->'),
                         s)
        s = etree.tostring(tree, method='c14n', with_comments=False)
        self.assertEqual(_bytes('<a><b></b></a>'),
                         s)

    def test_c14n_element_tostring_with_comments(self):
        # Serialising the root element leaves out the comments that
        # surround it at document level.
        tree = self.parse(_bytes('<!--hi--><a><!--ho--><b/></a><!--hu-->'))
        s = etree.tostring(tree.getroot(), method='c14n')
        self.assertEqual(_bytes('<a><!--ho--><b></b></a>'),
                         s)
        s = etree.tostring(tree.getroot(), method='c14n', with_comments=True)
        self.assertEqual(_bytes('<a><!--ho--><b></b></a>'),
                         s)
        s = etree.tostring(tree.getroot(), method='c14n', with_comments=False)
        self.assertEqual(_bytes('<a><b></b></a>'),
                         s)

    def test_c14n_exclusive(self):
        # exclusive=True drops the unused 'y' declaration and moves the
        # 'z' declaration onto the element that uses it; the default and
        # exclusive=False keep all declarations on the root.
        tree = self.parse(_bytes(
                '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
        f = BytesIO()
        tree.write_c14n(f)
        s = f.getvalue()
        self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
                         s)
        f = BytesIO()
        tree.write_c14n(f, exclusive=False)
        s = f.getvalue()
        self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
                         s)
        f = BytesIO()
        tree.write_c14n(f, exclusive=True)
        s = f.getvalue()
        self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
                         s)

    def test_c14n_tostring_exclusive(self):
        # Same exclusive handling through tostring(method='c14n') on a tree.
        tree = self.parse(_bytes(
                '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
        s = etree.tostring(tree, method='c14n')
        self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
                         s)
        s = etree.tostring(tree, method='c14n', exclusive=False)
        self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
                         s)
        s = etree.tostring(tree, method='c14n', exclusive=True)
        self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
                         s)

    def test_c14n_element_tostring_exclusive(self):
        # Same exclusive handling when serialising the root element and
        # its child element directly.
        tree = self.parse(_bytes(
                '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))
        s = etree.tostring(tree.getroot(), method='c14n')
        self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
                         s)
        s = etree.tostring(tree.getroot(), method='c14n', exclusive=False)
        self.assertEqual(_bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
                         s)
        s = etree.tostring(tree.getroot(), method='c14n', exclusive=True)
        self.assertEqual(_bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
                         s)

        s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=False)
        self.assertEqual(_bytes('<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
                         s)
        s = etree.tostring(tree.getroot()[0], method='c14n', exclusive=True)
        self.assertEqual(_bytes('<z:b xmlns:z="http://cde"></z:b>'),
                         s)
3191
3192
class ETreeWriteTestCase(HelperTestCase):
    """Tests for ``ElementTree.write()``.

    Covers writing to in-memory file-like objects and to named files, both
    plain and with the ``compression`` option, and reading the compressed
    result back through ``gzip`` and through ``etree.parse()``.
    """

    # Repetitive document shared by all compression tests.
    _large_xml = _bytes('<a>'+'<b/>'*200+'</a>')

    def _gunzip(self, data):
        """Return the decompressed content of the gzip byte string `data`."""
        gzfile = gzip.GzipFile(fileobj=BytesIO(data))
        try:
            return gzfile.read()
        finally:
            gzfile.close()

    def test_write(self):
        """Writing to a file-like object yields the plain serialisation."""
        tree = self.parse(_bytes('<a><b/></a>'))
        f = BytesIO()
        tree.write(f)
        s = f.getvalue()
        self.assertEqual(_bytes('<a><b/></a>'),
                         s)

    def test_write_gzip(self):
        """Output written with ``compression=9`` is readable by ``gzip``."""
        tree = self.parse(self._large_xml)
        f = BytesIO()
        tree.write(f, compression=9)
        s = self._gunzip(f.getvalue())
        self.assertEqual(self._large_xml,
                         s)

    def test_write_gzip_level(self):
        """Check the output for compression levels 0 (default), 1 and 9."""
        tree = self.parse(self._large_xml)

        # Level 0 must produce the same bytes as not passing the option.
        f = BytesIO()
        tree.write(f, compression=0)
        s0 = f.getvalue()

        f = BytesIO()
        tree.write(f)
        self.assertEqual(f.getvalue(), s0)

        # Compressed output must not be larger than the plain output and
        # must decompress to the original document.
        f = BytesIO()
        tree.write(f, compression=1)
        s = f.getvalue()
        self.assertTrue(len(s) <= len(s0))
        s1 = self._gunzip(s)

        f = BytesIO()
        tree.write(f, compression=9)
        s = f.getvalue()
        self.assertTrue(len(s) <= len(s0))
        s9 = self._gunzip(s)

        self.assertEqual(self._large_xml,
                         s0)
        self.assertEqual(self._large_xml,
                         s1)
        self.assertEqual(self._large_xml,
                         s9)

    def test_write_file(self):
        """Writing to a file name creates a file with the serialisation."""
        tree = self.parse(_bytes('<a><b/></a>'))
        handle, filename = tempfile.mkstemp()
        try:
            tree.write(filename)
            data = read_file(filename, 'rb')
        finally:
            os.close(handle)
            os.remove(filename)
        self.assertEqual(_bytes('<a><b/></a>'),
                         data)

    def test_write_file_gzip(self):
        """A file written with compression can be read via ``gzip.open``."""
        tree = self.parse(self._large_xml)
        handle, filename = tempfile.mkstemp()
        try:
            tree.write(filename, compression=9)
            f = gzip.open(filename, 'rb')
            try:
                data = f.read()
            finally:
                f.close()
        finally:
            os.close(handle)
            os.remove(filename)
        self.assertEqual(self._large_xml,
                         data)

    def test_write_file_gzip_parse(self):
        """``etree.parse()`` reads a compressed file given its file name."""
        tree = self.parse(self._large_xml)
        handle, filename = tempfile.mkstemp()
        try:
            tree.write(filename, compression=9)
            data = etree.tostring(etree.parse(filename))
        finally:
            os.close(handle)
            os.remove(filename)
        self.assertEqual(self._large_xml,
                         data)

    def test_write_file_gzipfile_parse(self):
        """``etree.parse()`` reads a compressed file through a ``GzipFile``."""
        tree = self.parse(self._large_xml)
        handle, filename = tempfile.mkstemp()
        try:
            tree.write(filename, compression=9)
            gzfile = gzip.GzipFile(filename)
            try:
                data = etree.tostring(etree.parse(gzfile))
            finally:
                # Close explicitly so that no open handle on the temporary
                # file remains when it is removed below.
                gzfile.close()
        finally:
            os.close(handle)
            os.remove(filename)
        self.assertEqual(self._large_xml,
                         data)
3303
class ETreeErrorLogTest(HelperTestCase):
    """Tests for the error log attached to parser errors."""
    etree = etree

    def test_parse_error_logging(self):
        """A tag mismatch is reported in the ``error_log`` of the exception.

        The broken input closes ``<b>`` with ``</c>``.  The log entries of
        the raised ``SyntaxError`` are checked for the message, domain name,
        type name, line and column of that error.
        """
        parse = self.etree.parse
        f = BytesIO(_bytes('<a><b></c></b></a>'))
        self.etree.clear_error_log()
        try:
            try:
                parse(f)
                logs = None
            except SyntaxError:
                e = sys.exc_info()[1]
                logs = e.error_log
        finally:
            f.close()
        if logs is None:
            # Report a proper test failure rather than crashing with a
            # TypeError when iterating over None below.
            self.fail("parsing broken XML did not raise SyntaxError")
        self.assertTrue([ log for log in logs
                          if 'mismatch' in log.message ])
        self.assertTrue([ log for log in logs
                          if 'PARSER'   in log.domain_name])
        self.assertTrue([ log for log in logs
                          if 'ERR_TAG_NAME_MISMATCH' in log.type_name ])
        self.assertTrue([ log for log in logs
                          if 1 == log.line ])
        self.assertTrue([ log for log in logs
                          if 15 == log.column ])

    def _test_python_error_logging(self):
        """This can't really be tested as long as there isn't a way to
        reset the logging setup ...

        The leading underscore in the method name keeps the test loader
        from collecting this test.
        """
        parse = self.etree.parse

        messages = []
        class Logger(self.etree.PyErrorLog):
            # Collect the formatted messages instead of logging them.
            def log(self, entry, message, *args):
                messages.append(message)

        self.etree.use_global_python_log(Logger())
        f = BytesIO(_bytes('<a><b></c></b></a>'))
        try:
            try:
                parse(f)
            except SyntaxError:
                # Expected: only the logged messages are of interest here.
                pass
        finally:
            f.close()

        self.assertTrue([ message for message in messages
                          if 'mismatch' in message ])
        self.assertTrue([ message for message in messages
                          if ':PARSER:'   in message])
        self.assertTrue([ message for message in messages
                          if ':ERR_TAG_NAME_MISMATCH:' in message ])
        self.assertTrue([ message for message in messages
                          if ':1:15:' in message ])
3356
def test_suite():
    """Build and return the test suite for this module.

    The suite contains the test case classes of this module followed by
    doctests built with ``make_doctest()`` from the listed documentation
    files.
    """
    # Equivalent to the deprecated unittest.makeSuite() with its defaults.
    load_case = unittest.defaultTestLoader.loadTestsFromTestCase

    suite = unittest.TestSuite()
    for test_case in (ETreeOnlyTestCase,
                      ETreeXIncludeTestCase,
                      ElementIncludeTestCase,
                      ETreeC14NTestCase,
                      ETreeWriteTestCase,
                      ETreeErrorLogTest):
        suite.addTests([load_case(test_case)])

    for doc_file in ('../../../doc/tutorial.txt',
                     '../../../doc/api.txt',
                     '../../../doc/FAQ.txt',
                     '../../../doc/parsing.txt',
                     '../../../doc/resolvers.txt'):
        suite.addTests([make_doctest(doc_file)])
    return suite
3376
# Running this module directly executes no tests; it only prints a hint
# that points to the test.py runner.
if __name__ == '__main__':
    print('to test use test.py %s' % __file__)