src/lxml/lxml.etree.pyx

   1 u"""The ``lxml.etree`` module implements the extended ElementTree API
   2 for XML.
   3 """
   4
   5 __docformat__ = u"restructuredtext en"
   6
   7 __all__ = [
   8     'AttributeBasedElementClassLookup', 'C14NError', 'CDATA',
   9     'Comment', 'CommentBase', 'CustomElementClassLookup', 'DEBUG',
  10     'DTD', 'DTDError', 'DTDParseError', 'DTDValidateError',
  11     'DocumentInvalid', 'ETCompatXMLParser', 'ETXPath', 'Element',
  12     'ElementBase', 'ElementClassLookup', 'ElementDefaultClassLookup',
  13     'ElementNamespaceClassLookup', 'ElementTree', 'Entity', 'EntityBase',
  14     'Error', 'ErrorDomains', 'ErrorLevels', 'ErrorTypes', 'Extension',
  15     'FallbackElementClassLookup', 'FunctionNamespace', 'HTML',
  16     'HTMLParser', 'LIBXML_COMPILED_VERSION', 'LIBXML_VERSION',
  17     'LIBXSLT_COMPILED_VERSION', 'LIBXSLT_VERSION', 'LXML_VERSION',
  18     'LxmlError', 'LxmlRegistryError', 'LxmlSyntaxError',
  19     'NamespaceRegistryError', 'PI', 'PIBase', 'ParseError',
  20     'ParserBasedElementClassLookup', 'ParserError', 'ProcessingInstruction',
  21     'PyErrorLog', 'PythonElementClassLookup', 'QName', 'RelaxNG',
  22     'RelaxNGError', 'RelaxNGErrorTypes', 'RelaxNGParseError',
  23     'RelaxNGValidateError', 'Resolver', 'Schematron', 'SchematronError',
  24     'SchematronParseError', 'SchematronValidateError', 'SerialisationError',
  25     'SubElement', 'TreeBuilder', 'XInclude', 'XIncludeError', 'XML',
  26     'XMLDTDID', 'XMLID', 'XMLParser', 'XMLSchema', 'XMLSchemaError',
  27     'XMLSchemaParseError', 'XMLSchemaValidateError', 'XMLSyntaxError',
  28     'XMLTreeBuilder', 'XPath', 'XPathDocumentEvaluator', 'XPathError',
  29     'XPathEvalError', 'XPathEvaluator', 'XPathFunctionError', 'XPathResultError',
  30     'XPathSyntaxError', 'XSLT', 'XSLTAccessControl', 'XSLTApplyError',
  31     'XSLTError', 'XSLTExtension', 'XSLTExtensionError', 'XSLTParseError',
  32     'XSLTSaveError', 'cleanup_namespaces', 'clear_error_log', 'dump',
  33     'fromstring', 'fromstringlist', 'get_default_parser', 'iselement',
  34     'iterparse', 'iterwalk', 'parse', 'parseid', 'register_namespace',
  35     'set_default_parser', 'set_element_class_lookup', 'strip_attributes',
  36     'strip_elements', 'strip_tags', 'tostring', 'tostringlist', 'tounicode',
  37     'use_global_python_log'
  38     ]
  39
  40 cimport tree, python, config
  41 from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement, _getNs
  42 from python cimport callable, _cstr, _isString
  43 cimport xpath
  44 cimport c14n
  45 cimport cstd
  46
  47 try:
  48     import __builtin__
  49 except ImportError:
  50     # Python 3
  51     import builtins as __builtin__
  52
  53 cdef object _unicode
  54 try:
  55     _unicode = __builtin__.unicode
  56 except AttributeError:
  57     # Python 3
  58     _unicode = __builtin__.str
  59
  60 del __builtin__
  61
  62 cdef object os_path_abspath
  63 from os.path import abspath as os_path_abspath
  64
  65 cdef object BytesIO, StringIO
  66 try:
  67     from io import BytesIO, StringIO
  68 except (ImportError, AttributeError):
  69     from StringIO import StringIO, StringIO as BytesIO
  70
  71 cdef object _elementpath
  72 import _elementpath
  73
  74 cdef object sys
  75 import sys
  76
  77 cdef object re
  78 import re
  79
  80 cdef object gzip
  81 import gzip
  82
  83 cdef object ITER_EMPTY
  84 ITER_EMPTY = iter(())
  85
  86 cdef object EMPTY_READ_ONLY_DICT
  87 EMPTY_READ_ONLY_DICT = python.PyDictProxy_New({})
  88
  89 # the rules
  90 # ---------
  91 # any libxml C argument/variable is prefixed with c_
  92 # any non-public function/class is prefixed with an underscore
  93 # instance creation is always through factories
  94
  95 # what to do with libxml2/libxslt error messages?
  96 # 0 : drop
  97 # 1 : use log
  98 DEF __DEBUG = 1
  99
 100 # maximum number of lines in the libxml2/xslt log if __DEBUG == 1
 101 DEF __MAX_LOG_SIZE = 100
 102
 103 # make the compiled-in debug state publicly available
 104 DEBUG = __DEBUG
 105
 106 # global per-thread setup
 107 tree.xmlThrDefIndentTreeOutput(1)
 108 tree.xmlThrDefLineNumbersDefaultValue(1)
 109
 110 _initThreadLogging()
 111
 112 # initialize parser (and threading)
 113 xmlparser.xmlInitParser()
 114
 115 # filename encoding
 116 cdef object _FILENAME_ENCODING
 117 _FILENAME_ENCODING = sys.getfilesystemencoding()
 118 if _FILENAME_ENCODING is None:
 119     _FILENAME_ENCODING = sys.getdefaultencoding()
 120 if _FILENAME_ENCODING is None:
 121     _FILENAME_ENCODING = b'ascii'
 122 else:
 123     _FILENAME_ENCODING = _FILENAME_ENCODING.encode(u"UTF-8")
 124 cdef char* _C_FILENAME_ENCODING
 125 _C_FILENAME_ENCODING = _cstr(_FILENAME_ENCODING)
 126
 127 # set up some default namespace prefixes
 128 cdef object _DEFAULT_NAMESPACE_PREFIXES
 129 _DEFAULT_NAMESPACE_PREFIXES = {
 130     b"http://www.w3.org/XML/1998/namespace": b'xml',
 131     b"http://www.w3.org/1999/xhtml": b"html",
 132     b"http://www.w3.org/1999/XSL/Transform": b"xsl",
 133     b"http://www.w3.org/1999/02/22-rdf-syntax-ns#": b"rdf",
 134     b"http://schemas.xmlsoap.org/wsdl/": b"wsdl",
 135     # xml schema
 136     b"http://www.w3.org/2001/XMLSchema": b"xs",
 137     b"http://www.w3.org/2001/XMLSchema-instance": b"xsi",
 138     # dublin core
 139     b"http://purl.org/dc/elements/1.1/": b"dc",
 140     # objectify
 141     b"http://codespeak.net/lxml/objectify/pytype" : b"py",
 142 }
 143
 144 cdef object _check_internal_prefix = re.compile(b"ns\d+$").match
 145
 146 def register_namespace(prefix, uri):
 147     u"""Registers a namespace prefix that newly created Elements in that
 148     namespace will use.  The registry is global, and any existing
 149     mapping for either the given prefix or the namespace URI will be
 150     removed.
 151     """
 152     prefix_utf, uri_utf = _utf8(prefix), _utf8(uri)
 153     if _check_internal_prefix(prefix_utf):
 154         raise ValueError("Prefix format reserved for internal use")
 155     _tagValidOrRaise(prefix_utf)
 156     _uriValidOrRaise(uri_utf)
 157     for k, v in _DEFAULT_NAMESPACE_PREFIXES.items():
 158         if k == uri_utf or v == prefix_utf:
 159             del _DEFAULT_NAMESPACE_PREFIXES[k]
 160     _DEFAULT_NAMESPACE_PREFIXES[uri_utf] = prefix_utf
 161
 162
 163 # Error superclass for ElementTree compatibility
 164 class Error(Exception):
 165     pass
 166
 167 # module level superclass for all exceptions
 168 class LxmlError(Error):
 169     u"""Main exception base class for lxml.  All other exceptions inherit from
 170     this one.
 171     """
 172     def __init__(self, message, error_log=None):
 173         if python.PY_VERSION_HEX >= 0x02050000:
 174             # Python >= 2.5 uses new style class exceptions
 175             super(_Error, self).__init__(message)
 176         else:
 177             error_super_init(self, message)
 178         if error_log is None:
 179             self.error_log = __copyGlobalErrorLog()
 180         else:
 181             self.error_log = error_log.copy()
 182
 183 cdef object _Error = Error if python.PY_VERSION_HEX >= 0x02050000 else None
 184 cdef object error_super_init = Error.__init__ if python.PY_VERSION_HEX < 0x02050000 else None
 185
 186
 187 # superclass for all syntax errors
 188 class LxmlSyntaxError(LxmlError, SyntaxError):
 189     u"""Base class for all syntax errors.
 190     """
 191     pass
 192
 193 class C14NError(LxmlError):
 194     u"""Error during C14N serialisation.
 195     """
 196     pass
 197
 198 # version information
 199 cdef __unpackDottedVersion(version):
 200     cdef list version_list = []
 201     l = (version.decode("ascii").replace(u'-', u'.').split(u'.') + [0]*4)[:4]
 202     for item in l:
 203         try:
 204             item = int(item)
 205         except ValueError:
 206             if item.startswith(u'dev'):
 207                 count = item[3:]
 208                 item = -300
 209             elif item.startswith(u'alpha'):
 210                 count = item[5:]
 211                 item = -200
 212             elif item.startswith(u'beta'):
 213                 count = item[4:]
 214                 item = -100
 215             else:
 216                 count = 0
 217             if count:
 218                 item += int(count)
 219         version_list.append(item)
 220     return tuple(version_list)
 221
 222 cdef __unpackIntVersion(int c_version):
 223     return (
 224         ((c_version / (100*100)) % 100),
 225         ((c_version / 100)       % 100),
 226         (c_version               % 100)
 227         )
 228
# Integer version of the libxml2 library in use, taken from the leading
# digits of tree.xmlParserVersion; falls back to 0 if that fails.
cdef int _LIBXML_VERSION_INT
try:
    _LIBXML_VERSION_INT = int(
        re.match(u'[0-9]+', (tree.xmlParserVersion).decode("ascii")).group(0))
except Exception:
    # best effort: report the unexpected version string and carry on
    print u"Unknown libxml2 version: %s" % (tree.xmlParserVersion).decode("ascii")
    _LIBXML_VERSION_INT = 0

# version tuples that are listed in __all__
LIBXML_VERSION = __unpackIntVersion(_LIBXML_VERSION_INT)
LIBXML_COMPILED_VERSION = __unpackIntVersion(tree.LIBXML_VERSION)
LXML_VERSION = __unpackDottedVersion(tree.LXML_VERSION_STRING)

# lxml version as a text string
__version__ = (tree.LXML_VERSION_STRING).decode("ascii")
 242
 243
 244 # class for temporary storage of Python references,
 245 # used e.g. for XPath results
 246 cdef class _TempStore:
 247     cdef list _storage
 248     def __init__(self):
 249         self._storage = []
 250
 251     cdef int add(self, obj) except -1:
 252         self._storage.append(obj)
 253         return 0
 254
 255     cdef int clear(self) except -1:
 256         del self._storage[:]
 257         return 0
 258
 259 # class for temporarily storing exceptions raised in extensions
 260 cdef class _ExceptionContext:
 261     cdef object _exc_info
 262     cdef void clear(self):
 263         self._exc_info = None
 264
 265     cdef void _store_raised(self):
 266         self._exc_info = sys.exc_info()
 267
 268     cdef void _store_exception(self, exception):
 269         self._exc_info = (exception, None, None)
 270
 271     cdef bint _has_raised(self):
 272         return self._exc_info is not None
 273
 274     cdef int _raise_if_stored(self) except -1:
 275         if self._exc_info is None:
 276             return 0
 277         type, value, traceback = self._exc_info
 278         self._exc_info = None
 279         if value is None and traceback is None:
 280             raise type
 281         else:
 282             raise type, value, traceback
 283
 284
 285 # forward declarations
 286 cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]
 287 cdef public class _Element [ type LxmlElementType, object LxmlElement ]
 288 cdef class _BaseParser
 289 cdef class QName
 290 ctypedef public xmlNode* (*_node_to_node_function)(xmlNode*)
 291
 292 ################################################################################
 293 # Include submodules
 294
 295 include "proxy.pxi"        # Proxy handling (element backpointers/memory/etc.)
 296 include "apihelpers.pxi"   # Private helper functions
 297 include "xmlerror.pxi"     # Error and log handling
 298
 299
 300 ################################################################################
 301 # Public Python API
 302
 303 cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
 304     u"""Internal base class to reference a libxml document.
 305
 306     When instances of this class are garbage collected, the libxml
 307     document is cleaned up.
 308     """
 309     cdef int _ns_counter
 310     cdef bytes _prefix_tail
 311     cdef xmlDoc* _c_doc
 312     cdef _BaseParser _parser
 313
 314     def __dealloc__(self):
 315         # if there are no more references to the document, it is safe
 316         # to clean the whole thing up, as all nodes have a reference to
 317         # the document
 318         tree.xmlFreeDoc(self._c_doc)
 319
 320     cdef getroot(self):
 321         # return an element proxy for the document root
 322         cdef xmlNode* c_node
 323         c_node = tree.xmlDocGetRootElement(self._c_doc)
 324         if c_node is NULL:
 325             return None
 326         return _elementFactory(self, c_node)
 327
 328     cdef bint hasdoctype(self):
 329         # DOCTYPE gets parsed into internal subset (xmlDTD*)
 330         return self._c_doc is not NULL and self._c_doc.intSubset is not NULL
 331
 332     cdef getdoctype(self):
 333         # get doctype info: root tag, public/system ID (or None if not known)
 334         cdef tree.xmlDtd* c_dtd
 335         cdef xmlNode* c_root_node
 336         public_id = None
 337         sys_url   = None
 338         c_dtd = self._c_doc.intSubset
 339         if c_dtd is not NULL:
 340             if c_dtd.ExternalID is not NULL:
 341                 public_id = funicode(c_dtd.ExternalID)
 342             if c_dtd.SystemID is not NULL:
 343                 sys_url = funicode(c_dtd.SystemID)
 344         c_dtd = self._c_doc.extSubset
 345         if c_dtd is not NULL:
 346             if not public_id and c_dtd.ExternalID is not NULL:
 347                 public_id = funicode(c_dtd.ExternalID)
 348             if not sys_url and c_dtd.SystemID is not NULL:
 349                 sys_url = funicode(c_dtd.SystemID)
 350         c_root_node = tree.xmlDocGetRootElement(self._c_doc)
 351         if c_root_node is NULL:
 352             root_name = None
 353         else:
 354             root_name = funicode(c_root_node.name)
 355         return (root_name, public_id, sys_url)
 356
 357     cdef getxmlinfo(self):
 358         # return XML version and encoding (or None if not known)
 359         cdef xmlDoc* c_doc = self._c_doc
 360         if c_doc.version is NULL:
 361             version = None
 362         else:
 363             version = funicode(c_doc.version)
 364         if c_doc.encoding is NULL:
 365             encoding = None
 366         else:
 367             encoding = funicode(c_doc.encoding)
 368         return (version, encoding)
 369
 370     cdef isstandalone(self):
 371         # returns True for "standalone=true",
 372         # False for "standalone=false", None if not provided
 373         if self._c_doc.standalone == -1:
 374             return None
 375         else:
 376             return <bint>(self._c_doc.standalone == 1)
 377
 378     cdef bytes buildNewPrefix(self):
 379         # get a new unique prefix ("nsX") for this document
 380         cdef bytes ns
 381         if self._ns_counter < len(_PREFIX_CACHE):
 382             ns = _PREFIX_CACHE[self._ns_counter]
 383         else:
 384             ns = python.PyBytes_FromFormat("ns%d", self._ns_counter)
 385         if self._prefix_tail is not None:
 386             ns += self._prefix_tail
 387         self._ns_counter += 1
 388         if self._ns_counter < 0:
 389             # overflow!
 390             self._ns_counter = 0
 391             if self._prefix_tail is None:
 392                 self._prefix_tail = b"A"
 393             else:
 394                 self._prefix_tail += b"A"
 395         return ns
 396
 397     cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node,
 398                                    char* c_href, char* c_prefix,
 399                                    bint is_attribute) except NULL:
 400         u"""Get or create namespace structure for a node.  Reuses the prefix if
 401         possible.
 402         """
 403         cdef xmlNs* c_ns
 404         cdef xmlNs* c_doc_ns
 405         cdef python.PyObject* dict_result
 406         if c_node.type != tree.XML_ELEMENT_NODE:
 407             assert c_node.type == tree.XML_ELEMENT_NODE, \
 408                 u"invalid node type %d, expected %d" % (
 409                 c_node.type, tree.XML_ELEMENT_NODE)
 410         # look for existing ns declaration
 411         c_ns = _searchNsByHref(c_node, c_href, is_attribute)
 412         if c_ns is not NULL:
 413             if is_attribute and c_ns.prefix is NULL:
 414                 # do not put namespaced attributes into the default
 415                 # namespace as this would break serialisation
 416                 pass
 417             else:
 418                 return c_ns
 419
 420         # none found => determine a suitable new prefix
 421         if c_prefix is NULL:
 422             dict_result = python.PyDict_GetItem(
 423                 _DEFAULT_NAMESPACE_PREFIXES, c_href)
 424             if dict_result is not NULL:
 425                 prefix = <object>dict_result
 426             else:
 427                 prefix = self.buildNewPrefix()
 428             c_prefix = _cstr(prefix)
 429
 430         # make sure the prefix is not in use already
 431         while tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is not NULL:
 432             prefix = self.buildNewPrefix()
 433             c_prefix = _cstr(prefix)
 434
 435         # declare the namespace and return it
 436         c_ns = tree.xmlNewNs(c_node, c_href, c_prefix)
 437         if c_ns is NULL:
 438             python.PyErr_NoMemory()
 439         return c_ns
 440
 441     cdef int _setNodeNs(self, xmlNode* c_node, char* href) except -1:
 442         u"Lookup namespace structure and set it for the node."
 443         cdef xmlNs* c_ns
 444         c_ns = self._findOrBuildNodeNs(c_node, href, NULL, 0)
 445         tree.xmlSetNs(c_node, c_ns)
 446
 447 cdef tuple __initPrefixCache():
 448     cdef int i
 449     return tuple([ python.PyBytes_FromFormat("ns%d", i)
 450                    for i in range(30) ])
 451
 452 cdef tuple _PREFIX_CACHE
 453 _PREFIX_CACHE = __initPrefixCache()
 454
 455 cdef _Document _documentFactory(xmlDoc* c_doc, _BaseParser parser):
 456     cdef _Document result
 457     result = _Document.__new__(_Document)
 458     result._c_doc = c_doc
 459     result._ns_counter = 0
 460     result._prefix_tail = None
 461     if parser is None:
 462         parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
 463     result._parser = parser
 464     return result
 465
 466
 467 cdef class DocInfo:
 468     u"Document information provided by parser and DTD."
 469     cdef _Document _doc
 470     def __cinit__(self, tree):
 471         u"Create a DocInfo object for an ElementTree object or root Element."
 472         self._doc = _documentOrRaise(tree)
 473         root_name, public_id, system_url = self._doc.getdoctype()
 474         if not root_name and (public_id or system_url):
 475             raise ValueError, u"Could not find root node"
 476
 477     property root_name:
 478         u"Returns the name of the root node as defined by the DOCTYPE."
 479         def __get__(self):
 480             root_name, public_id, system_url = self._doc.getdoctype()
 481             return root_name
 482
 483     property public_id:
 484         u"Returns the public ID of the DOCTYPE."
 485         def __get__(self):
 486             root_name, public_id, system_url = self._doc.getdoctype()
 487             return public_id
 488
 489     property system_url:
 490         u"Returns the system ID of the DOCTYPE."
 491         def __get__(self):
 492             root_name, public_id, system_url = self._doc.getdoctype()
 493             return system_url
 494
 495     property xml_version:
 496         u"Returns the XML version as declared by the document."
 497         def __get__(self):
 498             xml_version, encoding = self._doc.getxmlinfo()
 499             return xml_version
 500
 501     property encoding:
 502         u"Returns the encoding name as declared by the document."
 503         def __get__(self):
 504             xml_version, encoding = self._doc.getxmlinfo()
 505             return encoding
 506
 507     property standalone:
 508         u"""Returns the standalone flag as declared by the document.  The possible
 509         values are True (``standalone='yes'``), False
 510         (``standalone='no'`` or flag not provided in the declaration),
 511         and None (unknown or no declaration found).  Note that a
 512         normal truth test on this value will always tell if the
 513         ``standalone`` flag was set to ``'yes'`` or not.
 514         """
 515         def __get__(self):
 516             return self._doc.isstandalone()
 517
 518     property URL:
 519         u"The source URL of the document (or None if unknown)."
 520         def __get__(self):
 521             if self._doc._c_doc.URL is NULL:
 522                 return None
 523             return _decodeFilename(self._doc._c_doc.URL)
 524         def __set__(self, url):
 525             cdef char* c_oldurl
 526             url = _encodeFilename(url)
 527             c_oldurl = self._doc._c_doc.URL
 528             if url is None:
 529                 self._doc._c_doc.URL = NULL
 530             else:
 531                 self._doc._c_doc.URL = tree.xmlStrdup(_cstr(url))
 532             if c_oldurl is not NULL:
 533                 tree.xmlFree(c_oldurl)
 534
 535     property doctype:
 536         u"Returns a DOCTYPE declaration string for the document."
 537         def __get__(self):
 538             root_name, public_id, system_url = self._doc.getdoctype()
 539             if public_id:
 540                 if system_url:
 541                     return u'<!DOCTYPE %s PUBLIC "%s" "%s">' % (
 542                         root_name, public_id, system_url)
 543                 else:
 544                     return u'<!DOCTYPE %s PUBLIC "%s">' % (
 545                         root_name, public_id)
 546             elif system_url:
 547                 return u'<!DOCTYPE %s SYSTEM "%s">' % (
 548                     root_name, system_url)
 549             elif self._doc.hasdoctype():
 550                 return u'<!DOCTYPE %s>' % root_name
 551             else:
 552                 return u""
 553
 554     property internalDTD:
 555         u"Returns a DTD validator based on the internal subset of the document."
 556         def __get__(self):
 557             return _dtdFactory(self._doc._c_doc.intSubset)
 558
 559     property externalDTD:
 560         u"Returns a DTD validator based on the external subset of the document."
 561         def __get__(self):
 562             return _dtdFactory(self._doc._c_doc.extSubset)
 563
 564
 565 cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
 566     u"""Element class.
 567
 568     References a document object and a libxml node.
 569
 570     By pointing to a Document instance, a reference is kept to
 571     _Document as long as there is some pointer to a node in it.
 572     """
 573     cdef python.PyObject* _gc_doc
 574     cdef _Document _doc
 575     cdef xmlNode* _c_node
 576     cdef object _tag
 577
 578     def _init(self):
 579         u"""_init(self)
 580
 581         Called after object initialisation.  Custom subclasses may override
 582         this if they recursively call _init() in the superclasses.
 583         """
 584
 585     def __dealloc__(self):
 586         #print "trying to free node:", <int>self._c_node
 587         #displayNode(self._c_node, 0)
 588         if self._c_node is not NULL:
 589             _unregisterProxy(self)
 590             attemptDeallocation(self._c_node)
 591         _releaseProxy(self)
 592
 593     # MANIPULATORS
 594
 595     def __setitem__(self, x, value):
 596         u"""__setitem__(self, x, value)
 597
 598         Replaces the given subelement index or slice.
 599         """
 600         cdef xmlNode* c_node = NULL
 601         cdef xmlNode* c_next
 602         cdef xmlDoc* c_source_doc
 603         cdef _Element element
 604         cdef bint left_to_right
 605         cdef Py_ssize_t slicelength = 0, step = 0
 606         _assertValidNode(self)
 607         if value is None:
 608             raise ValueError, u"cannot assign None"
 609         if python.PySlice_Check(x):
 610             # slice assignment
 611             _findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength)
 612             if step > 0:
 613                 left_to_right = 1
 614             else:
 615                 left_to_right = 0
 616                 step = -step
 617             _replaceSlice(self, c_node, slicelength, step, left_to_right, value)
 618             return
 619         else:
 620             # otherwise: normal item assignment
 621             element = value
 622             _assertValidNode(element)
 623             c_node = _findChild(self._c_node, x)
 624             if c_node is NULL:
 625                 raise IndexError, u"list index out of range"
 626             c_source_doc = element._c_node.doc
 627             c_next = element._c_node.next
 628             _removeText(c_node.next)
 629             tree.xmlReplaceNode(c_node, element._c_node)
 630             _moveTail(c_next, element._c_node)
 631             moveNodeToDocument(self._doc, c_source_doc, element._c_node)
 632             if not attemptDeallocation(c_node):
 633                 moveNodeToDocument(self._doc, c_node.doc, c_node)
 634
 635     def __delitem__(self, x):
 636         u"""__delitem__(self, x)
 637
 638         Deletes the given subelement or a slice.
 639         """
 640         cdef xmlNode* c_node = NULL
 641         cdef xmlNode* c_next
 642         cdef Py_ssize_t step = 0, slicelength = 0
 643         _assertValidNode(self)
 644         if python.PySlice_Check(x):
 645             # slice deletion
 646             if _isFullSlice(<slice>x):
 647                 c_node = self._c_node.children
 648                 if c_node is not NULL:
 649                     if not _isElement(c_node):
 650                         c_node = _nextElement(c_node)
 651                     while c_node is not NULL:
 652                         c_next = _nextElement(c_node)
 653                         _removeNode(self._doc, c_node)
 654                         c_node = c_next
 655             else:
 656                 _findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength)
 657                 _deleteSlice(self._doc, c_node, slicelength, step)
 658         else:
 659             # item deletion
 660             c_node = _findChild(self._c_node, x)
 661             if c_node is NULL:
 662                 raise IndexError, u"index out of range: %d" % x
 663             _removeText(c_node.next)
 664             _removeNode(self._doc, c_node)
 665
 666     def __deepcopy__(self, memo):
 667         u"__deepcopy__(self, memo)"
 668         return self.__copy__()
 669
 670     def __copy__(self):
 671         u"__copy__(self)"
 672         cdef xmlDoc* c_doc
 673         cdef xmlNode* c_node
 674         cdef _Document new_doc
 675         _assertValidNode(self)
 676         c_doc = _copyDocRoot(self._doc._c_doc, self._c_node) # recursive
 677         new_doc = _documentFactory(c_doc, self._doc._parser)
 678         root = new_doc.getroot()
 679         if root is not None:
 680             return root
 681         # Comment/PI
 682         c_node = c_doc.children
 683         while c_node is not NULL and c_node.type != self._c_node.type:
 684             c_node = c_node.next
 685         if c_node is NULL:
 686             return None
 687         return _elementFactory(new_doc, c_node)
 688
 689     def set(self, key, value):
 690         u"""set(self, key, value)
 691
 692         Sets an element attribute.
 693         """
 694         _assertValidNode(self)
 695         _setAttributeValue(self, key, value)
 696
 697     def append(self, _Element element not None):
 698         u"""append(self, element)
 699
 700         Adds a subelement to the end of this element.
 701         """
 702         _assertValidNode(self)
 703         _assertValidNode(element)
 704         _appendChild(self, element)
 705
 706     def addnext(self, _Element element not None):
 707         u"""addnext(self, element)
 708
 709         Adds the element as a following sibling directly after this
 710         element.
 711
 712         This is normally used to set a processing instruction or comment after
 713         the root node of a document.  Note that tail text is automatically
 714         discarded when adding at the root level.
 715         """
 716         _assertValidNode(self)
 717         _assertValidNode(element)
 718         if self._c_node.parent != NULL and not _isElement(self._c_node.parent):
 719             if element._c_node.type != tree.XML_PI_NODE:
 720                 if element._c_node.type != tree.XML_COMMENT_NODE:
 721                     raise TypeError, u"Only processing instructions and comments can be siblings of the root element"
 722             element.tail = None
 723         _appendSibling(self, element)
 724
 725     def addprevious(self, _Element element not None):
 726         u"""addprevious(self, element)
 727
 728         Adds the element as a preceding sibling directly before this
 729         element.
 730
 731         This is normally used to set a processing instruction or comment
 732         before the root node of a document.  Note that tail text is
 733         automatically discarded when adding at the root level.
 734         """
 735         _assertValidNode(self)
 736         _assertValidNode(element)
 737         if self._c_node.parent != NULL and not _isElement(self._c_node.parent):
 738             if element._c_node.type != tree.XML_PI_NODE:
 739                 if element._c_node.type != tree.XML_COMMENT_NODE:
 740                     raise TypeError, u"Only processing instructions and comments can be siblings of the root element"
 741             element.tail = None
 742         _prependSibling(self, element)
 743
 744     def extend(self, elements):
 745         u"""extend(self, elements)
 746
 747         Extends the current children by the elements in the iterable.
 748         """
 749         cdef _Element element
 750         _assertValidNode(self)
 751         for element in elements:
 752             if element is None:
 753                 raise TypeError, u"Node must not be None"
 754             _assertValidNode(element)
 755             _appendChild(self, element)
 756
 757     def clear(self):
 758         u"""clear(self)
 759
 760         Resets an element.  This function removes all subelements, clears
 761         all attributes and sets the text and tail properties to None.
 762         """
 763         cdef xmlAttr* c_attr
 764         cdef xmlAttr* c_attr_next
 765         cdef xmlNode* c_node
 766         cdef xmlNode* c_node_next
 767         _assertValidNode(self)
 768         c_node = self._c_node
 769         # remove self.text and self.tail
 770         _removeText(c_node.children)
 771         _removeText(c_node.next)
 772         # remove all attributes
 773         c_attr = c_node.properties
 774         while c_attr is not NULL:
 775             c_attr_next = c_attr.next
 776             tree.xmlRemoveProp(c_attr)
 777             c_attr = c_attr_next
 778         # remove all subelements
 779         c_node = c_node.children
 780         if c_node is not NULL:
 781             if not _isElement(c_node):
 782                 c_node = _nextElement(c_node)
 783             while c_node is not NULL:
 784                 c_node_next = _nextElement(c_node)
 785                 _removeNode(self._doc, c_node)
 786                 c_node = c_node_next
 787
 788     def insert(self, index, _Element element not None):
 789         u"""insert(self, index, element)
 790
 791         Inserts a subelement at the given position in this element
 792         """
 793         cdef xmlNode* c_node
 794         cdef xmlNode* c_next
 795         cdef xmlDoc* c_source_doc
 796         _assertValidNode(self)
 797         _assertValidNode(element)
 798         c_node = _findChild(self._c_node, index)
 799         if c_node is NULL:
 800             _appendChild(self, element)
 801             return
 802         c_source_doc = c_node.doc
 803         c_next = element._c_node.next
 804         tree.xmlAddPrevSibling(c_node, element._c_node)
 805         _moveTail(c_next, element._c_node)
 806         moveNodeToDocument(self._doc, c_source_doc, element._c_node)
 807
 808     def remove(self, _Element element not None):
 809         u"""remove(self, element)
 810
 811         Removes a matching subelement. Unlike the find methods, this
 812         method compares elements based on identity, not on tag value
 813         or contents.
 814         """
 815         cdef xmlNode* c_node
 816         cdef xmlNode* c_next
 817         _assertValidNode(self)
 818         _assertValidNode(element)
 819         c_node = element._c_node
 820         if c_node.parent is not self._c_node:
 821             raise ValueError, u"Element is not a child of this node."
 822         c_next = element._c_node.next
 823         tree.xmlUnlinkNode(c_node)
 824         _moveTail(c_next, c_node)
 825         # fix namespace declarations
 826         moveNodeToDocument(self._doc, c_node.doc, c_node)
 827
    def replace(self, _Element old_element not None,
                _Element new_element not None):
        u"""replace(self, old_element, new_element)

        Replaces a subelement with the element passed as second argument.

        Raises ValueError if ``old_element`` is not a direct child of
        this element.
        """
        cdef xmlNode* c_old_node
        cdef xmlNode* c_old_next
        cdef xmlNode* c_new_node
        cdef xmlNode* c_new_next
        cdef xmlDoc* c_source_doc
        _assertValidNode(self)
        _assertValidNode(old_element)
        _assertValidNode(new_element)
        c_old_node = old_element._c_node
        if c_old_node.parent is not self._c_node:
            raise ValueError, u"Element is not a child of this node."
        # capture the following siblings and the new node's document
        # before xmlReplaceNode() relinks both nodes
        c_old_next = c_old_node.next
        c_new_node = new_element._c_node
        c_new_next = c_new_node.next
        c_source_doc = c_new_node.doc
        tree.xmlReplaceNode(c_old_node, c_new_node)
        # presumably carries each node's tail text along with it,
        # starting from its former sibling -- confirm in _moveTail()
        _moveTail(c_new_next, c_new_node)
        _moveTail(c_old_next, c_old_node)
        moveNodeToDocument(self._doc, c_source_doc, c_new_node)
        # fix namespace declarations
        moveNodeToDocument(self._doc, c_old_node.doc, c_old_node)
 855
 856     # PROPERTIES
 857     property tag:
 858         u"""Element tag
 859         """
 860         def __get__(self):
 861             if self._tag is not None:
 862                 return self._tag
 863             _assertValidNode(self)
 864             self._tag = _namespacedName(self._c_node)
 865             return self._tag
 866
 867         def __set__(self, value):
 868             cdef _BaseParser parser
 869             _assertValidNode(self)
 870             ns, name = _getNsTag(value)
 871             parser = self._doc._parser
 872             if parser is not None and parser._for_html:
 873                 _htmlTagValidOrRaise(name)
 874             else:
 875                 _tagValidOrRaise(name)
 876             self._tag = value
 877             tree.xmlNodeSetName(self._c_node, _cstr(name))
 878             if ns is None:
 879                 self._c_node.ns = NULL
 880             else:
 881                 self._doc._setNodeNs(self._c_node, _cstr(ns))
 882
 883     property attrib:
 884         u"""Element attribute dictionary. Where possible, use get(), set(),
 885         keys(), values() and items() to access element attributes.
 886         """
 887         def __get__(self):
 888             _assertValidNode(self)
 889             return _Attrib(self)
 890
 891     property text:
 892         u"""Text before the first subelement. This is either a string or
 893         the value None, if there was no text.
 894         """
 895         def __get__(self):
 896             _assertValidNode(self)
 897             return _collectText(self._c_node.children)
 898
 899         def __set__(self, value):
 900             _assertValidNode(self)
 901             if isinstance(value, QName):
 902                 value = python.PyUnicode_FromEncodedObject(
 903                     _resolveQNameText(self, value), 'UTF-8', 'strict')
 904             _setNodeText(self._c_node, value)
 905
 906         # using 'del el.text' is the wrong thing to do
 907         #def __del__(self):
 908         #    _setNodeText(self._c_node, None)
 909
 910     property tail:
 911         u"""Text after this element's end tag, but before the next sibling
 912         element's start tag. This is either a string or the value None, if
 913         there was no text.
 914         """
 915         def __get__(self):
 916             _assertValidNode(self)
 917             return _collectText(self._c_node.next)
 918
 919         def __set__(self, value):
 920             _assertValidNode(self)
 921             _setTailText(self._c_node, value)
 922
 923         # using 'del el.tail' is the wrong thing to do
 924         #def __del__(self):
 925         #    _setTailText(self._c_node, None)
 926
 927     # not in ElementTree, read-only
 928     property prefix:
 929         u"""Namespace prefix or None.
 930         """
 931         def __get__(self):
 932             if self._c_node.ns is not NULL:
 933                 if self._c_node.ns.prefix is not NULL:
 934                     return funicode(self._c_node.ns.prefix)
 935             return None
 936
    # not in ElementTree, read-write
 938     property sourceline:
 939         u"""Original line number as found by the parser or None if unknown.
 940         """
 941         def __get__(self):
 942             cdef long line
 943             _assertValidNode(self)
 944             line = tree.xmlGetLineNo(self._c_node)
 945             if line > 0:
 946                 return line
 947             else:
 948                 return None
 949
 950         def __set__(self, line):
 951             _assertValidNode(self)
 952             if line < 0:
 953                 self._c_node.line = 0
 954             else:
 955                 self._c_node.line = line
 956
 957     # not in ElementTree, read-only
 958     property nsmap:
 959         u"""Namespace prefix->URI mapping known in the context of this
 960         Element.  This includes all namespace declarations of the
 961         parents.
 962
 963         Note that changing the returned dict has no effect on the Element.
 964         """
 965         def __get__(self):
 966             cdef xmlNode* c_node
 967             cdef xmlNs* c_ns
 968             cdef dict nsmap = {}
 969             _assertValidNode(self)
 970             c_node = self._c_node
 971             while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE:
 972                 c_ns = c_node.nsDef
 973                 while c_ns is not NULL:
 974                     prefix = None if c_ns.prefix is NULL else funicode(c_ns.prefix)
 975                     if prefix not in nsmap:
 976                         nsmap[prefix] = None if c_ns.href is NULL else funicode(c_ns.href)
 977                     c_ns = c_ns.next
 978                 c_node = c_node.parent
 979             return nsmap
 980
    # not in ElementTree, read-write
 982     property base:
 983         u"""The base URI of the Element (xml:base or HTML base URL).
 984         None if the base URI is unknown.
 985
 986         Note that the value depends on the URL of the document that
 987         holds the Element if there is no xml:base attribute on the
 988         Element or its ancestors.
 989
 990         Setting this property will set an xml:base attribute on the
 991         Element, regardless of the document type (XML or HTML).
 992         """
 993         def __get__(self):
 994             cdef char* c_base
 995             _assertValidNode(self)
 996             c_base = tree.xmlNodeGetBase(self._doc._c_doc, self._c_node)
 997             if c_base is NULL:
 998                 if self._doc._c_doc.URL is NULL:
 999                     return None
1000                 return _decodeFilename(self._doc._c_doc.URL)
1001             base = _decodeFilename(c_base)
1002             tree.xmlFree(c_base)
1003             return base
1004
1005         def __set__(self, url):
1006             cdef char* c_base
1007             _assertValidNode(self)
1008             if url is None:
1009                 c_base = NULL
1010             else:
1011                 url = _encodeFilename(url)
1012                 c_base = _cstr(url)
1013             tree.xmlNodeSetBase(self._c_node, c_base)
1014
1015     # ACCESSORS
1016     def __repr__(self):
1017         u"__repr__(self)"
1018         return u"<Element %s at 0x%x>" % (self.tag, id(self))
1019
1020     def __getitem__(self, x):
1021         u"""Returns the subelement at the given position or the requested
1022         slice.
1023         """
1024         cdef xmlNode* c_node = NULL
1025         cdef Py_ssize_t step = 0, slicelength = 0
1026         cdef Py_ssize_t c, i
1027         cdef _node_to_node_function next_element
1028         cdef list result
1029         _assertValidNode(self)
1030         if python.PySlice_Check(x):
1031             # slicing
1032             if _isFullSlice(<slice>x):
1033                 return _collectChildren(self)
1034             _findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength)
1035             if c_node is NULL:
1036                 return []
1037             if step > 0:
1038                 next_element = _nextElement
1039             else:
1040                 step = -step
1041                 next_element = _previousElement
1042             result = []
1043             c = 0
1044             while c_node is not NULL and c < slicelength:
1045                 result.append(_elementFactory(self._doc, c_node))
1046                 c = c + 1
1047                 for i from 0 <= i < step:
1048                     c_node = next_element(c_node)
1049             return result
1050         else:
1051             # indexing
1052             c_node = _findChild(self._c_node, x)
1053             if c_node is NULL:
1054                 raise IndexError, u"list index out of range"
1055             return _elementFactory(self._doc, c_node)
1056
1057     def __len__(self):
1058         u"""__len__(self)
1059
1060         Returns the number of subelements.
1061         """
1062         _assertValidNode(self)
1063         return _countElements(self._c_node.children)
1064
1065     def __nonzero__(self):
1066         #u"__nonzero__(self)" # currently fails in Py3.1
1067         import warnings
1068         warnings.warn(
1069             u"The behavior of this method will change in future versions. "
1070             u"Use specific 'len(elem)' or 'elem is not None' test instead.",
1071             FutureWarning
1072             )
1073         # emulate old behaviour
1074         _assertValidNode(self)
1075         return _hasChild(self._c_node)
1076
1077     def __contains__(self, element):
1078         u"__contains__(self, element)"
1079         cdef xmlNode* c_node
1080         _assertValidNode(self)
1081         if not isinstance(element, _Element):
1082             return 0
1083         c_node = (<_Element>element)._c_node
1084         return c_node is not NULL and c_node.parent is self._c_node
1085
1086     def __iter__(self):
1087         u"__iter__(self)"
1088         return ElementChildIterator(self)
1089
1090     def __reversed__(self):
1091         u"__reversed__(self)"
1092         return ElementChildIterator(self, reversed=True)
1093
    def index(self, _Element child not None, start=None, stop=None):
        u"""index(self, child, start=None, stop=None)

        Find the position of the child within the parent.

        The position counts element siblings only.  ``start`` and ``stop``
        optionally restrict the search to a slice of the children; both
        may be negative.  Raises ValueError if ``child`` is not a child of
        this element or is not found within the requested slice.

        This method is not part of the original ElementTree API.
        """
        cdef Py_ssize_t k, l
        cdef Py_ssize_t c_start, c_stop
        cdef xmlNode* c_child
        cdef xmlNode* c_start_node
        _assertValidNode(self)
        _assertValidNode(child)
        c_child = child._c_node
        if c_child.parent is not self._c_node:
            raise ValueError, u"Element is not a child of this node."

        # handle the unbounded search straight away (normal case)
        if stop is None and (start is None or start == 0):
            # the index is the number of element siblings before the child
            k = 0
            c_child = c_child.prev
            while c_child is not NULL:
                if _isElement(c_child):
                    k += 1
                c_child = c_child.prev
            return k

        # check indices
        if start is None:
            c_start = 0
        else:
            c_start = start
        if stop is None:
            # c_stop == 0 doubles as "no upper bound" from here on
            c_stop = 0
        else:
            c_stop = stop
            # an explicit stop of 0, or a start at/after the stop, is
            # rejected up front (unless the signs differ in the one
            # direction that cannot be compared without the child count)
            if c_stop == 0 or \
                   c_start >= c_stop and (c_stop > 0 or c_start < 0):
                raise ValueError, u"list.index(x): x not in slice"

        # for negative slice indices, check slice before searching index
        if c_start < 0 or c_stop < 0:
            # start from right, at most up to leftmost(c_start, c_stop)
            if c_start < c_stop:
                k = -c_start
            else:
                k = -c_stop
            c_start_node = self._c_node.last
            # l is the 1-based position counted from the right end
            l = 1
            while c_start_node != c_child and l < k:
                if _isElement(c_start_node):
                    l += 1
                c_start_node = c_start_node.prev
            if c_start_node == c_child:
                # found! before slice end?
                if c_stop < 0 and l <= -c_stop:
                    raise ValueError, u"list.index(x): x not in slice"
            elif c_start < 0:
                raise ValueError, u"list.index(x): x not in slice"

        # now determine the index backwards from child
        c_child = c_child.prev
        k = 0
        if c_stop > 0:
            # we can optimize: stop after c_stop elements if not found
            while c_child != NULL and k < c_stop:
                if _isElement(c_child):
                    k += 1
                c_child = c_child.prev
            # NOTE(review): this branch never compares k against c_start;
            # confirm whether a positive start combined with a positive
            # stop is meant to be enforced here
            if k < c_stop:
                return k
        else:
            # traverse all
            while c_child != NULL:
                if _isElement(c_child):
                    k = k + 1
                c_child = c_child.prev
            if c_start > 0:
                if k >= c_start:
                    return k
            else:
                return k
        # not found: the message depends on whether a slice was requested
        if c_start != 0 or c_stop != 0:
            raise ValueError, u"list.index(x): x not in slice"
        else:
            raise ValueError, u"list.index(x): x not in list"
1180
1181     def get(self, key, default=None):
1182         u"""get(self, key, default=None)
1183
1184         Gets an element attribute.
1185         """
1186         _assertValidNode(self)
1187         return _getAttributeValue(self, key, default)
1188
1189     def keys(self):
1190         u"""keys(self)
1191
1192         Gets a list of attribute names.  The names are returned in an
1193         arbitrary order (just like for an ordinary Python dictionary).
1194         """
1195         _assertValidNode(self)
1196         return _collectAttributes(self._c_node, 1)
1197
1198     def values(self):
1199         u"""values(self)
1200
1201         Gets element attribute values as a sequence of strings.  The
1202         attributes are returned in an arbitrary order.
1203         """
1204         _assertValidNode(self)
1205         return _collectAttributes(self._c_node, 2)
1206
1207     def items(self):
1208         u"""items(self)
1209
1210         Gets element attributes, as a sequence. The attributes are returned in
1211         an arbitrary order.
1212         """
1213         _assertValidNode(self)
1214         return _collectAttributes(self._c_node, 3)
1215
1216     def getchildren(self):
1217         u"""getchildren(self)
1218
1219         Returns all direct children.  The elements are returned in document
1220         order.
1221
1222         :deprecated: Note that this method has been deprecated as of
1223           ElementTree 1.3 and lxml 2.0.  New code should use
1224           ``list(element)`` or simply iterate over elements.
1225         """
1226         _assertValidNode(self)
1227         return _collectChildren(self)
1228
1229     def getparent(self):
1230         u"""getparent(self)
1231
1232         Returns the parent of this element or None for the root element.
1233         """
1234         cdef xmlNode* c_node
1235         #_assertValidNode(self) # not needed
1236         c_node = _parentElement(self._c_node)
1237         if c_node is NULL:
1238             return None
1239         return _elementFactory(self._doc, c_node)
1240
1241     def getnext(self):
1242         u"""getnext(self)
1243
1244         Returns the following sibling of this element or None.
1245         """
1246         cdef xmlNode* c_node
1247         #_assertValidNode(self) # not needed
1248         c_node = _nextElement(self._c_node)
1249         if c_node is NULL:
1250             return None
1251         return _elementFactory(self._doc, c_node)
1252
1253     def getprevious(self):
1254         u"""getprevious(self)
1255
1256         Returns the preceding sibling of this element or None.
1257         """
1258         cdef xmlNode* c_node
1259         #_assertValidNode(self) # not needed
1260         c_node = _previousElement(self._c_node)
1261         if c_node is NULL:
1262             return None
1263         return _elementFactory(self._doc, c_node)
1264
1265     def itersiblings(self, tag=None, *, preceding=False):
1266         u"""itersiblings(self, tag=None, preceding=False)
1267
1268         Iterate over the following or preceding siblings of this element.
1269
1270         The direction is determined by the 'preceding' keyword which
1271         defaults to False, i.e. forward iteration over the following
1272         siblings.  When True, the iterator yields the preceding
1273         siblings in reverse document order, i.e. starting right before
1274         the current element and going left.  The generated elements
1275         can be restricted to a specific tag name with the 'tag'
1276         keyword.
1277         """
1278         return SiblingsIterator(self, tag, preceding=preceding)
1279
1280     def iterancestors(self, tag=None):
1281         u"""iterancestors(self, tag=None)
1282
1283         Iterate over the ancestors of this element (from parent to parent).
1284
1285         The generated elements can be restricted to a specific tag name with
1286         the 'tag' keyword.
1287         """
1288         return AncestorsIterator(self, tag)
1289
1290     def iterdescendants(self, tag=None):
1291         u"""iterdescendants(self, tag=None)
1292
1293         Iterate over the descendants of this element in document order.
1294
1295         As opposed to ``el.iter()``, this iterator does not yield the element
1296         itself.  The generated elements can be restricted to a specific tag
1297         name with the 'tag' keyword.
1298         """
1299         return ElementDepthFirstIterator(self, tag, inclusive=False)
1300
1301     def iterchildren(self, tag=None, *, reversed=False):
1302         u"""iterchildren(self, tag=None, reversed=False)
1303
1304         Iterate over the children of this element.
1305
1306         As opposed to using normal iteration on this element, the generated
1307         elements can be restricted to a specific tag name with the 'tag'
1308         keyword and reversed with the 'reversed' keyword.
1309         """
1310         return ElementChildIterator(self, tag, reversed=reversed)
1311
1312     def getroottree(self):
1313         u"""getroottree(self)
1314
1315         Return an ElementTree for the root node of the document that
1316         contains this element.
1317
1318         This is the same as following element.getparent() up the tree until it
1319         returns None (for the root element) and then build an ElementTree for
1320         the last parent that was returned."""
1321         _assertValidDoc(self._doc)
1322         return _elementTreeFactory(self._doc, None)
1323
1324     def getiterator(self, tag=None):
1325         u"""getiterator(self, tag=None)
1326
1327         Returns a sequence or iterator of all elements in the subtree in
1328         document order (depth first pre-order), starting with this
1329         element.
1330
1331         Can be restricted to find only elements with a specific tag
1332         (pass ``tag="xyz"``) or from a namespace (pass ``tag="{ns}*"``).
1333
1334         You can also pass the Element, Comment, ProcessingInstruction and
1335         Entity factory functions to look only for the specific element type.
1336
1337         :deprecated: Note that this method is deprecated as of
1338           ElementTree 1.3 and lxml 2.0.  It returns an iterator in
1339           lxml, which diverges from the original ElementTree
1340           behaviour.  If you want an efficient iterator, use the
1341           ``element.iter()`` method instead.  You should only use this
1342           method in new code if you require backwards compatibility
1343           with older versions of lxml or ElementTree.
1344         """
1345         return ElementDepthFirstIterator(self, tag)
1346
1347     def iter(self, tag=None):
1348         u"""iter(self, tag=None)
1349
1350         Iterate over all elements in the subtree in document order (depth
1351         first pre-order), starting with this element.
1352
1353         Can be restricted to find only elements with a specific tag
1354         (pass ``tag="xyz"``) or from a namespace (pass ``tag="{ns}*"``).
1355
1356         You can also pass the Element, Comment, ProcessingInstruction and
1357         Entity factory functions to look only for the specific element type.
1358         """
1359         return ElementDepthFirstIterator(self, tag)
1360
1361     def itertext(self, tag=None, *, with_tail=True):
1362         u"""itertext(self, tag=None, with_tail=True)
1363
1364         Iterates over the text content of a subtree.
1365
1366         You can pass the ``tag`` keyword argument to restrict text content to
1367         a specific tag name.
1368
1369         You can set the ``with_tail`` keyword argument to ``False`` to skip
1370         over tail text.
1371         """
1372         return ElementTextIterator(self, tag, with_tail=with_tail)
1373
1374     def makeelement(self, _tag, attrib=None, nsmap=None, **_extra):
1375         u"""makeelement(self, _tag, attrib=None, nsmap=None, **_extra)
1376
1377         Creates a new element associated with the same document.
1378         """
1379         _assertValidDoc(self._doc)
1380         return _makeElement(_tag, NULL, self._doc, None, None, None,
1381                             attrib, nsmap, _extra)
1382
1383     def find(self, path, namespaces=None):
1384         u"""find(self, path, namespaces=None)
1385
1386         Finds the first matching subelement, by tag name or path.
1387
1388         The optional ``namespaces`` argument accepts a
1389         prefix-to-namespace mapping that allows the usage of XPath
1390         prefixes in the path expression.
1391         """
1392         if isinstance(path, QName):
1393             path = (<QName>path).text
1394         return _elementpath.find(self, path, namespaces)
1395
1396     def findtext(self, path, default=None, namespaces=None):
1397         u"""findtext(self, path, default=None, namespaces=None)
1398
1399         Finds text for the first matching subelement, by tag name or path.
1400
1401         The optional ``namespaces`` argument accepts a
1402         prefix-to-namespace mapping that allows the usage of XPath
1403         prefixes in the path expression.
1404         """
1405         if isinstance(path, QName):
1406             path = (<QName>path).text
1407         return _elementpath.findtext(self, path, default, namespaces)
1408
1409     def findall(self, path, namespaces=None):
1410         u"""findall(self, path, namespaces=None)
1411
1412         Finds all matching subelements, by tag name or path.
1413
1414         The optional ``namespaces`` argument accepts a
1415         prefix-to-namespace mapping that allows the usage of XPath
1416         prefixes in the path expression.
1417         """
1418         if isinstance(path, QName):
1419             path = (<QName>path).text
1420         return _elementpath.findall(self, path, namespaces)
1421
1422     def iterfind(self, path, namespaces=None):
1423         u"""iterfind(self, path, namespaces=None)
1424
1425         Iterates over all matching subelements, by tag name or path.
1426
1427         The optional ``namespaces`` argument accepts a
1428         prefix-to-namespace mapping that allows the usage of XPath
1429         prefixes in the path expression.
1430         """
1431         if isinstance(path, QName):
1432             path = (<QName>path).text
1433         return _elementpath.iterfind(self, path, namespaces)
1434
1435     def xpath(self, _path, *, namespaces=None, extensions=None,
1436               smart_strings=True, **_variables):
1437         u"""xpath(self, _path, namespaces=None, extensions=None, smart_strings=True, **_variables)
1438
1439         Evaluate an xpath expression using the element as context node.
1440         """
1441         evaluator = XPathElementEvaluator(self, namespaces=namespaces,
1442                                           extensions=extensions,
1443                                           smart_strings=smart_strings)
1444         return evaluator(_path, **_variables)
1445
1446
cdef extern from "etree_defs.h":
    # macro call to 't->tp_new()' for fast instantiation
    # (NEW_ELEMENT is the name used in this module; "PY_NEW" is the C
    # name declared in etree_defs.h that it maps to)
    cdef object NEW_ELEMENT "PY_NEW" (object t)
1450
cdef _Element _elementFactory(_Document doc, xmlNode* c_node):
    # Returns the _Element proxy for c_node: the already registered one if
    # there is one, otherwise a newly created and registered instance of
    # the class selected by the element class lookup.  Returns None for a
    # NULL node.
    cdef _Element result
    result = getProxy(c_node)
    if result is not None:
        return result
    if c_node is NULL:
        return None

    # the class lookup may execute Python code, which may itself create a
    # proxy for this very node -- hence the re-check right after it
    element_class = LOOKUP_ELEMENT_CLASS(
        ELEMENT_CLASS_LOOKUP_STATE, doc, c_node)
    if hasProxy(c_node):
        # prevent re-entry race condition - we just called into Python
        return getProxy(c_node)
    result = NEW_ELEMENT(element_class)
    if hasProxy(c_node):
        # prevent re-entry race condition - we just called into Python
        # (the fresh instance is discarded; its node pointer is cleared
        # first so that it does not refer to c_node)
        result._c_node = NULL
        return getProxy(c_node)

    _registerProxy(result, doc, c_node)
    # _init() is only called on classes other than plain _Element
    if element_class is not _Element:
        result._init()
    return result
1474
1475
cdef class __ContentOnlyElement(_Element):
    # Base class for element proxies that carry text content only:
    # they report no children and no attributes, and every attempt to add
    # either raises TypeError.
    cdef int _raiseImmutable(self) except -1:
        raise TypeError, u"this element does not have children or attributes"

    def set(self, key, value):
        u"set(self, key, value)"
        self._raiseImmutable()

    def append(self, value):
        u"append(self, value)"
        self._raiseImmutable()

    def insert(self, index, value):
        u"insert(self, index, value)"
        self._raiseImmutable()

    def __setitem__(self, index, value):
        u"__setitem__(self, index, value)"
        self._raiseImmutable()

    property attrib:
        def __get__(self):
            # always a new, empty dict
            return {}

    property text:
        def __get__(self):
            _assertValidNode(self)
            if self._c_node.content is NULL:
                return ''
            else:
                return funicode(self._c_node.content)

        def __set__(self, value):
            cdef char* c_text
            _assertValidNode(self)
            if value is None:
                c_text = NULL
            else:
                # 'value' keeps the encoded bytes alive while c_text
                # points into them
                value = _utf8(value)
                c_text = _cstr(value)
            tree.xmlNodeSetContent(self._c_node, c_text)

    # ACCESSORS
    def __getitem__(self, x):
        u"__getitem__(self, x)"
        # no children: slices are empty, any index is out of range
        if python.PySlice_Check(x):
            return []
        else:
            raise IndexError, u"list index out of range"

    def __len__(self):
        u"__len__(self)"
        return 0

    def get(self, key, default=None):
        u"get(self, key, default=None)"
        # there are no attributes, so every lookup yields the caller's
        # fallback value (None unless specified)
        return default

    def keys(self):
        u"keys(self)"
        return []

    def items(self):
        u"items(self)"
        return []

    def values(self):
        u"values(self)"
        return []
1546
cdef class _Comment(__ContentOnlyElement):
    # Proxy for comment nodes; its tag is the Comment factory.
    property tag:
        def __get__(self):
            return Comment

    def __repr__(self):
        comment_text = self.text
        return u"<!--%s-->" % comment_text
1554
cdef class _ProcessingInstruction(__ContentOnlyElement):
    # Proxy for processing instruction nodes; its tag is the
    # ProcessingInstruction factory.
    property tag:
        def __get__(self):
            return ProcessingInstruction

    property target:
        # not in ElementTree
        def __get__(self):
            _assertValidNode(self)
            return funicode(self._c_node.name)

        def __set__(self, value):
            _assertValidNode(self)
            encoded_target = _utf8(value)
            tree.xmlNodeSetName(self._c_node, _cstr(encoded_target))

    def __repr__(self):
        content = self.text
        # without text content, only the target is shown
        if not content:
            return u"<?%s?>" % self.target
        return u"<?%s %s?>" % (self.target, content)

    def get(self, key, default=None):
        u"""get(self, key, default=None)

        Parses pseudo-attributes from the text content of the processing
        instruction and returns the value of the one named ``key``, or
        ``default`` if there is none.

        This is a convenience method for the most common case, where all
        text content consists of attribute-like name-value pairs with
        properly quoted values.  It is not guaranteed to work for all
        possible text content.
        """
        pseudo_attributes = self.attrib
        return pseudo_attributes.get(key, default)

    property attrib:
        u"""Returns a dict of all pseudo-attributes that can be parsed
        from the text content of this processing instruction.  Modifying
        the dict currently has no effect on the XML node, although this
        is not guaranteed to stay this way.
        """
        def __get__(self):
            pseudo_attributes = {}
            # the leading space lets the first pair match the pattern's
            # required leading whitespace
            for attr, value1, value2 in _FIND_PI_ATTRIBUTES(u' ' + self.text):
                pseudo_attributes[attr] = value1 or value2
            return pseudo_attributes
1602
# Bound findall() of a pattern that matches whitespace-preceded
# pseudo-attributes of the form name='value' or name="value".  Each match is
# a 3-tuple (name, single-quoted value, double-quoted value); the group of
# the quoting style that was not used comes back as an empty string.
cdef object _FIND_PI_ATTRIBUTES = re.compile(ur'\s+(\w+)\s*=\s*(?:\'([^\']*)\'|"([^"]*)")', re.U).findall
1604
cdef class _Entity(__ContentOnlyElement):
    # Proxy for entity reference nodes; its tag is the Entity factory.
    property tag:
        def __get__(self):
            return Entity

    property name:
        # not in ElementTree
        def __get__(self):
            _assertValidNode(self)
            return funicode(self._c_node.name)

        def __set__(self, value):
            _assertValidNode(self)
            value_utf = _utf8(value)
            # Reject names containing '&' or ';' with a real exception:
            # an 'assert' is skipped when assertions are disabled.  The
            # check runs on the encoded bytes so that it works the same
            # for byte string and unicode input.
            if b'&' in value_utf or b';' in value_utf:
                raise ValueError, u"Invalid entity name '%s'" % value
            tree.xmlNodeSetName(self._c_node, _cstr(value_utf))

    property text:
        # FIXME: should this be None or '&[VALUE];' or the resolved
        # entity value ?
        def __get__(self):
            _assertValidNode(self)
            return u'&%s;' % funicode(self._c_node.name)

    def __repr__(self):
        return u"&%s;" % self.name
1632
1633
cdef class QName:
    u"""QName(text_or_uri_or_element, tag=None)

    Wrapper for qualified XML names.

    Create a qualified name either from a tag name by itself or from a
    namespace URI plus a tag name.  Alternatively, pass an Element (or
    another QName) to take over its tag name.

    The ``text`` property holds the qualified name in
    ``{namespace}tagname`` notation.  The ``namespace`` and
    ``localname`` properties hold the respective parts of the tag
    name.

    QName objects can be passed wherever a tag name is expected.  Also,
    setting Element text from a QName will resolve the namespace
    prefix and set a qualified text value.  This is helpful in XML
    languages like SOAP or XML-Schema that use prefixed tag names in
    their text content.
    """
    cdef readonly object text
    cdef readonly object localname
    cdef readonly object namespace
    def __init__(self, text_or_uri_or_element, tag=None):
        # reduce the first argument to a string
        if _isString(text_or_uri_or_element):
            pass
        elif isinstance(text_or_uri_or_element, _Element):
            text_or_uri_or_element = (<_Element>text_or_uri_or_element).tag
            if not _isString(text_or_uri_or_element):
                # the element's tag is not a string
                raise ValueError, (u"Invalid input tag of type %r" %
                                   type(text_or_uri_or_element))
        elif isinstance(text_or_uri_or_element, QName):
            text_or_uri_or_element = (<QName>text_or_uri_or_element).text
        else:
            text_or_uri_or_element = unicode(text_or_uri_or_element)

        ns_utf, tag_utf = _getNsTag(text_or_uri_or_element)
        if tag is not None:
            # either ('ns', 'tag') or ('{ns}oldtag', 'newtag')
            if ns_utf is None:
                # case 1: the namespace was parsed as the tag name
                ns_utf = tag_utf
            tag_utf = _utf8(tag)
        _tagValidOrRaise(tag_utf)

        self.localname = python.PyUnicode_FromEncodedObject(
            tag_utf, 'UTF-8', NULL)
        if ns_utf is not None:
            self.namespace = python.PyUnicode_FromEncodedObject(
                ns_utf, 'UTF-8', NULL)
            self.text = u"{%s}%s" % (self.namespace, self.localname)
        else:
            self.namespace = None
            self.text = self.localname

    def __str__(self):
        qualified_name = self.text
        return qualified_name

    def __hash__(self):
        qualified_name = self.text
        return qualified_name.__hash__()

    def __richcmp__(one, other, int op):
        # operands that are not strings are converted with unicode() first
        left = one if _isString(one) else unicode(one)
        right = other if _isString(other) else unicode(other)
        return python.PyObject_RichCompare(left, right, op)
1695
1696
1697 cdef public class _ElementTree [ type LxmlElementTreeType,
1698                                  object LxmlElementTree ]:
1699     cdef _Document _doc
1700     cdef _Element _context_node
1701
1702     # Note that _doc is only used to store the original document if we do not
1703     # have a _context_node.  All methods should prefer self._context_node._doc
1704     # to honour tree restructuring.  _doc can happily be None!
1705
1706     cdef _assertHasRoot(self):
1707         u"""We have to take care here: the document may not have a root node!
1708         This can happen if ElementTree() is called without any argument and
1709         the caller 'forgets' to call parse() afterwards, so this is a bug in
1710         the caller program.
1711         """
1712         assert self._context_node is not None, \
1713                u"ElementTree not initialized, missing root"
1714
1715     def parse(self, source, _BaseParser parser=None, *, base_url=None):
1716         u"""parse(self, source, parser=None, base_url=None)
1717
1718         Updates self with the content of source and returns its root
1719         """
1720         cdef _Document doc = None
1721         try:
1722             doc = _parseDocument(source, parser, base_url)
1723             self._context_node = doc.getroot()
1724             if self._context_node is None:
1725                 self._doc = doc
1726         except _TargetParserResult, result_container:
1727             # raises a TypeError if we don't get an _Element
1728             self._context_node = result_container.result
1729         return self._context_node
1730
1731     def _setroot(self, _Element root not None):
1732         u"""_setroot(self, root)
1733
1734         Relocate the ElementTree to a new root node.
1735         """
1736         _assertValidNode(root)
1737         if root._c_node.type != tree.XML_ELEMENT_NODE:
1738             raise TypeError, u"Only elements can be the root of an ElementTree"
1739         self._context_node = root
1740         self._doc = None
1741
1742     def getroot(self):
1743         u"""getroot(self)
1744
1745         Gets the root element for this tree.
1746         """
1747         return self._context_node
1748
1749     def __copy__(self):
1750         return _elementTreeFactory(self._doc, self._context_node)
1751
1752     def __deepcopy__(self, memo):
1753         cdef _Element root
1754         cdef _Document doc
1755         cdef xmlDoc* c_doc
1756         if self._context_node is not None:
1757             root = self._context_node.__copy__()
1758             _copyNonElementSiblings(self._context_node._c_node, root._c_node)
1759             doc = root._doc
1760             c_doc = self._context_node._doc._c_doc
1761             if c_doc.intSubset is not NULL and doc._c_doc.intSubset is NULL:
1762                 doc._c_doc.intSubset = tree.xmlCopyDtd(c_doc.intSubset)
1763                 if doc._c_doc.intSubset is NULL:
1764                     python.PyErr_NoMemory()
1765             if c_doc.extSubset is not NULL and not doc._c_doc.extSubset is NULL:
1766                 doc._c_doc.extSubset = tree.xmlCopyDtd(c_doc.extSubset)
1767                 if doc._c_doc.extSubset is NULL:
1768                     python.PyErr_NoMemory()
1769             return _elementTreeFactory(None, root)
1770         elif self._doc is not None:
1771             _assertValidDoc(self._doc)
1772             c_doc = tree.xmlCopyDoc(self._doc._c_doc, 1)
1773             if c_doc is NULL:
1774                 python.PyErr_NoMemory()
1775             doc = _documentFactory(c_doc, self._doc._parser)
1776             return _elementTreeFactory(doc, None)
1777         else:
1778             # so what ...
1779             return self
1780
1781     # not in ElementTree, read-only
1782     property docinfo:
1783         u"""Information about the document provided by parser and DTD.  This
1784         value is only defined for ElementTree objects based on the root node
1785         of a parsed document (e.g.  those returned by the parse functions),
1786         not for trees that were built manually.
1787         """
1788         def __get__(self):
1789             self._assertHasRoot()
1790             return DocInfo(self._context_node._doc)
1791
1792     # not in ElementTree, read-only
1793     property parser:
1794         u"""The parser that was used to parse the document in this ElementTree.
1795         """
1796         def __get__(self):
1797             if self._context_node is not None and \
1798                    self._context_node._doc is not None:
1799                 return self._context_node._doc._parser
1800             if self._doc is not None:
1801                 return self._doc._parser
1802             return None
1803
1804     def write(self, file, *, encoding=None, method=u"xml",
1805               pretty_print=False, xml_declaration=None, with_tail=True,
1806               standalone=None, docstring=None, compression=0,
1807               exclusive=False, with_comments=True):
1808         u"""write(self, file, encoding=None, method="xml",
1809                   pretty_print=False, xml_declaration=None, with_tail=True,
1810                   standalone=None, compression=0,
1811                   exclusive=False, with_comments=True)
1812
1813         Write the tree to a filename, file or file-like object.
1814
1815         Defaults to ASCII encoding and writing a declaration as needed.
1816
1817         The keyword argument 'method' selects the output method:
1818         'xml', 'html', 'text' or 'c14n'.  Default is 'xml'.
1819
1820         The ``exclusive`` and ``with_comments`` arguments are only
1821         used with C14N output, where they request exclusive and
1822         uncommented C14N serialisation respectively.
1823
1824         Passing a boolean value to the ``standalone`` option will
1825         output an XML declaration with the corresponding
1826         ``standalone`` flag.
1827
1828         The ``compression`` option enables GZip compression level 1-9.
1829         """
1830         cdef bint write_declaration
1831         cdef int is_standalone
1832         self._assertHasRoot()
1833         _assertValidNode(self._context_node)
1834         if compression is None or compression < 0:
1835             compression = 0
1836         # C14N serialisation
1837         if method == 'c14n':
1838             if encoding is not None:
1839                 raise ValueError("Cannot specify encoding with C14N")
1840             if xml_declaration:
1841                 raise ValueError("Cannot enable XML declaration in C14N")
1842             _tofilelikeC14N(file, self._context_node, exclusive, with_comments,
1843                             compression)
1844             return
1845         if not with_comments:
1846             raise ValueError("Can only discard comments in C14N serialisation")
1847         # suppress decl. in default case (purely for ElementTree compatibility)
1848         if xml_declaration is not None:
1849             write_declaration = xml_declaration
1850             if encoding is None:
1851                 encoding = u'ASCII'
1852             else:
1853                 encoding = encoding.upper()
1854         elif encoding is None:
1855             encoding = u'ASCII'
1856             write_declaration = 0
1857         else:
1858             encoding = encoding.upper()
1859             write_declaration = encoding not in \
1860                                   (u'US-ASCII', u'ASCII', u'UTF8', u'UTF-8')
1861         if standalone is None:
1862             is_standalone = -1
1863         elif standalone:
1864             write_declaration = 1
1865             is_standalone = 1
1866         else:
1867             write_declaration = 1
1868             is_standalone = 0
1869         _tofilelike(file, self._context_node, encoding, docstring, method,
1870                     write_declaration, 1, pretty_print, with_tail,
1871                     is_standalone, compression)
1872
1873     def getpath(self, _Element element not None):
1874         u"""getpath(self, element)
1875
1876         Returns a structural, absolute XPath expression to find that element.
1877         """
1878         cdef _Document doc
1879         cdef _Element root
1880         cdef xmlDoc* c_doc
1881         cdef char* c_path
1882         _assertValidNode(element)
1883         if self._context_node is not None:
1884             root = self._context_node
1885             doc = root._doc
1886         elif self._doc is not None:
1887             doc = self._doc
1888             root = doc.getroot()
1889         else:
1890             raise ValueError, u"Element is not in this tree."
1891         _assertValidDoc(doc)
1892         _assertValidNode(root)
1893         if element._doc is not doc:
1894             raise ValueError, u"Element is not in this tree."
1895
1896         c_doc = _fakeRootDoc(doc._c_doc, root._c_node)
1897         c_path = tree.xmlGetNodePath(element._c_node)
1898         _destroyFakeDoc(doc._c_doc, c_doc)
1899         if c_path is NULL:
1900             python.PyErr_NoMemory()
1901         path = funicode(c_path)
1902         tree.xmlFree(c_path)
1903         return path
1904
1905     def getiterator(self, tag=None):
1906         u"""getiterator(self, tag=None)
1907
1908         Returns a sequence or iterator of all elements in document order
1909         (depth first pre-order), starting with the root element.
1910
1911         Can be restricted to find only elements with a specific tag
1912         (pass ``tag="xyz"`` or ``tag="{ns}xyz"``) or from a namespace
1913         (pass ``tag="{ns}*"``).
1914
1915         You can also pass the Element, Comment, ProcessingInstruction and
1916         Entity factory functions to look only for the specific element type.
1917
1918         :deprecated: Note that this method is deprecated as of
1919           ElementTree 1.3 and lxml 2.0.  It returns an iterator in
1920           lxml, which diverges from the original ElementTree
1921           behaviour.  If you want an efficient iterator, use the
1922           ``tree.iter()`` method instead.  You should only use this
1923           method in new code if you require backwards compatibility
1924           with older versions of lxml or ElementTree.
1925         """
1926         root = self.getroot()
1927         if root is None:
1928             return ()
1929         return root.getiterator(tag)
1930
1931     def iter(self, tag=None):
1932         u"""iter(self, tag=None)
1933
1934         Creates an iterator for the root element.  The iterator loops over
1935         all elements in this tree, in document order.
1936         """
1937         root = self.getroot()
1938         if root is None:
1939             return ()
1940         return root.iter(tag)
1941
1942     def find(self, path, namespaces=None):
1943         u"""find(self, path, namespaces=None)
1944
1945         Finds the first toplevel element with given tag.  Same as
1946         ``tree.getroot().find(path)``.
1947
1948         The optional ``namespaces`` argument accepts a
1949         prefix-to-namespace mapping that allows the usage of XPath
1950         prefixes in the path expression.
1951         """
1952         self._assertHasRoot()
1953         root = self.getroot()
1954         if _isString(path):
1955             start = path[:1]
1956             if start == u"/":
1957                 path = u"." + path
1958             elif start == b"/":
1959                 path = b"." + path
1960         return root.find(path, namespaces)
1961
1962     def findtext(self, path, default=None, namespaces=None):
1963         u"""findtext(self, path, default=None, namespaces=None)
1964
1965         Finds the text for the first element matching the ElementPath
1966         expression.  Same as getroot().findtext(path)
1967
1968         The optional ``namespaces`` argument accepts a
1969         prefix-to-namespace mapping that allows the usage of XPath
1970         prefixes in the path expression.
1971         """
1972         self._assertHasRoot()
1973         root = self.getroot()
1974         if _isString(path):
1975             start = path[:1]
1976             if start == u"/":
1977                 path = u"." + path
1978             elif start == b"/":
1979                 path = b"." + path
1980         return root.findtext(path, default, namespaces)
1981
1982     def findall(self, path, namespaces=None):
1983         u"""findall(self, path, namespaces=None)
1984
1985         Finds all elements matching the ElementPath expression.  Same as
1986         getroot().findall(path).
1987
1988         The optional ``namespaces`` argument accepts a
1989         prefix-to-namespace mapping that allows the usage of XPath
1990         prefixes in the path expression.
1991         """
1992         self._assertHasRoot()
1993         root = self.getroot()
1994         if _isString(path):
1995             start = path[:1]
1996             if start == u"/":
1997                 path = u"." + path
1998             elif start == b"/":
1999                 path = b"." + path
2000         return root.findall(path, namespaces)
2001
2002     def iterfind(self, path, namespaces=None):
2003         u"""iterfind(self, path, namespaces=None)
2004
2005         Iterates over all elements matching the ElementPath expression.
2006         Same as getroot().iterfind(path).
2007
2008         The optional ``namespaces`` argument accepts a
2009         prefix-to-namespace mapping that allows the usage of XPath
2010         prefixes in the path expression.
2011         """
2012         self._assertHasRoot()
2013         root = self.getroot()
2014         if _isString(path):
2015             start = path[:1]
2016             if start == u"/":
2017                 path = u"." + path
2018             elif start == b"/":
2019                 path = b"." + path
2020         return root.iterfind(path, namespaces)
2021
2022     def xpath(self, _path, *, namespaces=None, extensions=None,
2023               smart_strings=True, **_variables):
2024         u"""xpath(self, _path, namespaces=None, extensions=None, smart_strings=True, **_variables)
2025
2026         XPath evaluate in context of document.
2027
2028         ``namespaces`` is an optional dictionary with prefix to namespace URI
2029         mappings, used by XPath.  ``extensions`` defines additional extension
2030         functions.
2031
2032         Returns a list (nodeset), or bool, float or string.
2033
2034         In case of a list result, return Element for element nodes,
2035         string for text and attribute values.
2036
2037         Note: if you are going to apply multiple XPath expressions
2038         against the same document, it is more efficient to use
2039         XPathEvaluator directly.
2040         """
2041         self._assertHasRoot()
2042         evaluator = XPathDocumentEvaluator(self, namespaces=namespaces,
2043                                            extensions=extensions,
2044                                            smart_strings=smart_strings)
2045         return evaluator(_path, **_variables)
2046
2047     def xslt(self, _xslt, extensions=None, access_control=None, **_kw):
2048         u"""xslt(self, _xslt, extensions=None, access_control=None, **_kw)
2049
2050         Transform this document using other document.
2051
2052         xslt is a tree that should be XSLT
2053         keyword parameters are XSLT transformation parameters.
2054
2055         Returns the transformed tree.
2056
2057         Note: if you are going to apply the same XSLT stylesheet against
2058         multiple documents, it is more efficient to use the XSLT
2059         class directly.
2060         """
2061         self._assertHasRoot()
2062         style = XSLT(_xslt, extensions=extensions,
2063                      access_control=access_control)
2064         return style(self, **_kw)
2065
2066     def relaxng(self, relaxng):
2067         u"""relaxng(self, relaxng)
2068
2069         Validate this document using other document.
2070
2071         The relaxng argument is a tree that should contain a Relax NG schema.
2072
2073         Returns True or False, depending on whether validation
2074         succeeded.
2075
2076         Note: if you are going to apply the same Relax NG schema against
2077         multiple documents, it is more efficient to use the RelaxNG
2078         class directly.
2079         """
2080         self._assertHasRoot()
2081         schema = RelaxNG(relaxng)
2082         return schema.validate(self)
2083
2084     def xmlschema(self, xmlschema):
2085         u"""xmlschema(self, xmlschema)
2086
2087         Validate this document using other document.
2088
2089         The xmlschema argument is a tree that should contain an XML Schema.
2090
2091         Returns True or False, depending on whether validation
2092         succeeded.
2093
2094         Note: If you are going to apply the same XML Schema against
2095         multiple documents, it is more efficient to use the XMLSchema
2096         class directly.
2097         """
2098         self._assertHasRoot()
2099         schema = XMLSchema(xmlschema)
2100         return schema.validate(self)
2101
2102     def xinclude(self):
2103         u"""xinclude(self)
2104
2105         Process the XInclude nodes in this document and include the
2106         referenced XML fragments.
2107
2108         There is support for loading files through the file system, HTTP and
2109         FTP.
2110
2111         Note that XInclude does not support custom resolvers in Python space
2112         due to restrictions of libxml2 <= 2.6.29.
2113         """
2114         self._assertHasRoot()
2115         XInclude()(self._context_node)
2116
2117     def write_c14n(self, file, *, exclusive=False, with_comments=True,
2118                    compression=0):
2119         u"""write_c14n(self, file, exclusive=False, with_comments=True,
2120                        compression=0)
2121
2122         C14N write of document. Always writes UTF-8.
2123
2124         The ``compression`` option enables GZip compression level 1-9.
2125         """
2126         self._assertHasRoot()
2127         _assertValidNode(self._context_node)
2128         if compression is None or compression < 0:
2129             compression = 0
2130         _tofilelikeC14N(file, self._context_node, exclusive, with_comments,
2131                         compression)
2132
cdef _ElementTree _elementTreeFactory(_Document doc, _Element context_node):
    # create a plain _ElementTree for the given document / context node
    cdef _ElementTree new_tree = _newElementTree(doc, context_node, _ElementTree)
    return new_tree
2135
cdef _ElementTree _newElementTree(_Document doc, _Element context_node,
                                  object baseclass):
    # Instantiate ``baseclass`` and attach it to ``context_node``.  If no
    # context node is given, the root node of ``doc`` is used; if there is
    # none either, the document itself is stored on the tree.
    cdef _ElementTree new_tree
    new_tree = baseclass()
    if context_node is None and doc is not None:
        context_node = doc.getroot()
    if context_node is not None:
        _assertValidNode(context_node)
    else:
        _assertValidDoc(doc)
        new_tree._doc = doc
    new_tree._context_node = context_node
    return new_tree
2149
2150
cdef class _Attrib:
    u"""Dict-like proxy object behind the ``Element.attrib`` property.
    """
    cdef _Element _element
    def __cinit__(self, _Element element not None):
        _assertValidNode(element)
        self._element = element

    # MANIPULATORS
    def __setitem__(self, key, value):
        _setAttributeValue(self._element, key, value)

    def __delitem__(self, key):
        _delAttribute(self._element, key)

    def update(self, sequence_or_dict):
        # accepts a dict or any iterable of (key, value) pairs
        if isinstance(sequence_or_dict, dict):
            pairs = sequence_or_dict.items()
        else:
            pairs = sequence_or_dict
        for key, value in pairs:
            _setAttributeValue(self._element, key, value)

    def pop(self, key, *default):
        cdef Py_ssize_t default_count = python.PyTuple_GET_SIZE(default)
        if default_count > 1:
            raise TypeError, u"pop expected at most 2 arguments, got %d" % (
                default_count+1)
        result = _getAttributeValue(self._element, key, None)
        if result is not None:
            # attribute found: remove it and return its value
            _delAttribute(self._element, key)
        elif default_count == 0:
            raise KeyError, key
        else:
            result = python.PyTuple_GET_ITEM(default, 0)
            python.Py_INCREF(result)
        return result

    def clear(self):
        cdef xmlNode* c_node = self._element._c_node
        # keep removing the first property until none is left
        while c_node.properties is not NULL:
            tree.xmlRemoveProp(c_node.properties)

    # ACCESSORS
    def __repr__(self):
        as_dict = dict( _attributeIteratorFactory(self._element, 3) )
        return repr(as_dict)

    def __getitem__(self, key):
        value = _getAttributeValue(self._element, key, None)
        if value is None:
            raise KeyError, key
        return value

    def __nonzero__(self):
        # true as soon as one real attribute node is found
        cdef xmlAttr* c_attr = self._element._c_node.properties
        while c_attr is not NULL:
            if c_attr.type == tree.XML_ATTRIBUTE_NODE:
                return 1
            c_attr = c_attr.next
        return 0

    def __len__(self):
        # counts only the properties that are attribute nodes
        cdef Py_ssize_t count = 0
        cdef xmlAttr* c_attr = self._element._c_node.properties
        while c_attr is not NULL:
            if c_attr.type == tree.XML_ATTRIBUTE_NODE:
                count += 1
            c_attr = c_attr.next
        return count

    def get(self, key, default=None):
        return _getAttributeValue(self._element, key, default)

    # The second argument of _collectAttributes() selects what is
    # collected: 1 for keys, 2 for values, 3 for (key, value) items.
    def keys(self):
        return _collectAttributes(self._element._c_node, 1)

    def __iter__(self):
        names = _collectAttributes(self._element._c_node, 1)
        return iter(names)

    def iterkeys(self):
        names = _collectAttributes(self._element._c_node, 1)
        return iter(names)

    def values(self):
        return _collectAttributes(self._element._c_node, 2)

    def itervalues(self):
        attribute_values = _collectAttributes(self._element._c_node, 2)
        return iter(attribute_values)

    def items(self):
        return _collectAttributes(self._element._c_node, 3)

    def iteritems(self):
        pairs = _collectAttributes(self._element._c_node, 3)
        return iter(pairs)

    def has_key(self, key):
        return True if key in self else False

    def __contains__(self, key):
        cdef xmlNode* c_node
        cdef char* c_href
        ns, tag = _getNsTag(key)
        c_node = self._element._c_node
        if ns is None:
            c_href = NULL
        else:
            c_href = _cstr(ns)
        if tree.xmlHasNsProp(c_node, _cstr(tag), c_href):
            return 1
        return 0

    def __richcmp__(one, other, int op):
        # compare as plain dicts; convert whichever operand is not a dict
        left = one if python.PyDict_Check(one) else dict(one)
        right = other if python.PyDict_Check(other) else dict(other)
        return python.PyObject_RichCompare(left, right, op)
2271
2272
cdef class _AttribIterator:
    u"""Iterator over the attributes of an element - for internal use only!
    """
    # XML attributes must not be removed while running!
    cdef _Element _node
    cdef xmlAttr* _c_attr
    cdef int _keysvalues # 1 - keys, 2 - values, 3 - items (key, value)
    def __iter__(self):
        return self

    def __next__(self):
        cdef xmlAttr* c_attr
        if self._node is None:
            # already exhausted
            raise StopIteration
        # skip everything that is not an attribute node
        c_attr = self._c_attr
        while c_attr is not NULL and c_attr.type != tree.XML_ATTRIBUTE_NODE:
            c_attr = c_attr.next
        if c_attr is NULL:
            # end reached: drop the element reference
            self._node = None
            raise StopIteration

        # remember where to continue on the next call
        self._c_attr = c_attr.next
        if self._keysvalues == 1:
            return _namespacedName(<xmlNode*>c_attr)
        if self._keysvalues == 2:
            return _attributeValue(self._node._c_node, c_attr)
        return (_namespacedName(<xmlNode*>c_attr),
                _attributeValue(self._node._c_node, c_attr))
2302
cdef object _attributeIteratorFactory(_Element element, int keysvalues):
    # Returns an _AttribIterator over the properties of ``element``, or
    # ITER_EMPTY if it has none.  ``keysvalues`` selects what is returned:
    # 1 - keys, 2 - values, 3 - items (key, value).
    cdef _AttribIterator iterator
    cdef xmlAttr* c_first_attr = element._c_node.properties
    if c_first_attr is NULL:
        return ITER_EMPTY
    iterator = _AttribIterator()
    iterator._keysvalues = keysvalues
    iterator._c_attr = c_first_attr
    iterator._node = element
    return iterator
2312
2313
cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher,
                                       type LxmlElementTagMatcherType ]:
    # _pystrings stores the (namespace, name) result of _getNsTag();
    # _href and _name are C strings taken from its items via _cstr().
    # NOTE(review): presumably _pystrings has to stay referenced for as
    # long as these pointers are used - confirm against _cstr().
    cdef object _pystrings
    cdef int _node_type
    cdef char* _href
    cdef char* _name
    cdef _initTagMatch(self, tag):
        # Set up _node_type, _href and _name from ``tag``, which is None,
        # one of the factories Comment, ProcessingInstruction, Entity or
        # Element, or a tag name.  A node type of 0 means "no restriction".
        self._href = NULL
        self._name = NULL
        if tag is None:
            self._node_type = 0
        elif tag is Comment:
            self._node_type = tree.XML_COMMENT_NODE
        elif tag is ProcessingInstruction:
            self._node_type = tree.XML_PI_NODE
        elif tag is Entity:
            self._node_type = tree.XML_ENTITY_REF_NODE
        elif tag is Element:
            self._node_type = tree.XML_ELEMENT_NODE
        else:
            # a tag name: match element nodes by namespace and name
            self._node_type = tree.XML_ELEMENT_NODE
            self._pystrings = _getNsTag(tag)
            ns_utf = self._pystrings[0]
            if ns_utf is not None:
                self._href = _cstr(ns_utf)
            self._name = _cstr(self._pystrings[1])
            if self._name[0] == c'*' and self._name[1] == c'\0':
                # the name "*" is a wildcard: do not restrict the name
                self._name = NULL
2341
cdef public class _ElementIterator(_ElementTagMatcher) [
    object LxmlElementIterator, type LxmlElementIteratorType ]:
    # we keep Python references here to control GC
    # _node is the element that the next call to __next__() returns;
    # _next_element steps from one C node to the following candidate
    cdef _Element _node
    cdef _node_to_node_function _next_element
    def __iter__(self):
        return self

    cdef void _storeNext(self, _Element node):
        # Store the first node after ``node`` that passes the tag filter
        # in self._node, or None if there is none.
        cdef xmlNode* c_node
        c_node = self._next_element(node._c_node)
        while c_node is not NULL:
            if self._node_type == 0:
                # no filter configured: accept any node
                break
            if self._node_type == c_node.type and \
                   _tagMatches(c_node, self._href, self._name):
                break
            c_node = self._next_element(c_node)
        if c_node is not NULL:
            # Python ref:
            self._node = _elementFactory(node._doc, c_node)
        else:
            self._node = None

    def __next__(self):
        cdef xmlNode* c_node
        cdef _Element current_node
        # Python ref:
        current_node = self._node
        if current_node is None:
            raise StopIteration
        # look up the successor before handing out the current node
        self._storeNext(current_node)
        return current_node
2373
cdef class ElementChildIterator(_ElementIterator):
    u"""ElementChildIterator(self, node, tag=None, reversed=False)
    Iterates over the children of an element.

    The boolean keyword ``reversed`` makes the iterator start at the
    last child and step backwards.
    """
    def __cinit__(self, _Element node not None, tag=None, *, reversed=False):
        cdef xmlNode* c_node
        _assertValidNode(node)
        self._initTagMatch(tag)
        if reversed:
            self._next_element = _previousElement
            c_node = _findChildBackwards(node._c_node, 0)
        else:
            self._next_element = _nextElement
            c_node = _findChildForwards(node._c_node, 0)
        if tag is not None:
            # advance to the first child that passes the tag filter
            while c_node is not NULL:
                if self._node_type == 0:
                    break
                if self._node_type == c_node.type and \
                       _tagMatches(c_node, self._href, self._name):
                    break
                c_node = self._next_element(c_node)
        if c_node is not NULL:
            # store Python ref:
            self._node = _elementFactory(node._doc, c_node)
2397
cdef class SiblingsIterator(_ElementIterator):
    u"""SiblingsIterator(self, node, tag=None, preceding=False)
    Iterates over the siblings of an element.

    The boolean keyword ``preceding`` selects the direction: if true,
    the iterator steps backwards from the element instead of forwards.
    """
    def __cinit__(self, _Element node not None, tag=None, *, preceding=False):
        _assertValidNode(node)
        self._initTagMatch(tag)
        if not preceding:
            self._next_element = _nextElement
        else:
            self._next_element = _previousElement
        # iteration starts after the given node, not with it
        self._storeNext(node)
2412
cdef class AncestorsIterator(_ElementIterator):
    u"""AncestorsIterator(self, node, tag=None)
    Iterates over the ancestors of an element (from parent to parent).

    An optional ``tag`` restricts the iteration to matching ancestors.
    """
    def __cinit__(self, _Element node not None, tag=None):
        _assertValidNode(node)
        self._initTagMatch(tag)
        # each step moves one level up in the tree
        self._next_element = _parentElement
        # start at the first ancestor reachable from ``node``
        self._storeNext(node)
2422
cdef class ElementDepthFirstIterator(_ElementTagMatcher):
    u"""ElementDepthFirstIterator(self, node, tag=None, inclusive=True)
    Iterates over an element and its sub-elements in document order (depth
    first pre-order).

    Note that this also includes comments, entities and processing
    instructions.  To filter them out, check if the ``tag`` property
    of the returned element is a string (i.e. not None and not a
    factory function), or pass the ``Element`` factory for the ``tag``
    keyword.

    If the optional ``tag`` argument is not None, the iterator returns only
    the elements that match the respective name and namespace.

    The optional boolean argument 'inclusive' defaults to True and can be set
    to False to exclude the start element itself.

    Note that the behaviour of this iterator is completely undefined if the
    tree it traverses is modified during iteration.
    """
    # we keep Python references here to control GC
    # keep next node to return and the (s)top node
    cdef _Element _next_node
    cdef _Element _top_node
    def __cinit__(self, _Element node not None, tag=None, *, inclusive=True):
        _assertValidNode(node)
        self._top_node  = node
        self._next_node = node
        self._initTagMatch(tag)
        # Step past the start node immediately if it was excluded by the
        # caller or if a tag filter is set that the start node fails.
        if not inclusive or \
               tag is not None and \
               self._node_type != 0 and \
               (self._node_type != node._c_node.type or
                not _tagMatches(node._c_node, self._href, self._name)):
            # this cannot raise StopIteration, self._next_node != None
            self.__next__()

    def __iter__(self):
        return self

    def __next__(self):
        # Return the node found by the previous step and look up the one
        # after it, so that a Python reference to the upcoming node is held.
        cdef xmlNode* c_node
        cdef _Element current_node
        if self._next_node is None:
            raise StopIteration
        current_node = self._next_node
        c_node = self._next_node._c_node
        # without a name or namespace filter, only the node type is checked
        if self._name is NULL and self._href is NULL:
            c_node = self._nextNodeAnyTag(c_node)
        else:
            c_node = self._nextNodeMatchTag(c_node)
        if c_node is NULL:
            # nothing left: the next call raises StopIteration
            self._next_node = None
        else:
            self._next_node = _elementFactory(current_node._doc, c_node)
        return current_node

    cdef xmlNode* _nextNodeAnyTag(self, xmlNode* c_node):
        # Find the next node after ``c_node`` whose node type is accepted
        # (``_node_type`` 0 accepts any type); NULL if there is none.
        # NOTE(review): the BEGIN/END macros presumably walk the subtree
        # below ``_top_node`` starting from ``c_node`` - confirm against
        # their definition.
        cdef int node_type = self._node_type
        tree.BEGIN_FOR_EACH_ELEMENT_FROM(self._top_node._c_node, c_node, 0)
        if node_type == 0 or node_type == c_node.type:
            return c_node
        tree.END_FOR_EACH_ELEMENT_FROM(c_node)
        return NULL

    cdef xmlNode* _nextNodeMatchTag(self, xmlNode* c_node):
        # Find the next element node after ``c_node`` that matches the
        # name/namespace filter; NULL if there is none.
        cdef char* c_name = NULL
        if self._name is not NULL:
            # look the name up in the document dict so that candidates can
            # be pre-filtered by pointer comparison below
            c_name = tree.xmlDictExists(c_node.doc.dict, self._name, -1)
            if c_name is NULL:
                # not found in dict => not in document at all
                return NULL
        tree.BEGIN_FOR_EACH_ELEMENT_FROM(self._top_node._c_node, c_node, 0)
        if c_node.type == tree.XML_ELEMENT_NODE:
            # pointer identity check first, full tag match second
            if (c_name is NULL or c_name is c_node.name) and \
                    _tagMatches(c_node, self._href, self._name):
                return c_node
        tree.END_FOR_EACH_ELEMENT_FROM(c_node)
        return NULL
2502
cdef class ElementTextIterator:
    u"""ElementTextIterator(self, element, tag=None, with_tail=True)
    Iterates over the text content of a subtree.

    The ``tag`` keyword argument restricts the text content to that of
    elements with a specific tag name.

    Setting the ``with_tail`` keyword argument to ``False`` skips over
    tail text.
    """
    cdef object _nextEvent
    cdef _Element _start_element
    def __cinit__(self, _Element element not None, tag=None, *, with_tail=True):
        _assertValidNode(element)
        # tail text is picked up on "end" events, so only request them
        # when tails are wanted
        events = (u"start", u"end") if with_tail else (u"start",)
        self._start_element = element
        self._nextEvent = iterwalk(element, events=events, tag=tag).__next__

    def __iter__(self):
        return self

    def __next__(self):
        cdef _Element element
        while True:
            event, element = self._nextEvent() # raises StopIteration
            if event == u"start":
                text = element.text
            elif element is not self._start_element:
                text = element.tail
            else:
                # the tail of the start element is not part of the subtree
                continue
            if text is not None:
                return text
2537
cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf) except NULL:
    # Create a new element node named ``name_utf`` in the document ``c_doc``.
    # ``name_utf`` is passed through _cstr(), i.e. presumably a UTF-8 encoded
    # byte string.  Raises MemoryError if libxml2 cannot create the node.
    cdef xmlNode* c_node
    c_node = tree.xmlNewDocNode(c_doc, NULL, _cstr(name_utf), NULL)
    if c_node is NULL:
        # The ``except NULL`` declaration makes callers treat a NULL return
        # as a raised exception, so an exception must really be set here.
        raise MemoryError()
    return c_node
2542
cdef xmlNode* _createComment(xmlDoc* c_doc, char* text):
    # Create a comment node with content ``text`` in ``c_doc``.
    # Returns whatever xmlNewDocComment() returns, without a NULL check.
    return tree.xmlNewDocComment(c_doc, text)
2547
cdef xmlNode* _createPI(xmlDoc* c_doc, char* target, char* text):
    # Create a processing instruction node ``<?target text?>`` in ``c_doc``.
    # Returns whatever xmlNewDocPI() returns, without a NULL check.
    return tree.xmlNewDocPI(c_doc, target, text)
2552
cdef xmlNode* _createEntity(xmlDoc* c_doc, char* name):
    # Create an entity reference node for ``name`` in ``c_doc``.
    # Returns whatever xmlNewReference() returns, without a NULL check.
    return tree.xmlNewReference(c_doc, name)
2557
2558 # module-level API for ElementTree
2559
def Element(_tag, attrib=None, nsmap=None, **_extra):
    u"""Element(_tag, attrib=None, nsmap=None, **_extra)

    Element factory.  Returns a new object that implements the Element
    interface.

    The `_Element.makeelement()` and `_BaseParser.makeelement()` methods
    provide a faster way to create an Element within a specific document
    or parser context.
    """
    # no existing document, parser, base URL, text or tail is involved
    new_element = _makeElement(_tag, NULL, None, None, None, None,
                               attrib, nsmap, _extra)
    return new_element
2572
def Comment(text=None):
    u"""Comment(text=None)

    Comment element factory.  Creates a special element that will be
    serialized as an XML comment.  Without ``text``, the comment is empty.
    """
    cdef _Document doc
    cdef xmlNode*  c_node
    cdef xmlDoc*   c_doc
    if text is not None:
        text_utf = _utf8(text)
    else:
        text_utf = b''
    # the comment lives in a fresh document of its own
    c_doc = _newXMLDoc()
    doc = _documentFactory(c_doc, None)
    c_node = _createComment(c_doc, _cstr(text_utf))
    tree.xmlAddChild(<xmlNode*>c_doc, c_node)
    return _elementFactory(doc, c_node)
2591
def ProcessingInstruction(target, text=None):
    u"""ProcessingInstruction(target, text=None)

    ProcessingInstruction element factory.  Creates a special element that
    will be serialized as an XML processing instruction.  Without ``text``,
    the instruction has empty content.
    """
    cdef _Document doc
    cdef xmlNode*  c_node
    cdef xmlDoc*   c_doc
    target_utf = _utf8(target)
    if text is not None:
        text_utf = _utf8(text)
    else:
        text_utf = b''
    # the processing instruction lives in a fresh document of its own
    c_doc = _newXMLDoc()
    doc = _documentFactory(c_doc, None)
    c_node = _createPI(c_doc, _cstr(target_utf), _cstr(text_utf))
    tree.xmlAddChild(<xmlNode*>c_doc, c_node)
    return _elementFactory(doc, c_node)
2611
# short alias for the ProcessingInstruction factory
PI = ProcessingInstruction
2613
cdef class CDATA:
    u"""CDATA(data)

    CDATA factory.  Creates an opaque data object that can be assigned
    as Element text.  Typical usage::

        >>> from lxml import etree
        >>> el = etree.Element('content')
        >>> el.text = etree.CDATA('a string')
    """
    cdef object _utf8_data
    def __init__(self, data):
        # only the UTF-8 converted form of the data is kept
        utf8_data = _utf8(data)
        self._utf8_data = utf8_data
2627
def Entity(name):
    u"""Entity(name)

    Entity factory.  Creates a special element that will be serialized
    as an XML entity reference or character reference.  Entities are
    not declared in the document automatically, though: a document that
    uses entity references requires a DTD to define the entities.

    Raises ValueError if ``name`` is neither a valid character reference
    (starting with '#') nor a valid XML name.
    """
    cdef _Document doc
    cdef xmlNode*  c_node
    cdef xmlDoc*   c_doc
    cdef char* c_name
    name_utf = _utf8(name)
    c_name = _cstr(name_utf)
    if c_name[0] != c'#':
        # named entity reference
        if not _xmlNameIsValid(c_name):
            raise ValueError(u"Invalid entity reference: '%s'" % name)
    elif not _characterReferenceIsValid(c_name + 1):
        # character reference: validate the part after the '#'
        raise ValueError(u"Invalid character reference: '%s'" % name)
    c_doc = _newXMLDoc()
    doc = _documentFactory(c_doc, None)
    c_node = _createEntity(c_doc, c_name)
    tree.xmlAddChild(<xmlNode*>c_doc, c_node)
    return _elementFactory(doc, c_node)
2653
def SubElement(_Element _parent not None, _tag,
               attrib=None, nsmap=None, **_extra):
    u"""SubElement(_parent, _tag, attrib=None, nsmap=None, **_extra)

    Subelement factory.  Creates a new element instance and appends it
    to the existing element ``_parent``.
    """
    # no text or tail is set on the new child
    child = _makeSubElement(_parent, _tag, None, None, attrib, nsmap, _extra)
    return child
2662
def ElementTree(_Element element=None, *, file=None, _BaseParser parser=None):
    u"""ElementTree(element=None, file=None, parser=None)

    ElementTree wrapper class.

    If ``element`` is given, the tree wraps the document of that element
    and ``file`` is ignored.  Otherwise, if ``file`` is given, it is parsed
    using ``parser``; should the parser deliver a target result instead of
    a document, that result is returned directly.  Without both arguments,
    a new empty document is created.
    """
    cdef xmlDoc*  c_doc
    cdef _Document doc

    if element is not None:
        doc  = element._doc
    elif file is not None:
        try:
            doc = _parseDocument(file, parser, None)
        except _TargetParserResult as result_container:
            return result_container.result
    else:
        c_doc = _newXMLDoc()
        doc = _documentFactory(c_doc, parser)

    return _elementTreeFactory(doc, element)
2687
def HTML(text, _BaseParser parser=None, *, base_url=None):
    u"""HTML(text, parser=None, base_url=None)

    Parses an HTML document from a string constant and returns its root
    node (or the result returned by a parser target).  Useful for
    embedding "HTML literals" in Python code.

    Pass a different ``HTMLParser`` as the ``parser`` keyword argument to
    override the parser.  Without it, the current default parser is used
    if it is an ``HTMLParser``, and the default HTML parser otherwise.

    The ``base_url`` keyword argument sets the original base URL of the
    document to support relative paths when looking up external entities
    (DTD, XInclude, ...).
    """
    cdef _Document doc
    if parser is None:
        parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
        if not isinstance(parser, HTMLParser):
            # the configured default is not an HTML parser, so fall back
            parser = __DEFAULT_HTML_PARSER
    try:
        doc = _parseMemoryDocument(text, base_url, parser)
        root = doc.getroot()
        return root
    except _TargetParserResult as result_container:
        return result_container.result
2712
def XML(text, _BaseParser parser=None, *, base_url=None):
    u"""XML(text, parser=None, base_url=None)

    Parses an XML document or fragment from a string constant and returns
    its root node (or the result returned by a parser target).  Useful for
    embedding "XML literals" in Python code, like in

       >>> root = etree.XML("<root><test/></root>")

    Pass a different ``XMLParser`` as the ``parser`` keyword argument to
    override the parser.  Without it, the current default parser is used
    if it is an ``XMLParser``, and the default XML parser otherwise.

    The ``base_url`` keyword argument sets the original base URL of the
    document to support relative paths when looking up external entities
    (DTD, XInclude, ...).
    """
    cdef _Document doc
    if parser is None:
        parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
        if not isinstance(parser, XMLParser):
            # the configured default is not an XML parser, so fall back
            parser = __DEFAULT_XML_PARSER
    try:
        doc = _parseMemoryDocument(text, base_url, parser)
        root = doc.getroot()
        return root
    except _TargetParserResult as result_container:
        return result_container.result
2740
def fromstring(text, _BaseParser parser=None, *, base_url=None):
    u"""fromstring(text, parser=None, base_url=None)

    Parses an XML document or fragment from a string and returns its
    root node (or the result returned by a parser target).

    Pass a parser as the ``parser`` keyword argument to override the
    default parser.

    The ``base_url`` keyword argument sets the original base URL of the
    document to support relative paths when looking up external entities
    (DTD, XInclude, ...).
    """
    cdef _Document doc
    try:
        doc = _parseMemoryDocument(text, base_url, parser)
        root = doc.getroot()
        return root
    except _TargetParserResult as result_container:
        return result_container.result
2760
def fromstringlist(strings, _BaseParser parser=None):
    u"""fromstringlist(strings, parser=None)

    Parses an XML document from a sequence of strings.  Returns the
    root node (or the result returned by a parser target).

    To override the default parser with a different parser you can pass it to
    the ``parser`` keyword argument.
    """
    if parser is None:
        parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
    # feed the chunks one by one; close() delivers the parse result
    feed = parser.feed
    for data in strings:
        feed(data)
    return parser.close()
2777
def iselement(element):
    u"""iselement(element)

    Checks if an object appears to be a valid element object, i.e. an
    ``_Element`` instance that is backed by a C node.
    """
    if not isinstance(element, _Element):
        return False
    return (<_Element>element)._c_node is not NULL
2784
def dump(_Element elem not None, *, bint pretty_print=True, bint with_tail=True):
    u"""dump(elem, pretty_print=True, with_tail=True)

    Writes an element tree or element structure to sys.stdout.  Use this
    function for debugging only.
    """
    _assertValidNode(elem)
    out = sys.stdout
    _dumpToFile(out, elem._c_node, pretty_print, with_tail)
2793
def tostring(element_or_tree, *, encoding=None, method=u"xml",
             xml_declaration=None, bint pretty_print=False, bint with_tail=True,
             standalone=None, doctype=None,
             bint exclusive=False, bint with_comments=True):
    u"""tostring(element_or_tree, encoding=None, method="xml",
                 xml_declaration=None, pretty_print=False, with_tail=True,
                 standalone=None, doctype=None,
                 exclusive=False, with_comments=True)

    Serialize an element to an encoded string representation of its XML
    tree.

    The default is ASCII encoding without an XML declaration.  Use the
    keyword arguments 'encoding' (string) and 'xml_declaration' (bool)
    to change this.  Note that selecting an encoding that is not UTF-8
    compatible enables the declaration by default.

    Serialising to a Unicode string without declaration is possible by
    passing the ``unicode`` function as encoding (or ``str`` in Py3),
    or the name 'unicode'.  The return value is then an unencoded
    unicode string instead of a byte string.

    The keyword argument 'pretty_print' (bool) enables formatted XML.

    The keyword argument 'method' selects the output method: 'xml',
    'html', plain 'text' (text content without tags) or 'c14n'.
    Default is 'xml'.

    The ``exclusive`` and ``with_comments`` arguments are only used
    with C14N output, where they request exclusive and uncommented
    C14N serialisation respectively.

    Passing a boolean value to the ``standalone`` option will output
    an XML declaration with the corresponding ``standalone`` flag.

    The ``doctype`` option takes a plain string that is serialised
    before the XML tree.  Note that non well-formed content passed here
    makes the XML output non well-formed.  Also, an existing doctype in
    the document tree is not removed when serialising an ElementTree
    instance.

    The boolean ``with_tail`` option can be used to prevent the tail
    text of the element from being serialised.  It has no impact on the
    tail text of children, which is always serialised.
    """
    cdef bint write_declaration
    cdef int is_standalone
    # C14N serialisation takes its own path and rejects unrelated options
    if method == 'c14n':
        if encoding is not None:
            raise ValueError("Cannot specify encoding with C14N")
        if xml_declaration:
            raise ValueError("Cannot enable XML declaration in C14N")
        return _tostringC14N(element_or_tree, exclusive, with_comments)
    if not with_comments:
        raise ValueError("Can only discard comments in C14N serialisation")

    # decide on the target encoding and whether to write a declaration
    if encoding is _unicode or (encoding is not None and encoding.upper() == 'UNICODE'):
        if xml_declaration:
            raise ValueError(
                u"Serialisation to unicode must not request an XML declaration")
        encoding = _unicode
        write_declaration = 0
    elif xml_declaration is not None:
        write_declaration = xml_declaration
    elif encoding is None:
        write_declaration = 0
    else:
        # by default, write an XML declaration only for non-standard encodings
        write_declaration = encoding.upper() not in \
                            (u'ASCII', u'UTF-8', u'UTF8', u'US-ASCII')
    if encoding is None:
        encoding = u'ASCII'

    # an explicit standalone flag always forces the declaration
    if standalone is None:
        is_standalone = -1
    else:
        write_declaration = 1
        is_standalone = 1 if standalone else 0

    if isinstance(element_or_tree, _Element):
        return _tostring(<_Element>element_or_tree, encoding, doctype, method,
                         write_declaration, 0, pretty_print, with_tail,
                         is_standalone)
    if isinstance(element_or_tree, _ElementTree):
        return _tostring((<_ElementTree>element_or_tree)._context_node,
                         encoding, doctype, method, write_declaration, 1,
                         pretty_print, with_tail, is_standalone)
    raise TypeError(u"Type '%s' cannot be serialized." %
                    python._fqtypename(element_or_tree))
2885
def tostringlist(element_or_tree, *args, **kwargs):
    u"""tostringlist(element_or_tree, *args, **kwargs)

    Serialize an element to an encoded string representation of its XML
    tree, stored in a list of partial strings.

    This exists purely for ElementTree 1.3 compatibility.  All arguments
    are passed on to ``tostring()`` and the result is its single string
    wrapped in a list.
    """
    serialised = tostring(element_or_tree, *args, **kwargs)
    return [serialised]
2896
def tounicode(element_or_tree, *, method=u"xml", bint pretty_print=False,
              bint with_tail=True, doctype=None):
    u"""tounicode(element_or_tree, method="xml", pretty_print=False,
                  with_tail=True, doctype=None)

    Serialize an element to the Python unicode representation of its XML
    tree.

    :deprecated: use ``tostring(el, encoding=unicode)`` instead.

    Note that the result does not carry an XML encoding declaration and is
    therefore not necessarily suited for serialization to byte streams without
    further treatment.

    The boolean keyword argument 'pretty_print' enables formatted XML.

    The keyword argument 'method' selects the output method: 'xml',
    'html' or plain 'text'.

    You can prevent the tail text of the element from being serialised
    by passing the boolean ``with_tail`` option.  This has no impact
    on the tail text of children, which will always be serialised.

    Raises TypeError if ``element_or_tree`` is neither an element nor an
    ElementTree.
    """
    if isinstance(element_or_tree, _Element):
        return _tostring(<_Element>element_or_tree, _unicode, doctype, method,
                          0, 0, pretty_print, with_tail, -1)
    elif isinstance(element_or_tree, _ElementTree):
        return _tostring((<_ElementTree>element_or_tree)._context_node,
                         _unicode, doctype, method, 0, 1, pretty_print,
                         with_tail, -1)
    else:
        # report the type name the same way tostring() does
        raise TypeError, u"Type '%s' cannot be serialized." % \
            python._fqtypename(element_or_tree)
2930
def parse(source, _BaseParser parser=None, *, base_url=None):
    u"""parse(source, parser=None, base_url=None)

    Return an ElementTree object loaded with source elements.  The default
    parser is used unless a parser is provided as second argument.

    The ``source`` can be any of the following:

    - a file name/path
    - a file object
    - a file-like object
    - a URL using the HTTP or FTP protocol

    To parse from a string, use the ``fromstring()`` function instead.

    Note that parsing from a file path or URL is generally faster than
    parsing from an open file object or file-like object.  Transparent
    decompression from gzip compressed sources is supported (unless
    explicitly disabled in libxml2).

    The ``base_url`` keyword sets a URL for the document when parsing
    from a file-like object.  This is needed when looking up external
    entities (DTD, XInclude, ...) with relative paths.
    """
    cdef _Document doc
    try:
        doc = _parseDocument(source, parser, base_url)
        tree_wrapper = _elementTreeFactory(doc, None)
        return tree_wrapper
    except _TargetParserResult as result_container:
        # a parser target produced its own result instead of a document
        return result_container.result
2961
2962
2963 ################################################################################
2964 # Include submodules
2965
2966 include "readonlytree.pxi" # Read-only implementation of Element proxies
2967 include "classlookup.pxi"  # Element class lookup mechanisms
2968 include "nsclasses.pxi"    # Namespace implementation and registry
2969 include "docloader.pxi"    # Support for custom document loaders
2970 include "parser.pxi"       # XML Parser
2971 include "saxparser.pxi"    # SAX-like Parser interface and tree builder
2972 include "parsertarget.pxi" # ET Parser target
2973 include "serializer.pxi"   # XML output functions
2974 include "iterparse.pxi"    # incremental XML parsing
2975 include "xmlid.pxi"        # XMLID and IDDict
2976 include "xinclude.pxi"     # XInclude
2977 include "cleanup.pxi"      # Cleanup and recursive element removal functions
2978
2979
2980 ################################################################################
2981 # Include submodules for XPath and XSLT
2982
2983 include "extensions.pxi"   # XPath/XSLT extension functions
2984 include "xpath.pxi"        # XPath evaluation
2985 include "xslt.pxi"         # XSL transformations
2986 include "xsltext.pxi"      # XSL extension elements
2987
2988
2989 ################################################################################
2990 # Validation
2991
class DocumentInvalid(LxmlError):
    u"""Validation error.

    All document validators raise this exception when their
    ``assertValid(tree)`` method fails.
    """
2999
cdef class _Validator:
    u"Base class for XML validators."
    # log that collects the messages of validation runs
    cdef _ErrorLog _error_log
    def __cinit__(self):
        self._error_log = _ErrorLog()

    def validate(self, etree):
        u"""validate(self, etree)

        Validate the document using this schema.

        Returns true if document is valid, false if not.
        """
        # validation itself is done by calling the validator object
        return self(etree)

    def assertValid(self, etree):
        u"""assertValid(self, etree)

        Raises `DocumentInvalid` if the document does not comply with the schema.
        """
        if not self(etree):
            raise DocumentInvalid(self._error_log._buildExceptionMessage(
                    u"Document does not comply with schema"),
                                  self._error_log)

    def assert_(self, etree):
        u"""assert_(self, etree)

        Raises `AssertionError` if the document does not comply with the schema.
        """
        if not self(etree):
            raise AssertionError, self._error_log._buildExceptionMessage(
                u"Document does not comply with schema")

    cpdef _append_log_message(self, int domain, int type, int level, int line,
                              message, filename):
        # pass a single message on to this validator's error log
        self._error_log._receiveGeneric(domain, type, level, line, message,
                                        filename)

    cpdef _clear_error_log(self):
        # drop all messages collected so far
        self._error_log.clear()

    property error_log:
        u"The log of validation errors and warnings."
        def __get__(self):
            assert self._error_log is not None, "Validator not initialised"
            # hand out a copy of the internal log
            return self._error_log.copy()
3047
3048 include "dtd.pxi"        # DTD
3049 include "relaxng.pxi"    # RelaxNG
3050 include "xmlschema.pxi"  # XMLSchema
3051 include "schematron.pxi" # Schematron (requires libxml2 2.6.21+)
3052
3053 ################################################################################
3054 # Public C API
3055
3056 include "public-api.pxi"