5 # The root of all libxml2 errors.
class libxmlError(Exception):
    """Root of the libxml2 exception hierarchy.

    The more specific error classes of these bindings (treeError,
    parserError, uriError, xpathError) derive from this class.
    """
8 # Type of the wrapper class for the C objects wrappers
11 n = type(_obj).__name__
12 if n != 'PyCObject' and n != 'PyCapsule':
19 # id() is sometimes negative ...
24 return (sys.maxsize - i)
28 # Errors raised by the wrappers when some tree handling failed.
30 class treeError(libxmlError):
31 def __init__(self, msg):
36 class parserError(libxmlError):
37 def __init__(self, msg):
42 class uriError(libxmlError):
43 def __init__(self, msg):
48 class xpathError(libxmlError):
49 def __init__(self, msg):
55 def __init__(self, _obj):
72 def io_read(self, len = -1):
77 ret = self.__io.read()
79 ret = self.__io.read(len)
83 print("failed to read from Python:", type(e))
84 print("on IO:", self.__io)
90 def io_write(self, str, len = -1):
94 return(self.__io.write(str))
95 return(self.__io.write(str, len))
97 class ioReadWrapper(ioWrapper):
98 def __init__(self, _obj, enc = ""):
99 ioWrapper.__init__(self, _obj)
100 self._o = libxml2mod.xmlCreateInputBuffer(self, enc)
106 libxml2mod.xmlFreeParserInputBuffer(self._o)
112 libxml2mod.xmlFreeParserInputBuffer(self._o)
115 class ioWriteWrapper(ioWrapper):
116 def __init__(self, _obj, enc = ""):
117 # print "ioWriteWrapper.__init__", _obj
118 if type(_obj) == type(''):
119 print("write io from a string")
121 elif type(_obj).__name__ == 'PyCapsule':
122 file = libxml2mod.outputBufferGetPythonFile(_obj)
124 ioWrapper.__init__(self, file)
126 ioWrapper.__init__(self, _obj)
128 # elif type(_obj) == types.InstanceType:
129 # print(("write io from instance of %s" % (_obj.__class__)))
130 # ioWrapper.__init__(self, _obj)
131 # self._o = libxml2mod.xmlCreateOutputBuffer(self, enc)
133 file = libxml2mod.outputBufferGetPythonFile(_obj)
135 ioWrapper.__init__(self, file)
137 ioWrapper.__init__(self, _obj)
144 libxml2mod.xmlOutputBufferClose(self._o)
150 libxml2mod.xmlOutputBufferClose(self._o)
156 libxml2mod.xmlOutputBufferClose(self._o)
160 # Example of a class to handle SAX events
163 """Base class for SAX handlers"""
164 def startDocument(self):
165 """called at the start of the document"""
168 def endDocument(self):
169 """called at the end of the document"""
172 def startElement(self, tag, attrs):
173 """called at the start of every element, tag is the name of
174 the element, attrs is a dictionary of the element's attributes"""
177 def endElement(self, tag):
178 """called at the end of every element, tag is the name of
182 def characters(self, data):
183 """called when character data have been read, data is the string
184 containing the data, multiple consecutive characters() callback
188 def cdataBlock(self, data):
189 """called when CDATA section have been read, data is the string
190 containing the data, multiple consecutive cdataBlock() callback
194 def reference(self, name):
195 """called when an entity reference has been found"""
198 def ignorableWhitespace(self, data):
199 """called when potentially ignorable white spaces have been found"""
202 def processingInstruction(self, target, data):
203 """called when a PI has been found, target contains the PI name and
204 data is the associated data in the PI"""
207 def comment(self, content):
208 """called when a comment has been found, content contains the comment"""
211 def externalSubset(self, name, externalID, systemID):
212 """called when a DOCTYPE declaration has been found, name is the
213 DTD name and externalID, systemID are the DTD public and system
214 identifier for that DTD if available"""
217 def internalSubset(self, name, externalID, systemID):
218 """called when a DOCTYPE declaration has been found, name is the
219 DTD name and externalID, systemID are the DTD public and system
220 identifier for that DTD if available"""
223 def entityDecl(self, name, type, externalID, systemID, content):
224 """called when an ENTITY declaration has been found, name is the
225 entity name and externalID, systemID are the entity public and
226 system identifier for that entity if available, type indicates
227 the entity type, and content reports its string content"""
230 def notationDecl(self, name, externalID, systemID):
231 """called when a NOTATION declaration has been found, name is the
232 notation name and externalID, systemID are the notation public and
233 system identifier for that notation if available"""
236 def attributeDecl(self, elem, name, type, defi, defaultValue, nameList):
237 """called when an ATTRIBUTE definition has been found"""
240 def elementDecl(self, name, type, content):
241 """called when an ELEMENT definition has been found"""
def unparsedEntityDecl(self, name, publicId, systemID, notationName):
    """called when an unparsed ENTITY declaration has been found,
       name is the entity name and publicId, systemID are the entity
       public and system identifier for that entity if available,
       and notationName indicate the associated NOTATION"""
    # This handler used to be declared as a second entityDecl(), which
    # replaced the five-argument entityDecl() defined earlier in the same
    # class.  It is named after the event its docstring describes so that
    # both handlers exist side by side.
    pass
251 def warning(self, msg):
def error(self, msg):
    """called when an error is reported; the default handling
       raises a parserError built from msg"""
    failure = parserError(msg)
    raise failure
def fatalError(self, msg):
    """called when a fatal error is reported; the default handling
       raises a parserError built from msg"""
    failure = parserError(msg)
    raise failure
262 # This class is the ancestor of all the Node classes. It provides
263 # the basic functionalities shared by all nodes (and handles
264 # exceptions gracefully), like name, navigation in the tree,
265 # doc reference, content access and serializing to a string or URI
268 def __init__(self, _obj=None):
274 def __eq__(self, other):
277 ret = libxml2mod.compareNodesEqual(self._o, other._o)
281 def __ne__(self, other):
284 ret = libxml2mod.compareNodesEqual(self._o, other._o)
287 ret = libxml2mod.nodeHash(self._o)
291 return self.serialize()
292 def get_parent(self):
293 ret = libxml2mod.parent(self._o)
297 def get_children(self):
298 ret = libxml2mod.children(self._o)
303 ret = libxml2mod.last(self._o)
308 ret = libxml2mod.next(self._o)
312 def get_properties(self):
313 ret = libxml2mod.properties(self._o)
316 return xmlAttr(_obj=ret)
318 ret = libxml2mod.prev(self._o)
def get_content(self):
    """Return the content of this node as reported by
       libxml2mod.xmlNodeGetContent for the wrapped object"""
    wrapped = self._o
    return libxml2mod.xmlNodeGetContent(wrapped)
# Second public name bound to the very same function object.
getContent = get_content  # why is this duplicate naming needed ?
326 return libxml2mod.name(self._o)
328 return libxml2mod.type(self._o)
330 ret = libxml2mod.doc(self._o)
332 if self.type in ["document_xml", "document_html"]:
333 return xmlDoc(_obj=self._o)
336 return xmlDoc(_obj=ret)
338 # These attributes are common to nearly all types of nodes and are
339 # defined as python2 properties
342 if float(sys.version[0:3]) < 2.2:
343 def __getattr__(self, attr):
345 ret = libxml2mod.parent(self._o)
349 elif attr == "properties":
350 ret = libxml2mod.properties(self._o)
353 return xmlAttr(_obj=ret)
354 elif attr == "children":
355 ret = libxml2mod.children(self._o)
360 ret = libxml2mod.last(self._o)
365 ret = libxml2mod.next(self._o)
370 ret = libxml2mod.prev(self._o)
374 elif attr == "content":
375 return libxml2mod.xmlNodeGetContent(self._o)
377 return libxml2mod.name(self._o)
379 return libxml2mod.type(self._o)
381 ret = libxml2mod.doc(self._o)
383 if self.type == "document_xml" or self.type == "document_html":
384 return xmlDoc(_obj=self._o)
387 return xmlDoc(_obj=ret)
388 raise AttributeError(attr)
# Read-only properties (no setter, no deleter) built on the get_* accessors;
# the last argument of each property() call is that property's docstring.
parent = property(get_parent, None, None, "Parent node")
children = property(get_children, None, None, "First child node")
last = property(get_last, None, None, "Last sibling node")
next = property(get_next, None, None, "Next sibling node")
prev = property(get_prev, None, None, "Previous sibling node")
properties = property(get_properties, None, None, "List of properties")
content = property(get_content, None, None, "Content of this node")
name = property(get_name, None, None, "Node name")
type = property(get_type, None, None, "Node type")
doc = property(get_doc, None, None, "The document this node belongs to")
402 # Serialization routines, the optional arguments have the following
404 # encoding: string to ask saving in a specific encoding
405 # format: if 1 the serializer is asked to indent the output
def serialize(self, encoding = None, format = 0):
    """Serialize this node through libxml2mod.serializeNode and return
       whatever that call returns.

       encoding: string to ask saving in a specific encoding
       format: if 1 the serializer is asked to indent the output"""
    wrapped = self._o
    return libxml2mod.serializeNode(wrapped, encoding, format)
def saveTo(self, file, encoding = None, format = 0):
    """Save this node to file through libxml2mod.saveNodeTo and return
       whatever that call returns.

       file: destination handed unchanged to saveNodeTo
       encoding: string to ask saving in a specific encoding
       format: if 1 the serializer is asked to indent the output"""
    wrapped = self._o
    return libxml2mod.saveNodeTo(wrapped, file, encoding, format)
413 # Canonicalization routines:
415 # nodes: the node set (tuple or list) to be included in the
416 # canonized image or None if all document nodes should be
418 # exclusive: the exclusive flag (0 - non-exclusive
419 # canonicalization; otherwise - exclusive canonicalization)
420 # prefixes: the list of inclusive namespace prefixes (strings),
421 # or None if there is no inclusive namespaces (only for
422 # exclusive canonicalization, ignored otherwise)
423 # with_comments: include comments in the result (!=0) or not
431 nodes = [n._o for n in nodes]
432 return libxml2mod.xmlC14NDocDumpMemory(
445 nodes = [n._o for n in nodes]
446 return libxml2mod.xmlC14NDocSaveTo(
455 # Selecting nodes using XPath, a bit slow because the context
456 # is allocated/freed every time but convenient.
458 def xpathEval(self, expr):
462 ctxt = doc.xpathNewContext()
463 ctxt.setContextNode(self)
464 res = ctxt.xpathEval(expr)
465 ctxt.xpathFreeContext()
469 # # Selecting nodes using XPath, faster because the context
470 # # is allocated just once per xmlDoc.
472 # # Removed: DV memleaks c.f. #126735
474 # def xpathEval2(self, expr):
479 # doc._ctxt.setContextNode(self)
481 # doc._ctxt = doc.xpathNewContext()
482 # doc._ctxt.setContextNode(self)
483 # res = doc._ctxt.xpathEval(expr)
def xpathEval2(self, expr):
    """Evaluate the XPath expression expr; this simply forwards to
       xpathEval() and hands back its result"""
    result = self.xpathEval(expr)
    return result
489 def removeNsDef(self, href):
491 Remove a namespace definition from a node. If href is None,
492 remove all of the ns definitions on that node. The removed
493 namespaces are returned as a linked list.
495 Note: If any child nodes referred to the removed namespaces,
496 they will be left with dangling links. You should call
497 reconciliateNs() to fix those pointers.
499 Note: This method does not free memory taken by the ns
500 definitions. You will need to free it manually with the
501 freeNsList() method on the returned xmlNs object.
504 ret = libxml2mod.xmlNodeRemoveNsDef(self._o, href)
505 if ret is None:return None
506 __tmp = xmlNs(_obj=ret)
509 # support for python2 iterators
def walk_depth_first(self):
    """Return a new xmlCoreDepthFirstItertor built on this node"""
    iterator = xmlCoreDepthFirstItertor(self)
    return iterator
def walk_breadth_first(self):
    """Return a new xmlCoreBreadthFirstItertor built on this node"""
    iterator = xmlCoreBreadthFirstItertor(self)
    return iterator
514 __iter__ = walk_depth_first
518 self.doc._ctxt.xpathFreeContext()
521 libxml2mod.xmlFreeDoc(self._o)
525 # implements the depth-first iterator for libxml2 DOM tree
527 class xmlCoreDepthFirstItertor:
528 def __init__(self, node):
537 self.parents.append(self.node)
538 self.node = self.node.children
541 parent = self.parents.pop()
544 self.node = parent.next
548 # implements the breadth-first iterator for libxml2 DOM tree
550 class xmlCoreBreadthFirstItertor:
551 def __init__(self, node):
560 self.parents.append(self.node)
561 self.node = self.node.next
564 parent = self.parents.pop()
567 self.node = parent.children
571 # converters to present a nicer view of the XPath returns
574 # TODO try to cast to the most appropriate node class
575 name = libxml2mod.type(o)
576 if name == "element" or name == "text":
577 return xmlNode(_obj=o)
578 if name == "attribute":
579 return xmlAttr(_obj=o)
580 if name[0:8] == "document":
581 return xmlDoc(_obj=o)
582 if name == "namespace":
584 if name == "elem_decl":
585 return xmlElement(_obj=o)
586 if name == "attribute_decl":
587 return xmlAttribute(_obj=o)
588 if name == "entity_decl":
589 return xmlEntity(_obj=o)
591 return xmlDtd(_obj=o)
592 return xmlNode(_obj=o)
594 def xpathObjectRet(o):
596 if otype == type([]):
597 ret = list(map(xpathObjectRet, o))
599 elif otype == type(()):
600 ret = list(map(xpathObjectRet, o))
602 elif otype == type('') or otype == type(0) or otype == type(0.0):
608 # register an XPath function
610 def registerXPathFunction(ctxt, name, ns_uri, f):
611 ret = libxml2mod.xmlRegisterXPathFunction(ctxt, name, ns_uri, f)
# For the xmlTextReader parser configuration
PARSER_DEFAULTATTRS = 2
PARSER_SUBST_ENTITIES = 4

# For the error callback severities
(
    PARSER_SEVERITY_VALIDITY_WARNING,
    PARSER_SEVERITY_VALIDITY_ERROR,
    PARSER_SEVERITY_WARNING,
    PARSER_SEVERITY_ERROR,
) = (1, 2, 3, 4)
630 # register the libxml2 error handler
632 def registerErrorHandler(f, ctx):
633 """Register a function written in Python for error reporting.
634 The function is called back as f(ctx, error). """
636 if 'libxslt' not in sys.modules:
637 # normal behaviour when libxslt is not imported
638 ret = libxml2mod.xmlRegisterErrorHandler(f,ctx)
640 # when libxslt is already imported, one must
641 # use libxslt's error handler instead
643 ret = libxslt.registerErrorHandler(f,ctx)
646 class parserCtxtCore:
648 def __init__(self, _obj=None):
656 libxml2mod.xmlFreeParserCtxt(self._o)
def setErrorHandler(self,f,arg):
    """Register an error handler that will be called back as
       f(arg,msg,severity,reserved).

       @reserved is currently always None."""
    wrapped = self._o
    libxml2mod.xmlParserCtxtSetErrorHandler(wrapped, f, arg)
666 def getErrorHandler(self):
667 """Return (f,arg) as previously registered with setErrorHandler
669 return libxml2mod.xmlParserCtxtGetErrorHandler(self._o)
def addLocalCatalog(self, uri):
    """Register a local catalog with the parser"""
    wrapped = self._o
    return libxml2mod.addLocalCatalog(wrapped, uri)
678 def __init__(self, *args, **kw):
681 def setValidityErrorHandler(self, err_func, warn_func, arg=None):
683 Register error and warning handlers for DTD validation.
684 These will be called back as f(msg,arg)
686 libxml2mod.xmlSetValidErrors(self._o, err_func, warn_func, arg)
689 class SchemaValidCtxtCore:
691 def __init__(self, *args, **kw):
694 def setValidityErrorHandler(self, err_func, warn_func, arg=None):
696 Register error and warning handlers for Schema validation.
697 These will be called back as f(msg,arg)
699 libxml2mod.xmlSchemaSetValidErrors(self._o, err_func, warn_func, arg)
702 class relaxNgValidCtxtCore:
704 def __init__(self, *args, **kw):
707 def setValidityErrorHandler(self, err_func, warn_func, arg=None):
709 Register error and warning handlers for RelaxNG validation.
710 These will be called back as f(msg,arg)
712 libxml2mod.xmlRelaxNGSetValidErrors(self._o, err_func, warn_func, arg)
def _xmlTextReaderErrorFunc(xxx_todo_changeme,msg,severity,locator):
    """Intermediate callback to wrap the locator"""
    # The first argument is the (f, arg) pair; f is invoked with arg, the
    # message, the severity and the locator wrapped in xmlTextReaderLocator.
    callback, user_arg = xxx_todo_changeme
    wrapped_locator = xmlTextReaderLocator(locator)
    return callback(user_arg, msg, severity, wrapped_locator)
720 class xmlTextReaderCore:
722 def __init__(self, _obj=None):
724 if _obj != None:self._o = _obj;return
729 libxml2mod.xmlFreeTextReader(self._o)
732 def SetErrorHandler(self,f,arg):
733 """Register an error handler that will be called back as
734 f(arg,msg,severity,locator)."""
736 libxml2mod.xmlTextReaderSetErrorHandler(\
739 libxml2mod.xmlTextReaderSetErrorHandler(\
740 self._o,_xmlTextReaderErrorFunc,(f,arg))
742 def GetErrorHandler(self):
743 """Return (f,arg) as previously registered with setErrorHandler
745 f,arg = libxml2mod.xmlTextReaderGetErrorHandler(self._o)
749 # assert f is _xmlTextReaderErrorFunc
753 # The cleanup now goes through a wrapper in libxml.c
756 libxml2mod.xmlPythonCleanupParser()
759 # The interface to xmlRegisterInputCallbacks.
760 # Since this API does not allow passing a data object along with
761 # match/open callbacks, it is necessary to maintain a list of all
764 __input_callbacks = []
765 def registerInputCallback(func):
766 def findOpenCallback(URI):
767 for cb in reversed(__input_callbacks):
771 libxml2mod.xmlRegisterInputCallback(findOpenCallback)
772 __input_callbacks.append(func)
def popInputCallbacks():
    """Remove the most recently registered input callback.

    Python-level callbacks are popped first.  Whenever the Python-level
    list is empty afterwards (including right after its last entry was
    removed), libxml2mod.xmlUnregisterInputCallback() is called, so that
    further calls start popping the built-in callbacks.
    """
    if __input_callbacks:
        __input_callbacks.pop()
    if not __input_callbacks:
        libxml2mod.xmlUnregisterInputCallback()
782 # WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
784 # Everything before this line comes from libxml.py
785 # Everything after this line is automatically generated
787 # WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING