5 # The root of all libxml2 errors.
class libxmlError(Exception):
    """Root of the libxml2 exception hierarchy.

    The wrapper error classes in this file (treeError, parserError,
    uriError, xpathError) all derive from it.
    """
9 # id() is sometimes negative ...
14 return (sys.maxint - i)
18 # Errors raised by the wrappers when some tree handling failed.
20 class treeError(libxmlError):
21 def __init__(self, msg):
26 class parserError(libxmlError):
27 def __init__(self, msg):
32 class uriError(libxmlError):
33 def __init__(self, msg):
38 class xpathError(libxmlError):
39 def __init__(self, msg):
45 def __init__(self, _obj):
62 def io_read(self, len = -1):
66 return(self.__io.read())
67 return(self.__io.read(len))
69 def io_write(self, str, len = -1):
73 return(self.__io.write(str))
74 return(self.__io.write(str, len))
76 class ioReadWrapper(ioWrapper):
77 def __init__(self, _obj, enc = ""):
78 ioWrapper.__init__(self, _obj)
79 self._o = libxml2mod.xmlCreateInputBuffer(self, enc)
85 libxml2mod.xmlFreeParserInputBuffer(self._o)
91 libxml2mod.xmlFreeParserInputBuffer(self._o)
94 class ioWriteWrapper(ioWrapper):
95 def __init__(self, _obj, enc = ""):
96 # print "ioWriteWrapper.__init__", _obj
97 if type(_obj) == type(''):
98 print "write io from a string"
100 elif type(_obj) == types.InstanceType:
101 print "write io from instance of %s" % (_obj.__class__)
102 ioWrapper.__init__(self, _obj)
103 self._o = libxml2mod.xmlCreateOutputBuffer(self, enc)
105 file = libxml2mod.outputBufferGetPythonFile(_obj)
107 ioWrapper.__init__(self, file)
109 ioWrapper.__init__(self, _obj)
116 libxml2mod.xmlOutputBufferClose(self._o)
122 libxml2mod.xmlOutputBufferClose(self._o)
128 libxml2mod.xmlOutputBufferClose(self._o)
132 # Example of a class to handle SAX events
135 """Base class for SAX handlers"""
136 def startDocument(self):
137 """called at the start of the document"""
140 def endDocument(self):
141 """called at the end of the document"""
144 def startElement(self, tag, attrs):
145 """called at the start of every element, tag is the name of
146 the element, attrs is a dictionary of the element's attributes"""
149 def endElement(self, tag):
150 """called at the end of every element, tag is the name of
154 def characters(self, data):
155 """called when character data have been read, data is the string
156 containing the data, multiple consecutive characters() callback
160 def cdataBlock(self, data):
161 """called when CDATA section have been read, data is the string
162 containing the data, multiple consecutive cdataBlock() callback
166 def reference(self, name):
167 """called when an entity reference has been found"""
170 def ignorableWhitespace(self, data):
171 """called when potentially ignorable white spaces have been found"""
174 def processingInstruction(self, target, data):
175 """called when a PI has been found, target contains the PI name and
176 data is the associated data in the PI"""
179 def comment(self, content):
180 """called when a comment has been found, content contains the comment"""
183 def externalSubset(self, name, externalID, systemID):
184 """called when a DOCTYPE declaration has been found, name is the
185 DTD name and externalID, systemID are the DTD public and system
186 identifier for that DTD if available"""
189 def internalSubset(self, name, externalID, systemID):
190 """called when a DOCTYPE declaration has been found, name is the
191 DTD name and externalID, systemID are the DTD public and system
192 identifier for that DTD if available"""
195 def entityDecl(self, name, type, externalID, systemID, content):
196 """called when an ENTITY declaration has been found, name is the
197 entity name and externalID, systemID are the entity public and
198 system identifier for that entity if available, type indicates
199 the entity type, and content reports its string content"""
202 def notationDecl(self, name, externalID, systemID):
203 """called when a NOTATION declaration has been found, name is the
204 notation name and externalID, systemID are the notation public and
205 system identifier for that notation if available"""
208 def attributeDecl(self, elem, name, type, defi, defaultValue, nameList):
209 """called when an ATTRIBUTE definition has been found"""
212 def elementDecl(self, name, type, content):
213 """called when an ELEMENT definition has been found"""
216 def entityDecl(self, name, publicId, systemID, notationName):
217 """called when an unparsed ENTITY declaration has been found,
218 name is the entity name and publicId, systemID are the entity
219 public and system identifier for that entity if available,
220 and notationName indicate the associated NOTATION"""
223 def warning(self, msg):
def error(self, msg):
    """Report a parser error: always raises parserError built from msg."""
    failure = parserError(msg)
    raise failure
def fatalError(self, msg):
    """Report a fatal parser error: always raises parserError built from msg."""
    failure = parserError(msg)
    raise failure
234 # This class is the ancestor of all the Node classes. It provides
235 # the basic functionality shared by all nodes (and gracefully
236 # handles exceptions), like name, navigation in the tree,
237 # doc reference, content access and serializing to a string or URI
240 def __init__(self, _obj=None):
246 def __eq__(self, other):
249 ret = libxml2mod.compareNodesEqual(self._o, other._o)
253 def __ne__(self, other):
256 ret = libxml2mod.compareNodesEqual(self._o, other._o)
259 ret = libxml2mod.nodeHash(self._o)
263 return self.serialize()
264 def get_parent(self):
265 ret = libxml2mod.parent(self._o)
268 return xmlNode(_obj=ret)
269 def get_children(self):
270 ret = libxml2mod.children(self._o)
273 return xmlNode(_obj=ret)
275 ret = libxml2mod.last(self._o)
278 return xmlNode(_obj=ret)
280 ret = libxml2mod.next(self._o)
283 return xmlNode(_obj=ret)
284 def get_properties(self):
285 ret = libxml2mod.properties(self._o)
288 return xmlAttr(_obj=ret)
290 ret = libxml2mod.prev(self._o)
293 return xmlNode(_obj=ret)
def get_content(self):
    """Return this node's content as given by libxml2mod.xmlNodeGetContent."""
    node_content = libxml2mod.xmlNodeGetContent(self._o)
    return node_content
296 getContent = get_content # why is this duplicate naming needed ?
298 return libxml2mod.name(self._o)
300 return libxml2mod.type(self._o)
302 ret = libxml2mod.doc(self._o)
304 if self.type in ["document_xml", "document_html"]:
305 return xmlDoc(_obj=self._o)
308 return xmlDoc(_obj=ret)
310 # These are attributes common to nearly all types of nodes,
311 # defined as python2 properties
314 if float(sys.version[0:3]) < 2.2:
315 def __getattr__(self, attr):
317 ret = libxml2mod.parent(self._o)
320 return xmlNode(_obj=ret)
321 elif attr == "properties":
322 ret = libxml2mod.properties(self._o)
325 return xmlAttr(_obj=ret)
326 elif attr == "children":
327 ret = libxml2mod.children(self._o)
330 return xmlNode(_obj=ret)
332 ret = libxml2mod.last(self._o)
335 return xmlNode(_obj=ret)
337 ret = libxml2mod.next(self._o)
340 return xmlNode(_obj=ret)
342 ret = libxml2mod.prev(self._o)
345 return xmlNode(_obj=ret)
346 elif attr == "content":
347 return libxml2mod.xmlNodeGetContent(self._o)
349 return libxml2mod.name(self._o)
351 return libxml2mod.type(self._o)
353 ret = libxml2mod.doc(self._o)
355 if self.type == "document_xml" or self.type == "document_html":
356 return xmlDoc(_obj=self._o)
359 return xmlDoc(_obj=ret)
360 raise AttributeError,attr
362 parent = property(get_parent, None, None, "Parent node")
363 children = property(get_children, None, None, "First child node")
364 last = property(get_last, None, None, "Last sibling node")
365 next = property(get_next, None, None, "Next sibling node")
366 prev = property(get_prev, None, None, "Previous sibling node")
367 properties = property(get_properties, None, None, "List of properies")
368 content = property(get_content, None, None, "Content of this node")
369 name = property(get_name, None, None, "Node name")
370 type = property(get_type, None, None, "Node type")
371 doc = property(get_doc, None, None, "The document this node belongs to")
374 # Serialization routines, the optional arguments have the following
376 # encoding: string to ask saving in a specific encoding
377 # format: if 1 the serializer is asked to indent the output
def serialize(self, encoding = None, format = 0):
    """Serialize this node via libxml2mod.serializeNode and return the result.

    encoding: string to ask saving in a specific encoding
    format: passed through to the serializer; per the comment preceding
        this method, 1 asks the serializer to indent the output
    """
    serialized = libxml2mod.serializeNode(self._o, encoding, format)
    return serialized
def saveTo(self, file, encoding = None, format = 0):
    """Save this node to file via libxml2mod.saveNodeTo and return its result.

    file: destination handed unchanged to libxml2mod.saveNodeTo
    encoding: string to ask saving in a specific encoding
    format: passed through to the serializer; per the comment preceding
        the serialization routines, 1 asks for indented output
    """
    status = libxml2mod.saveNodeTo(self._o, file, encoding, format)
    return status
385 # Canonicalization routines:
387 # nodes: the node set (tuple or list) to be included in the
388 # canonized image or None if all document nodes should be
390 # exclusive: the exclusive flag (0 - non-exclusive
391 # canonicalization; otherwise - exclusive canonicalization)
392 # prefixes: the list of inclusive namespace prefixes (strings),
393 # or None if there is no inclusive namespaces (only for
394 # exclusive canonicalization, ignored otherwise)
395 # with_comments: include comments in the result (!=0) or not
403 nodes = map(lambda n: n._o, nodes)
404 return libxml2mod.xmlC14NDocDumpMemory(
417 nodes = map(lambda n: n._o, nodes)
418 return libxml2mod.xmlC14NDocSaveTo(
427 # Selecting nodes using XPath, a bit slow because the context
428 # is allocated/freed every time but convenient.
430 def xpathEval(self, expr):
434 ctxt = doc.xpathNewContext()
435 ctxt.setContextNode(self)
436 res = ctxt.xpathEval(expr)
437 ctxt.xpathFreeContext()
441 # # Selecting nodes using XPath, faster because the context
442 # # is allocated just once per xmlDoc.
444 # # Removed: DV memleaks c.f. #126735
446 # def xpathEval2(self, expr):
451 # doc._ctxt.setContextNode(self)
453 # doc._ctxt = doc.xpathNewContext()
454 # doc._ctxt.setContextNode(self)
455 # res = doc._ctxt.xpathEval(expr)
def xpathEval2(self, expr):
    """Evaluate the XPath expression expr by delegating to xpathEval().

    This is a plain forwarder; the cached-context variant that is
    commented out just before this method was removed (memleaks,
    c.f. #126735).
    """
    result = self.xpathEval(expr)
    return result
461 def removeNsDef(self, href):
463 Remove a namespace definition from a node. If href is None,
464 remove all of the ns definitions on that node. The removed
465 namespaces are returned as a linked list.
467 Note: If any child nodes referred to the removed namespaces,
468 they will be left with dangling links. You should call
469 reconciliateNs() to fix those pointers.
471 Note: This method does not free memory taken by the ns
472 definitions. You will need to free it manually with the
473 freeNsList() method on the returned xmlNs object.
476 ret = libxml2mod.xmlNodeRemoveNsDef(self._o, href)
477 if ret is None:return None
478 __tmp = xmlNs(_obj=ret)
481 # support for python2 iterators
def walk_depth_first(self):
    """Return an xmlCoreDepthFirstItertor started at this node."""
    walker = xmlCoreDepthFirstItertor(self)
    return walker
def walk_breadth_first(self):
    """Return an xmlCoreBreadthFirstItertor started at this node."""
    walker = xmlCoreBreadthFirstItertor(self)
    return walker
486 __iter__ = walk_depth_first
490 self.doc._ctxt.xpathFreeContext()
493 libxml2mod.xmlFreeDoc(self._o)
497 # implements the depth-first iterator for libxml2 DOM tree
499 class xmlCoreDepthFirstItertor:
500 def __init__(self, node):
509 self.parents.append(self.node)
510 self.node = self.node.children
513 parent = self.parents.pop()
516 self.node = parent.next
519 # implements the breadth-first iterator for libxml2 DOM tree
521 class xmlCoreBreadthFirstItertor:
522 def __init__(self, node):
531 self.parents.append(self.node)
532 self.node = self.node.next
535 parent = self.parents.pop()
538 self.node = parent.children
541 # converters to present a nicer view of the XPath returns
544 # TODO try to cast to the most appropriate node class
545 name = libxml2mod.type(o)
546 if name == "element" or name == "text":
547 return xmlNode(_obj=o)
548 if name == "attribute":
549 return xmlAttr(_obj=o)
550 if name[0:8] == "document":
551 return xmlDoc(_obj=o)
552 if name == "namespace":
554 if name == "elem_decl":
555 return xmlElement(_obj=o)
556 if name == "attribute_decl":
557 return xmlAttribute(_obj=o)
558 if name == "entity_decl":
559 return xmlEntity(_obj=o)
561 return xmlDtd(_obj=o)
562 return xmlNode(_obj=o)
564 def xpathObjectRet(o):
566 if otype == type([]):
567 ret = map(xpathObjectRet, o)
569 elif otype == type(()):
570 ret = map(xpathObjectRet, o)
572 elif otype == type('') or otype == type(0) or otype == type(0.0):
578 # register an XPath function
580 def registerXPathFunction(ctxt, name, ns_uri, f):
581 ret = libxml2mod.xmlRegisterXPathFunction(ctxt, name, ns_uri, f)
584 # For the xmlTextReader parser configuration
587 PARSER_DEFAULTATTRS=2
589 PARSER_SUBST_ENTITIES=4
592 # For the error callback severities
594 PARSER_SEVERITY_VALIDITY_WARNING=1
595 PARSER_SEVERITY_VALIDITY_ERROR=2
596 PARSER_SEVERITY_WARNING=3
597 PARSER_SEVERITY_ERROR=4
600 # register the libxml2 error handler
602 def registerErrorHandler(f, ctx):
603 """Register a function written in Python for error reporting.
604 The function is called back as f(ctx, error). """
606 if not sys.modules.has_key('libxslt'):
607 # normal behaviour when libxslt is not imported
608 ret = libxml2mod.xmlRegisterErrorHandler(f,ctx)
610 # when libxslt is already imported, one must
611 # use libxslt's error handler instead
613 ret = libxslt.registerErrorHandler(f,ctx)
616 class parserCtxtCore:
618 def __init__(self, _obj=None):
626 libxml2mod.xmlFreeParserCtxt(self._o)
629 def setErrorHandler(self,f,arg):
630 """Register an error handler that will be called back as
631 f(arg,msg,severity,reserved).
633 @reserved is currently always None."""
634 libxml2mod.xmlParserCtxtSetErrorHandler(self._o,f,arg)
636 def getErrorHandler(self):
637 """Return (f,arg) as previously registered with setErrorHandler
639 return libxml2mod.xmlParserCtxtGetErrorHandler(self._o)
641 def addLocalCatalog(self, uri):
642 """Register a local catalog with the parser"""
643 return libxml2mod.addLocalCatalog(self._o, uri)
648 def __init__(self, *args, **kw):
651 def setValidityErrorHandler(self, err_func, warn_func, arg=None):
653 Register error and warning handlers for DTD validation.
654 These will be called back as f(msg,arg)
656 libxml2mod.xmlSetValidErrors(self._o, err_func, warn_func, arg)
659 class SchemaValidCtxtCore:
661 def __init__(self, *args, **kw):
664 def setValidityErrorHandler(self, err_func, warn_func, arg=None):
666 Register error and warning handlers for Schema validation.
667 These will be called back as f(msg,arg)
669 libxml2mod.xmlSchemaSetValidErrors(self._o, err_func, warn_func, arg)
672 class relaxNgValidCtxtCore:
674 def __init__(self, *args, **kw):
677 def setValidityErrorHandler(self, err_func, warn_func, arg=None):
679 Register error and warning handlers for RelaxNG validation.
680 These will be called back as f(msg,arg)
682 libxml2mod.xmlRelaxNGSetValidErrors(self._o, err_func, warn_func, arg)
def _xmlTextReaderErrorFunc(handler, msg, severity, locator):
    """Intermediate callback to wrap the locator.

    handler is the (f, arg) pair that SetErrorHandler registers alongside
    this function. The raw locator is wrapped in an xmlTextReaderLocator
    and f is called as f(arg, msg, severity, wrapped_locator); its result
    is returned.
    """
    # Unpack in the body rather than in the signature: tuple parameters
    # are a SyntaxError on Python 3 (PEP 3113). Positional callers are
    # unaffected.
    f, arg = handler
    return f(arg, msg, severity, xmlTextReaderLocator(locator))
689 class xmlTextReaderCore:
691 def __init__(self, _obj=None):
693 if _obj != None:self._o = _obj;return
698 libxml2mod.xmlFreeTextReader(self._o)
701 def SetErrorHandler(self,f,arg):
702 """Register an error handler that will be called back as
703 f(arg,msg,severity,locator)."""
705 libxml2mod.xmlTextReaderSetErrorHandler(\
708 libxml2mod.xmlTextReaderSetErrorHandler(\
709 self._o,_xmlTextReaderErrorFunc,(f,arg))
711 def GetErrorHandler(self):
712 """Return (f,arg) as previously registered with SetErrorHandler
714 f,arg = libxml2mod.xmlTextReaderGetErrorHandler(self._o)
718 # assert f is _xmlTextReaderErrorFunc
722 # The cleanup now goes through a wrapper in libxml.c
725 libxml2mod.xmlPythonCleanupParser()
727 # WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
729 # Everything before this line comes from libxml.py
730 # Everything after this line is automatically generated
732 # WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING