"""
SAX-based adapter to copy trees from/to the Python standard library.

Use the `ElementTreeContentHandler` class to build an ElementTree from
SAX events.

Use the `ElementTreeProducer` class or the `saxify()` function to fire
the SAX events of an ElementTree against a SAX ContentHandler.

See https://lxml.de/sax.html
"""
13 from xml.sax.handler import ContentHandler
14 from lxml import etree
15 from lxml.etree import ElementTree, SubElement
16 from lxml.etree import Comment, ProcessingInstruction
class SaxError(etree.LxmlError):
    """General SAX error.

    Raised by the SAX adapter when the incoming event stream is
    inconsistent, e.g. when an element is closed that is not the one
    currently open.
    """
25 return tuple(tag[1:].split('}', 1))
class ElementTreeContentHandler(ContentHandler):
    """Build an lxml ElementTree from SAX events.

    Pass an instance to a SAX event source.  After the events have been
    delivered, the tree is available through the ``etree`` property.

    ``makeelement`` is an optional factory used for the root element.  It is
    called as ``makeelement(tag, attrib, nsmap)`` and defaults to
    ``etree.Element``.
    """

    def __init__(self, makeelement=None):
        ContentHandler.__init__(self)
        self._root = None
        # Processing instructions seen before the root element exists.
        self._root_siblings = []
        # Chain of currently open elements, innermost last.
        self._element_stack = []
        self._default_ns = None
        # prefix -> stack of URIs; the last entry is the one in scope.
        self._ns_mapping = {None: [None]}
        # Mappings declared since the last start tag; handed to the next
        # element as its nsmap and then cleared.
        self._new_mappings = {}
        if makeelement is None:
            makeelement = etree.Element
        self._makeelement = makeelement

    def _get_etree(self):
        "Contains the generated ElementTree after parsing is finished."
        return ElementTree(self._root)

    etree = property(_get_etree, doc=_get_etree.__doc__)

    def setDocumentLocator(self, locator):
        """Ignore the locator; it is not needed to build the tree."""
        pass

    def startDocument(self):
        """Nothing to prepare at document start."""
        pass

    def endDocument(self):
        """Nothing to finalize at document end."""
        pass

    def startPrefixMapping(self, prefix, uri):
        """Bring ``prefix -> uri`` into scope for the next element."""
        self._new_mappings[prefix] = uri
        self._ns_mapping.setdefault(prefix, []).append(uri)
        if prefix is None:
            self._default_ns = uri

    def endPrefixMapping(self, prefix):
        """Drop the innermost mapping of ``prefix``."""
        ns_uri_list = self._ns_mapping[prefix]
        ns_uri_list.pop()
        if prefix is None:
            # Fall back to the enclosing default namespace (initially None).
            self._default_ns = ns_uri_list[-1]

    def _build_tag(self, ns_name):
        """Return the ``{uri}name`` tag for a SAX ``(uri, name)`` pair.

        A pair without a URI picks up the default namespace currently in
        scope, if there is one.
        """
        ns_uri, local_name = ns_name
        if ns_uri:
            return "{%s}%s" % (ns_uri, local_name)
        if self._default_ns:
            return "{%s}%s" % (self._default_ns, local_name)
        return local_name

    def startElementNS(self, ns_name, qname, attributes=None):
        """Create a new element and make it the current one.

        ``attributes`` maps ``(uri, name)`` tuples to values; ``qname`` is
        not used.
        """
        el_name = self._build_tag(ns_name)

        if attributes:
            attrs = {}
            for (attr_uri, attr_local), value in attributes.items():
                if attr_uri:
                    attr_name = "{%s}%s" % (attr_uri, attr_local)
                else:
                    attr_name = attr_local
                attrs[attr_name] = value
        else:
            attrs = None

        element_stack = self._element_stack
        if self._root is None:
            element = self._root = self._makeelement(
                el_name, attrs, self._new_mappings)
            # Re-attach processing instructions that preceded the root, if
            # the element type supports siblings at the top level.
            if self._root_siblings and hasattr(element, 'addprevious'):
                for sibling in self._root_siblings:
                    element.addprevious(sibling)
            del self._root_siblings[:]
        else:
            element = SubElement(element_stack[-1], el_name,
                                 attrs, self._new_mappings)
        element_stack.append(element)

        self._new_mappings.clear()

    def processingInstruction(self, target, data):
        """Append a processing instruction at the current position."""
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        else:
            self._element_stack[-1].append(pi)

    def endElementNS(self, ns_name, qname):
        """Close the current element.

        Raises ``SaxError`` if ``ns_name`` does not match the element that
        is currently open.
        """
        element = self._element_stack.pop()
        # Build the tag the same way startElementNS did, so that elements
        # living in the default namespace compare equal.
        el_tag = self._build_tag(ns_name)
        if el_tag != element.tag:
            raise SaxError("Unexpected element closed: " + el_tag)

    def startElement(self, name, attributes=None):
        """Non-namespace variant of ``startElementNS``."""
        if attributes:
            # startElementNS expects (uri, name) tuples as attribute keys.
            attributes = dict(
                ((None, key), value) for key, value in attributes.items())
        self.startElementNS((None, name), name, attributes)

    def endElement(self, name):
        """Non-namespace variant of ``endElementNS``."""
        self.endElementNS((None, name), name)

    def characters(self, data):
        """Append text to the current element or to its last child's tail."""
        current = self._element_stack[-1]
        try:
            last_child = current[-1]
        except IndexError:
            # no child yet: the data belongs to the element's own text
            current.text = (current.text or '') + data
        else:
            # text following a child element is stored as that child's tail
            last_child.tail = (last_child.tail or '') + data

    ignorableWhitespace = characters
class ElementTreeProducer(object):
    """Produces SAX events for an element and children.

    ``element_or_tree`` may be an element or any object with a
    ``getroot()`` method.  The events are fired against
    ``content_handler`` when ``saxify()`` is called.
    """

    def __init__(self, element_or_tree, content_handler):
        try:
            element = element_or_tree.getroot()
        except AttributeError:
            # not a tree: treat the argument as the element itself
            element = element_or_tree
        self._element = element
        self._content_handler = content_handler
        from xml.sax.xmlreader import AttributesNSImpl as attr_class
        self._attr_class = attr_class
        # Shared instance for the common case of an attribute-free element.
        self._empty_attributes = attr_class({}, {})

    def saxify(self):
        """Fire the SAX events for the whole document.

        Processing instructions that directly precede or follow the root
        element are reported as well, in document order.
        """
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            # Collect walking backwards, then replay in document order.
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            for sibling in reversed(siblings):
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        self._content_handler.endDocument()

    def _recursive_saxify(self, element, prefixes):
        """Fire the events for ``element``, its subtree and its tail text.

        ``prefixes`` maps the namespace URIs currently in scope to their
        generated prefixes.
        """
        content_handler = self._content_handler
        tag = element.tag
        if tag is Comment or tag is ProcessingInstruction:
            # Comments produce no event of their own; only their tail text
            # is reported.
            if tag is ProcessingInstruction:
                content_handler.processingInstruction(
                    element.target, element.text)
            if element.tail:
                content_handler.characters(element.tail)
            return

        # (prefix, uri) pairs that this element has to declare itself.
        new_prefixes = []
        build_qname = self._build_qname
        attribs = element.items()
        if attribs:
            attr_values = {}
            attr_qnames = {}
            for attr_ns_name, value in attribs:
                attr_ns_tuple = _getNsTag(attr_ns_name)
                attr_values[attr_ns_tuple] = value
                attr_qnames[attr_ns_tuple] = build_qname(
                    attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
            sax_attributes = self._attr_class(attr_values, attr_qnames)
        else:
            sax_attributes = self._empty_attributes

        ns_uri, local_name = _getNsTag(tag)
        qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

        for prefix, uri in new_prefixes:
            content_handler.startPrefixMapping(prefix, uri)
        content_handler.startElementNS((ns_uri, local_name),
                                       qname, sax_attributes)
        if element.text:
            content_handler.characters(element.text)
        for child in element:
            self._recursive_saxify(child, prefixes)
        content_handler.endElementNS((ns_uri, local_name), qname)
        for prefix, uri in new_prefixes:
            content_handler.endPrefixMapping(prefix)
            # The mapping is out of scope now.  Forget it so that a later
            # sibling using the same namespace declares it again instead of
            # reusing a prefix the handler has already been told to drop.
            del prefixes[uri]
        if element.tail:
            content_handler.characters(element.tail)

    def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
        """Return the qualified name for ``(ns_uri, local_name)``.

        A namespace that has no prefix in ``prefixes`` yet gets a generated
        one, which is recorded in both ``prefixes`` and ``new_prefixes``.
        """
        if ns_uri is None:
            return local_name
        try:
            prefix = prefixes[ns_uri]
        except KeyError:
            prefix = prefixes[ns_uri] = 'ns%02d' % len(prefixes)
            new_prefixes.append((prefix, ns_uri))
        return prefix + ':' + local_name
def saxify(element_or_tree, content_handler):
    """One-shot helper to generate SAX events from an XML tree and fire
    them against a SAX ContentHandler.

    ``element_or_tree`` is an element or a tree; ``content_handler``
    receives the events.  Returns whatever
    ``ElementTreeProducer.saxify()`` returns.
    """
    return ElementTreeProducer(element_or_tree, content_handler).saxify()