1 """Miscellaneous utility functions useful for dealing with ESIS streams."""
10 import xml.sax.xmlreader
# Bound ``match`` method of a compiled pattern: matches, anchored at the
# start of the given string, a non-empty run of characters that contains
# no backslash.
_data_match = re.compile(r"[^\\][^\\]*").match
30 n, s = s.split(";", 1)
31 r = r + unichr(int(n))
33 raise ValueError, "can't handle " + `s`
38 for c in map(chr, range(256)):
40 _charmap["\n"] = r"\n"
41 _charmap["\\"] = r"\\"
46 return _null_join(map(_charmap.get, s))
49 class ESISReader(xml.sax.xmlreader.XMLReader):
50 """SAX Reader which reads from an ESIS stream.
52 No verification of the document structure is performed by the
53 reader; a general verifier could be used as the target
54 ContentHandler instance.
58 _lexical_handler = None
68 def __init__(self, contentHandler=None, errorHandler=None):
69 xml.sax.xmlreader.XMLReader.__init__(self)
71 self._attributes = Attributes(self._attrs)
72 self._locator = Locator()
75 self.setContentHandler(contentHandler)
77 self.setErrorHandler(errorHandler)
79 def get_empties(self):
80 return self._empties.keys()
86 def parse(self, source):
88 self._locator._public_id = source.getPublicId()
89 self._locator._system_id = source.getSystemId()
90 fp = source.getByteStream()
91 handler = self.getContentHandler()
93 handler.startDocument()
96 token, data = self._get_token(fp)
100 self._locator._lineno = lineno
101 self._handle_token(token, data)
102 handler = self.getContentHandler()
104 handler.startDocument()
106 def feed(self, data):
107 if not self._started:
108 handler = self.getContentHandler()
110 handler.startDocument()
112 data = self._buffer + data
114 lines = data.split("\n")
116 for line in lines[:-1]:
117 self._lineno = self._lineno + 1
118 self._locator._lineno = self._lineno
120 e = xml.sax.SAXParseException(
121 "ESIS input line contains no token type mark",
123 self.getErrorHandler().error(e)
125 self._handle_token(line[0], line[1:])
126 self._buffer = lines[-1]
131 handler = self.getContentHandler()
133 handler.endDocument()
136 def _get_token(self, fp):
140 e = SAXException("I/O error reading input stream", e)
141 self.getErrorHandler().fatalError(e)
148 e = xml.sax.SAXParseException(
149 "ESIS input line contains no token type mark",
151 self.getErrorHandler().error(e)
153 return line[0], line[1:]
155 def _handle_token(self, token, data):
156 handler = self.getContentHandler()
159 handler.characters(decode(data))
162 handler.endElement(decode(data))
165 self._empties[data] = 1
167 handler.startElement(data, self._attributes)
171 name, value = data.split(' ', 1)
172 if value != "IMPLIED":
173 type, value = value.split(' ', 1)
174 self._attrs[name] = (decode(value), type)
176 # entity reference in SAX?
181 target, data = string.split(data, None, 1)
183 target, data = data, ""
184 handler.processingInstruction(target, decode(data))
186 handler = self.getDTDHandler()
188 handler.notationDecl(data, self._public_id, self._system_id)
189 self._public_id = None
190 self._system_id = None
192 self._public_id = decode(data)
194 self._system_id = decode(data)
200 e = SAXParseException("unknown ESIS token in event stream",
202 self.getErrorHandler().error(e)
204 def setContentHandler(self, handler):
205 old = self.getContentHandler()
207 old.setDocumentLocator(None)
209 handler.setDocumentLocator(self._locator)
210 xml.sax.xmlreader.XMLReader.setContentHandler(self, handler)
212 def getProperty(self, property):
213 if property == xml.sax.handler.property_lexical_handler:
214 return self._lexical_handler
216 elif property == xml.sax.handler.property_declaration_handler:
217 return self._decl_handler
220 raise xml.sax.SAXNotRecognizedException("unknown property %s"
223 def setProperty(self, property, value):
224 if property == xml.sax.handler.property_lexical_handler:
225 if self._lexical_handler:
226 self._lexical_handler.setDocumentLocator(None)
228 value.setDocumentLocator(self._locator)
229 self._lexical_handler = value
231 elif property == xml.sax.handler.property_declaration_handler:
232 if self._decl_handler:
233 self._decl_handler.setDocumentLocator(None)
235 value.setDocumentLocator(self._locator)
236 self._decl_handler = value
239 raise xml.sax.SAXNotRecognizedException()
241 def getFeature(self, feature):
242 if feature == xml.sax.handler.feature_namespaces:
245 return xml.sax.xmlreader.XMLReader.getFeature(self, feature)
247 def setFeature(self, feature, enabled):
248 if feature == xml.sax.handler.feature_namespaces:
251 xml.sax.xmlreader.XMLReader.setFeature(self, feature, enabled)
254 class Attributes(xml.sax.xmlreader.AttributesImpl):
255 # self._attrs has the form {name: (value, type)}
257 def getType(self, name):
258 return self._attrs[name][1]
260 def getValue(self, name):
261 return self._attrs[name][0]
263 def getValueByQName(self, name):
264 return self._attrs[name][0]
266 def __getitem__(self, name):
267 return self._attrs[name][0]
269 def get(self, name, default=None):
270 if self._attrs.has_key(name):
271 return self._attrs[name][0]
276 for name, (value, type) in self._attrs.items():
277 L.append((name, value))
282 for value, type in self._attrs.values():
287 class Locator(xml.sax.xmlreader.Locator):
292 def getLineNumber(self):
295 def getPublicId(self):
296 return self._public_id
298 def getSystemId(self):
299 return self._system_id
302 def parse(stream_or_string, parser=None):
303 if type(stream_or_string) in [type(""), type(u"")]:
304 stream = open(stream_or_string)
306 stream = stream_or_string
308 parser = ESISReader()
309 return xml.dom.pulldom.DOMEventStream(stream, parser, (2 ** 14) - 20)