From d7de47c06770e55969fc6cfde6e3529b67021e02 Mon Sep 17 00:00:00 2001 From: jbj Date: Sun, 13 Jun 2004 17:10:18 +0000 Subject: [PATCH] Add libxml2 handler. CVS patchset: 7313 CVS date: 2004/06/13 17:10:18 --- tools/expatparser.py | 25 +++++++++++---- tools/libxmlparser.py | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 7 deletions(-) create mode 100644 tools/libxmlparser.py diff --git a/tools/expatparser.py b/tools/expatparser.py index aa75e5d..23e192c 100644 --- a/tools/expatparser.py +++ b/tools/expatparser.py @@ -2,13 +2,18 @@ import sys, xml.parsers.expat class RpmExpatParser: def __init__(self, fn): - self.f = open(fn) + try: + self.f = open(fn) + except: + print "unable to open %s" % (fn) + return self.p = xml.parsers.expat.ParserCreate() self.p.StartElementHandler = self.start_element self.p.EndElementHandler = self.end_element self.p.CharacterDataHandler = self.char_data self.n = 2 self.lvl = 0 + self.cdata = 0 def spew(self, l): sys.stdout.write(l) @@ -20,30 +25,36 @@ class RpmExpatParser: def start_element(self, name, attrs): l = self.pad() + '<' + name if attrs.has_key(u'name'): - l = l + ' name=' + attrs[u'name'] + l = l + ' name="' + attrs[u'name'] + '"' l = l + '>' if self.lvl < 2: - l = l + '\r\n' + l = l + '\n' self.spew(l) self.lvl = self.lvl + 1 + self.cdata = 1 def end_element(self, name): self.lvl = self.lvl - 1 l = '' if self.lvl < 2: l = self.pad() + l - l = l + '\r\n' + l = l + '\n' self.spew(l) + self.cdata = 0 def char_data(self, data): - if not data.isspace(): + if self.cdata == 1: + if not data.isspace(): + self.cdata = 2 + if self.cdata > 1: self.spew(data) def read(self, *args): return self.f.read(*args) def ParseFile(self): - self.p.ParseFile(self) + return self.p.ParseFile(self) p = RpmExpatParser('time.xml') -p.ParseFile() +ret = p.ParseFile() +print ret diff --git a/tools/libxmlparser.py b/tools/libxmlparser.py new file mode 100644 index 0000000..c816cb9 --- /dev/null +++ b/tools/libxmlparser.py @@ -0,0 +1,88 @@ +import sys, libxml2 + +class RpmLibxml2Parser: + def __init__(self, fn): + try: + self.p = libxml2.newTextReaderFilename(fn) + except: + print "unable to open %s" % (fn) + return + self.p.SetParserProp(libxml2.PARSER_VALIDATE, 1) + self.p.SetParserProp(libxml2.PARSER_SUBST_ENTITIES, 1) + self.n = 2 + self.lvl = 0 + self.cdata = 0 + + def spew(self, l): + sys.stdout.write(l) + sys.stdout.flush() + + def pad(self): + return (' ' * (self.n * self.lvl)) + + def start_element(self, name, attrs): + l = self.pad() + '<' + name + if attrs.has_key(u'name'): + l = l + ' name="' + attrs[u'name'] + '"' + l = l + '>' + if self.lvl < 2: + l = l + '\n' + self.spew(l) + self.lvl = self.lvl + 1 + self.cdata = 1 + + def end_element(self, name): + self.lvl = self.lvl - 1 + l = '' + if self.lvl < 2: + l = self.pad() + l + l = l + '\n' + self.spew(l) + self.cdata = 0 + + def char_data(self, data): + if self.cdata == 1: + if not data.isspace(): + self.cdata = 2 + if self.cdata > 1: + self.spew(data) + + def processNode(self): +# self.n = self.p.Depth() + 1 + if self.p.NodeType() == 1: # Element + name = self.p.Name() + attrs = {} + while self.p.MoveToNextAttribute(): + attrs[self.p.Name()] = self.p.Value() + self.start_element(name, attrs) + elif self.p.NodeType() == 3: # Text within element + self.char_data(self.p.Value()) + elif self.p.NodeType() == 10: # Start element + self.char_data(self.p.Value()) + elif self.p.NodeType() == 14: # Text + self.char_data(self.p.Value()) + elif self.p.NodeType() == 15: # End element + self.end_element(self.p.Name()) + else: + print "%d %d %s %d %s" % (self.p.Depth(), self.p.NodeType(), + self.p.Name(), self.p.IsEmptyElement(), + self.p.Value()) + while self.p.MoveToNextAttribute(): + print "-- %d %d (%s) [%s]" % (self.p.Depth(), self.p.NodeType(), + self.p.Name(), self.p.Value()) + + def read(self, *args): + return self.p.Read(*args) + + def ParseFile(self): + ret = self.read() + while ret == 1: + self.processNode() + ret = self.read() + return ret + +fn = 'time.xml' +p = RpmLibxml2Parser(fn) +ret = p.ParseFile() +if ret != 0: + print "Error parsing and validating %s" % (fn) -- 2.7.4