From 17a03ba5bc247628c44afb69d5b457f3f0077ac2 Mon Sep 17 00:00:00 2001 From: Seth Vidal Date: Fri, 27 Aug 2004 07:03:40 +0000 Subject: [PATCH] patch from Ville Skytta (this a will be wrong, sorry) to correct decoding/encoding problems. --- dumpMetadata.py | 52 +++++++++++++++++++++++------------------------ genpkgmetadata.py | 6 +++--- 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/dumpMetadata.py b/dumpMetadata.py index ede68ac..020a770 100644 --- a/dumpMetadata.py +++ b/dumpMetadata.py @@ -86,28 +86,29 @@ def getChecksum(sumtype, file, CHUNK=2**16): def utf8String(string): """hands back a unicoded string""" + if isinstance(string, unicode): + return string try: - string = unicode(string) + x = unicode(string, 'ascii') + return string except UnicodeError: - newstring = '' - for char in string: - if ord(char) > 127: - newstring = newstring + '?' + encodings = ['utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2'] + for enc in encodings: + try: + x = unicode(string, enc) + except UnicodeError: + pass else: - newstring = newstring + char - return unicode(newstring) - else: - return string + if x.encode(enc) == string: + return x.encode('utf-8') + newstring = '' + for char in string: + if ord(char) > 127: + newstring = newstring + '?' + else: + newstring = newstring + char + return newstring -def xmlCleanString(doc, string): - """hands back a special-char encoded and utf8 cleaned string - Takes a libxml2 document object and the string to clean - document object is needed to not require expensive get_doc function - """ - string = utf8String(string) - string = doc.encodeSpecialChars(string) - return string - def byteranges(file): """takes an rpm file or fileobject and returns byteranges for location of the header""" @@ -506,7 +507,6 @@ def generateXML(doc, node, formatns, rpmObj, sumtype): value = utf8String(value) value = re.sub("\n$", '', value) entry = pkgNode.newChild(None, tag, None) - value = xmlCleanString(doc, value) entry.addContent(value) time = pkgNode.newChild(None, 'time', None) @@ -526,7 +526,6 @@ def generateXML(doc, node, formatns, rpmObj, sumtype): value = utf8String(value) value = re.sub("\n$", '', value) entry = format.newChild(formatns, tag, None) - value = xmlCleanString(doc, value) entry.addContent(value) hr = format.newChild(formatns, 'header-range', None) @@ -580,11 +579,11 @@ def generateXML(doc, node, formatns, rpmObj, sumtype): for file in rpmObj.usefulFiles(): files = format.newChild(None, 'file', None) - file = xmlCleanString(doc, file) + file = utf8String(file) files.addContent(file) for directory in rpmObj.usefulDirs(): files = format.newChild(None, 'file', None) - directory = xmlCleanString(doc, directory) + directory = utf8String(directory) files.addContent(directory) files.newProp('type', 'dir') @@ -601,16 +600,16 @@ def fileListXML(doc, node, rpmObj): version.newProp('rel', str(rpmObj.tagByName('release'))) for file in rpmObj.filenames: files = pkg.newChild(None, 'file', None) - file = xmlCleanString(doc, file) + file = utf8String(file) files.addContent(file) for directory in rpmObj.dirnames: files = pkg.newChild(None, 'file', None) - directory = xmlCleanString(doc, directory) + directory = utf8String(directory) files.addContent(directory) files.newProp('type', 'dir') for ghost in rpmObj.ghostnames: files = pkg.newChild(None, 'file', None) - ghost = xmlCleanString(doc, ghost) + ghost = utf8String(ghost) files.addContent(ghost) files.newProp('type', 'ghost') return pkg @@ -627,8 +626,7 @@ def otherXML(doc, node, rpmObj): clogs = rpmObj.changelogLists() for (name, time, text) in clogs: clog = pkg.newChild(None, 'changelog', None) - text = xmlCleanString(doc, text) - clog.addContent(text) + clog.addContent(utf8String(text)) clog.newProp('author', utf8String(name)) clog.newProp('date', str(time)) return pkg diff --git a/genpkgmetadata.py b/genpkgmetadata.py index b2655b0..fdac294 100755 --- a/genpkgmetadata.py +++ b/genpkgmetadata.py @@ -288,7 +288,7 @@ def doPkgMetadata(cmds, ts): errorprint(_('\nAn error occurred creating primary metadata: %s') % e) continue else: - output = node.serialize(None, cmds['pretty']) + output = node.serialize('UTF-8', cmds['pretty']) basefile.write(output) basefile.write('\n') node.unlinkNode() @@ -301,7 +301,7 @@ def doPkgMetadata(cmds, ts): errorprint(_('\nAn error occurred creating filelists: %s') % e) continue else: - output = node.serialize(None, cmds['pretty']) + output = node.serialize('UTF-8', cmds['pretty']) flfile.write(output) flfile.write('\n') node.unlinkNode() @@ -314,7 +314,7 @@ def doPkgMetadata(cmds, ts): errorprint(_('\nAn error occurred: %s') % e) continue else: - output = node.serialize(None, cmds['pretty']) + output = node.serialize('UTF-8', cmds['pretty']) otherfile.write(output) otherfile.write('\n') node.unlinkNode() -- 2.34.1