def utf8String(string):
    """Return *string* in a form that is safe to write into UTF-8 output.

    Despite the historical name, the result is not always a unicode object:

    * ``None`` yields an empty string (previously this raised ``TypeError``).
    * A unicode/text object is returned untouched.
    * A pure-ASCII byte string is returned untouched.
    * Any other byte string is decoded with the first candidate encoding
      that round-trips exactly, and is returned re-encoded as UTF-8 bytes.
    * If no candidate encoding fits, each byte above 127 is replaced by
      ``'?'`` and the remaining ASCII characters are kept.

    Non-string, non-``None`` input still raises ``TypeError`` from the
    decode attempt, as before.
    """
    # Resolve the interpreter's text type once so the same code runs on
    # Python 2 (``unicode``) and Python 3 (``str``).
    try:
        text_type = unicode  # noqa: F821 -- Python 2 builtin
    except NameError:
        text_type = str

    if string is None:
        return ''
    if isinstance(string, text_type):
        return string

    # Pure ASCII needs no conversion: ASCII bytes are already valid UTF-8.
    try:
        text_type(string, 'ascii')
    except UnicodeError:
        pass
    else:
        return string

    # Try the candidate encodings in order and accept the first one whose
    # decode/encode round trip reproduces the input bytes exactly.
    # NOTE: iso-8859-1 maps every possible byte value, so for byte-string
    # input the entries after it, and the '?' fallback below, are not
    # expected to be reached; they are kept as a defensive last resort.
    for enc in ('utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2'):
        try:
            decoded = text_type(string, enc)
        except UnicodeError:
            continue
        if decoded.encode(enc) == string:
            return decoded.encode('utf-8')

    # Last resort: keep ASCII, mask everything else.  bytearray() yields
    # integer byte values on both Python 2 and Python 3.
    return ''.join(
        ['?' if byte > 127 else chr(byte) for byte in bytearray(string)]
    )
-def xmlCleanString(doc, string):
- """hands back a special-char encoded and utf8 cleaned string
- Takes a libxml2 document object and the string to clean
- document object is needed to not require expensive get_doc function
- """
- string = utf8String(string)
- string = doc.encodeSpecialChars(string)
- return string
-
def byteranges(file):
"""takes an rpm file or fileobject and returns byteranges for location of the header"""
value = utf8String(value)
value = re.sub("\n$", '', value)
entry = pkgNode.newChild(None, tag, None)
- value = xmlCleanString(doc, value)
entry.addContent(value)
time = pkgNode.newChild(None, 'time', None)
value = utf8String(value)
value = re.sub("\n$", '', value)
entry = format.newChild(formatns, tag, None)
- value = xmlCleanString(doc, value)
entry.addContent(value)
hr = format.newChild(formatns, 'header-range', None)
for file in rpmObj.usefulFiles():
files = format.newChild(None, 'file', None)
- file = xmlCleanString(doc, file)
+ file = utf8String(file)
files.addContent(file)
for directory in rpmObj.usefulDirs():
files = format.newChild(None, 'file', None)
- directory = xmlCleanString(doc, directory)
+ directory = utf8String(directory)
files.addContent(directory)
files.newProp('type', 'dir')
version.newProp('rel', str(rpmObj.tagByName('release')))
for file in rpmObj.filenames:
files = pkg.newChild(None, 'file', None)
- file = xmlCleanString(doc, file)
+ file = utf8String(file)
files.addContent(file)
for directory in rpmObj.dirnames:
files = pkg.newChild(None, 'file', None)
- directory = xmlCleanString(doc, directory)
+ directory = utf8String(directory)
files.addContent(directory)
files.newProp('type', 'dir')
for ghost in rpmObj.ghostnames:
files = pkg.newChild(None, 'file', None)
- ghost = xmlCleanString(doc, ghost)
+ ghost = utf8String(ghost)
files.addContent(ghost)
files.newProp('type', 'ghost')
return pkg
clogs = rpmObj.changelogLists()
for (name, time, text) in clogs:
clog = pkg.newChild(None, 'changelog', None)
- text = xmlCleanString(doc, text)
- clog.addContent(text)
+ clog.addContent(utf8String(text))
clog.newProp('author', utf8String(name))
clog.newProp('date', str(time))
return pkg