From 805ed9f5150d8d202700d4be67af9d7068d8db5b Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 2 Dec 2010 14:58:27 -0500 Subject: [PATCH] scanner: More XML unicode fixes Coerce input temporarily inside xmlwriter into Unicode to do string manipulation, and then only convert it to "UTF8str" when writing into the data buffer. --- giscanner/xmlwriter.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/giscanner/xmlwriter.py b/giscanner/xmlwriter.py index 84c24c0f..76880de0 100755 --- a/giscanner/xmlwriter.py +++ b/giscanner/xmlwriter.py @@ -95,10 +95,10 @@ class XMLWriter(object): tag_name, attributes, self._indent, self._indent_char, len(tag_name) + 2) - self.write_line('<%s%s>' % (tag_name, attrs)) + self.write_line(u'<%s%s>' % (tag_name, attrs)) def _close_tag(self, tag_name): - self.write_line('</%s>' % (tag_name, )) + self.write_line(u'</%s>' % (tag_name, )) # Public API @@ -113,17 +113,19 @@ class XMLWriter(object): def get_xml(self): return self._data.getvalue() - def write_line(self, line='', indent=True, do_escape=False): + def write_line(self, line=u'', indent=True, do_escape=False): + if isinstance(line, str): + line = line.decode('utf-8') + assert isinstance(line, unicode) if do_escape: - line = escape(str(line)) - + line = escape(str(line)).decode('utf-8') if indent: self._data.write('%s%s%s' % ( self._indent_char * self._indent, line.encode('utf-8'), self._newline_char)) else: - self._data.write('%s%s' % (line, self._newline_char)) + self._data.write('%s%s' % (line.encode('utf-8'), self._newline_char)) def write_comment(self, text): self.write_line('<!-- %s -->' % (text, )) @@ -131,11 +133,13 @@ class XMLWriter(object): def write_tag(self, tag_name, attributes, data=None): if attributes is None: attributes = [] - prefix = '<%s' % (tag_name, ) + prefix = u'<%s' % (tag_name, ) if data is not None: - suffix = '>%s</%s>' % (escape(data), tag_name) + if isinstance(data, str): + data = data.decode('UTF-8') + suffix = u'>%s</%s>' % 
(escape(data), tag_name) else: - suffix = '/>' + suffix = u'/>' attrs = collect_attributes( tag_name, attributes, self._indent, -- 2.34.1