From 08342a2da067818adb72e87c279d39da5827cdf5 Mon Sep 17 00:00:00 2001 From: Nikita Nemkin Date: Thu, 7 Mar 2013 13:37:12 +0600 Subject: [PATCH] Pass-through single surrogates in Py_UNICODE[] literal encoding routine. --- Cython/Compiler/StringEncoding.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cython/Compiler/StringEncoding.py b/Cython/Compiler/StringEncoding.py index 95d3fa4..1eb77b5 100644 --- a/Cython/Compiler/StringEncoding.py +++ b/Cython/Compiler/StringEncoding.py @@ -280,9 +280,9 @@ def encode_pyunicode_string(s): else: utf16, utf32 = s, [] for code_unit in s: - if 0xDC00 <= code_unit <= 0xDFFF: # low surrogate - high, low = utf32.pop(), code_unit - utf32.append(((high & 0x3FF) << 10) + (low & 0x3FF) + 0x10000) + if 0xDC00 <= code_unit <= 0xDFFF and utf32 and 0xD800 <= utf32[-1] <= 0xDBFF: + high, low = utf32[-1], code_unit + utf32[-1] = ((high & 0x3FF) << 10) + (low & 0x3FF) + 0x10000 else: utf32.append(code_unit) -- 2.7.4