Imported Upstream version 1.7.6
[platform/upstream/harfbuzz.git] / test / shaping / hb_test_tools.py
index 747699b..f7e5943 100644 (file)
@@ -7,9 +7,81 @@ from itertools import *
 diff_symbols = "-+=*&^%$#@!~/"
 diff_colors = ['red', 'green', 'blue']
 
-if sys.version_info[0] >= 3:
+def codepoints(s):
+       """
+       Return a generator producing ord() of each element of 's', in order.
+
+       This is the default definition; the narrow-build branch further down
+       replaces it with a surrogate-aware generator when that branch runs.
+       """
+       return (ord (u) for u in s)
+
+# Python 2/3 compatibility for unichr: looking up the builtin raises NameError
+# where it does not exist, in which case the except clause at the bottom of
+# this block aliases unichr to chr instead.
+try:
+       unichr = unichr
+
+       if sys.maxunicode < 0x10FFFF:
+               # workarounds for Python 2 "narrow" builds with UCS2-only support.
+
+               # Keep a handle on the builtin so the wrapper below can delegate to it.
+               _narrow_unichr = unichr
+
+               def unichr(i):
+                       """
+                       Return the unicode character whose Unicode code is the integer 'i'.
+                       The valid range is 0 to 0x10FFFF inclusive.
+
+                       >>> _narrow_unichr(0xFFFF + 1)
+                       Traceback (most recent call last):
+                         File "<stdin>", line 1, in ?
+                       ValueError: unichr() arg not in range(0x10000) (narrow Python build)
+                       >>> unichr(0xFFFF + 1) == u'\U00010000'
+                       True
+                       >>> unichr(1114111) == u'\U0010FFFF'
+                       True
+                       >>> unichr(0x10FFFF + 1)
+                       Traceback (most recent call last):
+                         File "<stdin>", line 1, in ?
+                       ValueError: unichr() arg not in range(0x110000)
+                       """
+                       try:
+                               return _narrow_unichr(i)
+                       except ValueError:
+                               # The builtin rejected 'i': spell it as a "\UXXXXXXXX" escape
+                               # (eight zero-padded hex digits) and let the unicode-escape
+                               # codec decode it. A decode failure is reported as ValueError.
+                               try:
+                                       # NOTE(review): hex() of a Python 2 long ends in 'L' and hex()
+                                       # of a negative number starts with '-', so [2:] leaves a
+                                       # malformed escape and both end up as the ValueError below.
+                                       # Confirm that long arguments never need to be accepted here.
+                                       padded_hex_str = hex(i)[2:].zfill(8)
+                                       escape_str = "\\U" + padded_hex_str
+                                       return escape_str.decode("unicode-escape")
+                               except UnicodeDecodeError:
+                                       raise ValueError('unichr() arg not in range(0x110000)')
+
+               def codepoints(s):
+                       """
+                       Yield the code points of 's', combining each high surrogate
+                       (0xD800-0xDBFF) that is immediately followed by a low surrogate
+                       (0xDC00-0xDFFF) into a single code point at or above 0x10000.
+                       Every unpaired surrogate yields 0xFFFC instead.
+                       """
+                       # NOTE(review): 0xFFFC is U+FFFC OBJECT REPLACEMENT CHARACTER; the
+                       # usual substitute for undecodable input is U+FFFD. Confirm which
+                       # one is intended before relying on this value.
+                       # Holds a high surrogate that is still waiting for its low half.
+                       high_surrogate = None
+                       for u in s:
+                               cp = ord (u)
+                               if 0xDC00 <= cp <= 0xDFFF:
+                                       # Low surrogate: complete the pending pair, or flag it as stray.
+                                       if high_surrogate:
+                                               yield 0x10000 + (high_surrogate - 0xD800) * 0x400 + (cp - 0xDC00)
+                                               high_surrogate = None
+                                       else:
+                                               yield 0xFFFC
+                               else:
+                                       # Anything else ends a pending high surrogate without a pair.
+                                       if high_surrogate:
+                                               yield 0xFFFC
+                                               high_surrogate = None
+                                       if 0xD800 <= cp <= 0xDBFF:
+                                               high_surrogate = cp
+                                       else:
+                                               yield cp
+                                               # Already None at this point; the reset is a no-op.
+                                               high_surrogate = None
+                       # A high surrogate at the very end of 's' has nothing to pair with.
+                       if high_surrogate:
+                               yield 0xFFFC
+
+except NameError:
        unichr = chr
 
+# Make the name 'unicode' usable everywhere below: keep the builtin when it
+# exists, otherwise (NameError) fall back to str.
+try:
+       unicode = unicode
+except NameError:
+       unicode = str
+
+def tounicode(s, encoding='ascii', errors='strict'):
+       """
+       Return 's' unchanged when it is already an instance of 'unicode';
+       otherwise return s.decode(encoding, errors).
+
+       'encoding' and 'errors' are passed straight through to decode() and
+       default to 'ascii' and 'strict'.
+       """
+       if not isinstance(s, unicode):
+               return s.decode(encoding, errors)
+       else:
+               return s
+
 class ColorFormatter:
 
        class Null:
@@ -221,32 +293,6 @@ class DiffSinks:
                total = passed + failed
                print ("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, 100. * failed / total))
 
-       @staticmethod
-       def print_ngrams (f, ns=(1,2,3)):
-               gens = tuple (Ngram.generator (n) for n in ns)
-               allstats = Stats ()
-               allgrams = {}
-               for key, lines in DiffHelpers.separate_test_cases (f):
-                       test = Test (lines)
-                       allstats.add (test)
-
-                       for gen in gens:
-                               for ngram in gen (test.unicodes):
-                                       if ngram not in allgrams:
-                                               allgrams[ngram] = Stats ()
-                                       allgrams[ngram].add (test)
-
-               importantgrams = {}
-               for ngram, stats in allgrams.iteritems ():
-                       if stats.failed.count >= 30: # for statistical reasons
-                               importantgrams[ngram] = stats
-               allgrams = importantgrams
-               del importantgrams
-
-               for ngram, stats in allgrams.iteritems ():
-                       print ("zscore: %9f failed: %6d passed: %6d ngram: <%s>" % (stats.zscore (allstats), stats.failed.count, stats.passed.count, ','.join ("U+%04X" % u for u in ngram)))
-
-
 
 class Test:
 
@@ -409,12 +455,12 @@ class Unicode:
 
        @staticmethod
        def decode (s):
-               return u','.join ("U+%04X" % ord (u) for u in unicode (s, 'utf-8')).encode ('utf-8')
+               # Pass 's' through tounicode (decoding as UTF-8 when it is not already
+               # unicode), then format every code point from codepoints() as "U+" plus
+               # at least four uppercase hex digits, joined with commas. Unlike the
+               # line this replaces, the joined result is not encoded back to UTF-8.
+               return u','.join ("U+%04X" % cp for cp in codepoints (tounicode (s, 'utf-8')))
 
        @staticmethod
        def parse (s):
                s = re.sub (r"0[xX]", " ", s)
-               s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n    ]", " ", s)
+               # After the "0x"/"0X" prefixes are blanked above, also replace each of
+               # the listed punctuation characters, backslash, the letters
+               # x/X/u/U/n/N/i/I, newline and tab with a space. What remains is split
+               # on whitespace below and each token is parsed as a base-16 integer.
+               s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n\t]", " ", s)
                return [int (x, 16) for x in s.split ()]
 
        @staticmethod