test/shaping/hb_test_tools.py

   1 #!/usr/bin/env python
   2
   3 from __future__ import print_function, division, absolute_import
   4
   5 import sys, os, re, difflib, unicodedata, errno, cgi
   6 from itertools import *
   7 try:
   8         import unicodedata2 as unicodedata
   9 except Exception:
  10         pass
  11
# One-character line prefixes for diff output.  ZipDiffer.diff_files tags a
# line coming from input number i with diff_symbols[i]; DiffColorizer looks
# the prefix up in this string to decide which side a line belongs to.
diff_symbols = "-+=*&^%$#@!~/"
# Color names passed to the formatter's start_color (), indexed by side.
diff_colors = ['red', 'green', 'blue']
  14
  15 def codepoints(s):
  16         return (ord (u) for u in s)
  17
# Select the "integer -> character" function for the running interpreter.
# Where the builtin unichr exists it is kept (and patched on narrow builds);
# where looking it up raises NameError, chr is used instead.
try:
        unichr = unichr

        if sys.maxunicode < 0x10FFFF:
                # workarounds for Python 2 "narrow" builds with UCS2-only support.

                # Keep a handle on the builtin before shadowing it below.
                _narrow_unichr = unichr

                def unichr(i):
                        """
                        Return the unicode character whose Unicode code is the integer 'i'.
                        The valid range is 0 to 0x10FFFF inclusive.

                        >>> _narrow_unichr(0xFFFF + 1)
                        Traceback (most recent call last):
                          File "<stdin>", line 1, in ?
                        ValueError: unichr() arg not in range(0x10000) (narrow Python build)
                        >>> unichr(0xFFFF + 1) == u'\U00010000'
                        True
                        >>> unichr(1114111) == u'\U0010FFFF'
                        True
                        >>> unichr(0x10FFFF + 1)
                        Traceback (most recent call last):
                          File "<stdin>", line 1, in ?
                        ValueError: unichr() arg not in range(0x110000)
                        """
                        try:
                                return _narrow_unichr(i)
                        except ValueError:
                                # The builtin refused the value: spell it as an
                                # eight-digit "\UXXXXXXXX" escape and let the
                                # unicode-escape codec build the character.
                                try:
                                        padded_hex_str = hex(i)[2:].zfill(8)
                                        escape_str = "\\U" + padded_hex_str
                                        return escape_str.decode("unicode-escape")
                                except UnicodeDecodeError:
                                        raise ValueError('unichr() arg not in range(0x110000)')

                def codepoints(s):
                        """Yield the code points of `s`, combining surrogate pairs.

                        A high surrogate (0xD800-0xDBFF) followed by a low
                        surrogate (0xDC00-0xDFFF) yields one combined value;
                        a surrogate without its partner yields 0xFFFD (the
                        Unicode replacement character) instead.
                        """
                        # Pending high surrogate waiting for its low half, if any.
                        high_surrogate = None
                        for u in s:
                                cp = ord (u)
                                if 0xDC00 <= cp <= 0xDFFF:
                                        # Low surrogate: valid only right after a high one.
                                        if high_surrogate:
                                                yield 0x10000 + (high_surrogate - 0xD800) * 0x400 + (cp - 0xDC00)
                                                high_surrogate = None
                                        else:
                                                yield 0xFFFD
                                else:
                                        # Anything else ends a pending, unpaired high surrogate.
                                        if high_surrogate:
                                                yield 0xFFFD
                                                high_surrogate = None
                                        if 0xD800 <= cp <= 0xDBFF:
                                                high_surrogate = cp
                                        else:
                                                yield cp
                                                high_surrogate = None
                        # Input ended in the middle of a pair.
                        if high_surrogate:
                                yield 0xFFFD

except NameError:
        unichr = chr
  78
# Make the name `unicode` usable everywhere: keep the builtin where it
# exists, otherwise (NameError) alias it to str.
try:
        unicode = unicode
except NameError:
        unicode = str
  83
  84 def tounicode(s, encoding='ascii', errors='strict'):
  85         if not isinstance(s, unicode):
  86                 return s.decode(encoding, errors)
  87         else:
  88                 return s
  89
  90 class ColorFormatter:
  91
  92         class Null:
  93                 @staticmethod
  94                 def start_color (c): return ''
  95                 @staticmethod
  96                 def end_color (): return ''
  97                 @staticmethod
  98                 def escape (s): return s
  99                 @staticmethod
 100                 def newline (): return '\n'
 101
 102         class ANSI:
 103                 @staticmethod
 104                 def start_color (c):
 105                         return {
 106                                 'red': '\033[41;37;1m',
 107                                 'green': '\033[42;37;1m',
 108                                 'blue': '\033[44;37;1m',
 109                         }[c]
 110                 @staticmethod
 111                 def end_color ():
 112                         return '\033[m'
 113                 @staticmethod
 114                 def escape (s): return s
 115                 @staticmethod
 116                 def newline (): return '\n'
 117
 118         class HTML:
 119                 @staticmethod
 120                 def start_color (c):
 121                         return '<span style="background:%s">' % c
 122                 @staticmethod
 123                 def end_color ():
 124                         return '</span>'
 125                 @staticmethod
 126                 def escape (s): return cgi.escape (s)
 127                 @staticmethod
 128                 def newline (): return '<br/>\n'
 129
 130         @staticmethod
 131         def Auto (argv = [], out = sys.stdout):
 132                 format = ColorFormatter.ANSI
 133                 if "--format" in argv:
 134                         argv.remove ("--format")
 135                         format = ColorFormatter.ANSI
 136                 if "--format=ansi" in argv:
 137                         argv.remove ("--format=ansi")
 138                         format = ColorFormatter.ANSI
 139                 if "--format=html" in argv:
 140                         argv.remove ("--format=html")
 141                         format = ColorFormatter.HTML
 142                 if "--no-format" in argv:
 143                         argv.remove ("--no-format")
 144                         format = ColorFormatter.Null
 145                 return format
 146
 147
 148 class DiffColorizer:
 149
 150         diff_regex = re.compile ('([a-za-z0-9_]*)([^a-za-z0-9_]?)')
 151
 152         def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
 153                 self.formatter = formatter
 154                 self.colors = colors
 155                 self.symbols = symbols
 156
 157         def colorize_lines (self, lines):
 158                 lines = (l if l else '' for l in lines)
 159                 ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
 160                 oo = ["",""]
 161                 st = [False, False]
 162                 for l in difflib.Differ().compare (*ss):
 163                         if l[0] == '?':
 164                                 continue
 165                         if l[0] == ' ':
 166                                 for i in range(2):
 167                                         if st[i]:
 168                                                 oo[i] += self.formatter.end_color ()
 169                                                 st[i] = False
 170                                 oo = [o + self.formatter.escape (l[2:]) for o in oo]
 171                                 continue
 172                         if l[0] in self.symbols:
 173                                 i = self.symbols.index (l[0])
 174                                 if not st[i]:
 175                                         oo[i] += self.formatter.start_color (self.colors[i])
 176                                         st[i] = True
 177                                 oo[i] += self.formatter.escape (l[2:])
 178                                 continue
 179                 for i in range(2):
 180                         if st[i]:
 181                                 oo[i] += self.formatter.end_color ()
 182                                 st[i] = False
 183                 oo = [o.replace ('\n', '') for o in oo]
 184                 return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]
 185
 186         def colorize_diff (self, f):
 187                 lines = [None, None]
 188                 for l in f:
 189                         if l[0] not in self.symbols:
 190                                 yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
 191                                 continue
 192                         i = self.symbols.index (l[0])
 193                         if lines[i]:
 194                                 # Flush
 195                                 for line in self.colorize_lines (lines):
 196                                         yield line
 197                                 lines = [None, None]
 198                         lines[i] = l[1:]
 199                         if (all (lines)):
 200                                 # Flush
 201                                 for line in self.colorize_lines (lines):
 202                                         yield line
 203                                 lines = [None, None]
 204                 if (any (lines)):
 205                         # Flush
 206                         for line in self.colorize_lines (lines):
 207                                 yield line
 208
 209
 210 class ZipDiffer:
 211
 212         @staticmethod
 213         def diff_files (files, symbols=diff_symbols):
 214                 files = tuple (files) # in case it's a generator, copy it
 215                 try:
 216                         for lines in izip_longest (*files):
 217                                 if all (lines[0] == line for line in lines[1:]):
 218                                         sys.stdout.writelines ([" ", lines[0]])
 219                                         continue
 220
 221                                 for i, l in enumerate (lines):
 222                                         if l:
 223                                                 sys.stdout.writelines ([symbols[i], l])
 224                 except IOError as e:
 225                         if e.errno != errno.EPIPE:
 226                                 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
 227                                 sys.exit (1)
 228
 229
 230 class DiffFilters:
 231
 232         @staticmethod
 233         def filter_failures (f):
 234                 for key, lines in DiffHelpers.separate_test_cases (f):
 235                         lines = list (lines)
 236                         if not DiffHelpers.test_passed (lines):
 237                                 for l in lines: yield l
 238
 239 class Stat:
 240
 241         def __init__ (self):
 242                 self.count = 0
 243                 self.freq = 0
 244
 245         def add (self, test):
 246                 self.count += 1
 247                 self.freq += test.freq
 248
 249 class Stats:
 250
 251         def __init__ (self):
 252                 self.passed = Stat ()
 253                 self.failed = Stat ()
 254                 self.total  = Stat ()
 255
 256         def add (self, test):
 257                 self.total.add (test)
 258                 if test.passed:
 259                         self.passed.add (test)
 260                 else:
 261                         self.failed.add (test)
 262
 263         def mean (self):
 264                 return float (self.passed.count) / self.total.count
 265
 266         def variance (self):
 267                 return (float (self.passed.count) / self.total.count) * \
 268                        (float (self.failed.count) / self.total.count)
 269
 270         def stddev (self):
 271                 return self.variance () ** .5
 272
 273         def zscore (self, population):
 274                 """Calculate the standard score.
 275                    Population is the Stats for population.
 276                    Self is Stats for sample.
 277                    Returns larger absolute value if sample is highly unlikely to be random.
 278                    Anything outside of -3..+3 is very unlikely to be random.
 279                    See: http://en.wikipedia.org/wiki/Standard_score"""
 280
 281                 return (self.mean () - population.mean ()) / population.stddev ()
 282
 283
 284
 285
 286 class DiffSinks:
 287
 288         @staticmethod
 289         def print_stat (f):
 290                 passed = 0
 291                 failed = 0
 292                 # XXX port to Stats, but that would really slow us down here
 293                 for key, lines in DiffHelpers.separate_test_cases (f):
 294                         if DiffHelpers.test_passed (lines):
 295                                 passed += 1
 296                         else:
 297                                 failed += 1
 298                 total = passed + failed
 299                 print ("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, 100. * failed / total))
 300
 301
 302 class Test:
 303
 304         def __init__ (self, lines):
 305                 self.freq = 1
 306                 self.passed = True
 307                 self.identifier = None
 308                 self.text = None
 309                 self.unicodes = None
 310                 self.glyphs = None
 311                 for l in lines:
 312                         symbol = l[0]
 313                         if symbol != ' ':
 314                                 self.passed = False
 315                         i = 1
 316                         if ':' in l:
 317                                 i = l.index (':')
 318                                 if not self.identifier:
 319                                         self.identifier = l[1:i]
 320                                 i = i + 2 # Skip colon and space
 321                         j = -1
 322                         if l[j] == '\n':
 323                                 j -= 1
 324                         brackets = l[i] + l[j]
 325                         l = l[i+1:-2]
 326                         if brackets == '()':
 327                                 self.text = l
 328                         elif brackets == '<>':
 329                                 self.unicodes = Unicode.parse (l)
 330                         elif brackets == '[]':
 331                                 # XXX we don't handle failed tests here
 332                                 self.glyphs = l
 333
 334
 335 class DiffHelpers:
 336
 337         @staticmethod
 338         def separate_test_cases (f):
 339                 '''Reads lines from f, and if the lines have identifiers, ie.
 340                    have a colon character, groups them by identifier,
 341                    yielding lists of all lines with the same identifier.'''
 342
 343                 def identifier (l):
 344                         if ':' in l[1:]:
 345                                 return l[1:l.index (':')]
 346                         return l
 347                 return groupby (f, key=identifier)
 348
 349         @staticmethod
 350         def test_passed (lines):
 351                 lines = list (lines)
 352                 # XXX This is a hack, but does the job for now.
 353                 if any (l.find("space+0|space+0") >= 0 for l in lines if l[0] == '+'): return True
 354                 if any (l.find("uni25CC") >= 0 for l in lines if l[0] == '+'): return True
 355                 if any (l.find("dottedcircle") >= 0 for l in lines if l[0] == '+'): return True
 356                 if any (l.find("glyph0") >= 0 for l in lines if l[0] == '+'): return True
 357                 if any (l.find("gid0") >= 0 for l in lines if l[0] == '+'): return True
 358                 if any (l.find("notdef") >= 0 for l in lines if l[0] == '+'): return True
 359                 return all (l[0] == ' ' for l in lines)
 360
 361
 362 class FilterHelpers:
 363
 364         @staticmethod
 365         def filter_printer_function (filter_callback):
 366                 def printer (f):
 367                         for line in filter_callback (f):
 368                                 print (line)
 369                 return printer
 370
 371         @staticmethod
 372         def filter_printer_function_no_newline (filter_callback):
 373                 def printer (f):
 374                         for line in filter_callback (f):
 375                                 sys.stdout.writelines ([line])
 376                 return printer
 377
 378
 379 class Ngram:
 380
 381         @staticmethod
 382         def generator (n):
 383
 384                 def gen (f):
 385                         l = []
 386                         for x in f:
 387                                 l.append (x)
 388                                 if len (l) == n:
 389                                         yield tuple (l)
 390                                         l[:1] = []
 391
 392                 gen.n = n
 393                 return gen
 394
 395
 396 class UtilMains:
 397
 398         @staticmethod
 399         def process_multiple_files (callback, mnemonic = "FILE"):
 400
 401                 if "--help" in sys.argv:
 402                         print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
 403                         sys.exit (1)
 404
 405                 try:
 406                         files = sys.argv[1:] if len (sys.argv) > 1 else ['-']
 407                         for s in files:
 408                                 callback (FileHelpers.open_file_or_stdin (s))
 409                 except IOError as e:
 410                         if e.errno != errno.EPIPE:
 411                                 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
 412                                 sys.exit (1)
 413
 414         @staticmethod
 415         def process_multiple_args (callback, mnemonic):
 416
 417                 if len (sys.argv) == 1 or "--help" in sys.argv:
 418                         print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
 419                         sys.exit (1)
 420
 421                 try:
 422                         for s in sys.argv[1:]:
 423                                 callback (s)
 424                 except IOError as e:
 425                         if e.errno != errno.EPIPE:
 426                                 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
 427                                 sys.exit (1)
 428
 429         @staticmethod
 430         def filter_multiple_strings_or_stdin (callback, mnemonic, \
 431                                               separator = " ", \
 432                                               concat_separator = False):
 433
 434                 if "--help" in sys.argv:
 435                         print ("Usage:\n  %s %s...\nor:\n  %s\n\nWhen called with no arguments, input is read from standard input." \
 436                               % (sys.argv[0], mnemonic, sys.argv[0]))
 437                         sys.exit (1)
 438
 439                 try:
 440                         if len (sys.argv) == 1:
 441                                 while (1):
 442                                         line = sys.stdin.readline ()
 443                                         if not len (line):
 444                                                 break
 445                                         if line[-1] == '\n':
 446                                                 line = line[:-1]
 447                                         print (callback (line))
 448                         else:
 449                                 args = sys.argv[1:]
 450                                 if concat_separator != False:
 451                                         args = [concat_separator.join (args)]
 452                                 print (separator.join (callback (x) for x in (args)))
 453                 except IOError as e:
 454                         if e.errno != errno.EPIPE:
 455                                 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
 456                                 sys.exit (1)
 457
 458
 459 class Unicode:
 460
 461         @staticmethod
 462         def decode (s):
 463                 return u','.join ("U+%04X" % cp for cp in codepoints (tounicode (s, 'utf-8')))
 464
 465         @staticmethod
 466         def parse (s):
 467                 s = re.sub (r"0[xX]", " ", s)
 468                 s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n\t]", " ", s)
 469                 return [int (x, 16) for x in s.split ()]
 470
 471         @staticmethod
 472         def encode (s):
 473                 s = u''.join (unichr (x) for x in Unicode.parse (s))
 474                 if sys.version_info[0] == 2: s = s.encode ('utf-8')
 475                 return s
 476
 477         shorthands = {
 478                 "ZERO WIDTH NON-JOINER": "ZWNJ",
 479                 "ZERO WIDTH JOINER": "ZWJ",
 480                 "NARROW NO-BREAK SPACE": "NNBSP",
 481                 "COMBINING GRAPHEME JOINER": "CGJ",
 482                 "LEFT-TO-RIGHT MARK": "LRM",
 483                 "RIGHT-TO-LEFT MARK": "RLM",
 484                 "LEFT-TO-RIGHT EMBEDDING": "LRE",
 485                 "RIGHT-TO-LEFT EMBEDDING": "RLE",
 486                 "POP DIRECTIONAL FORMATTING": "PDF",
 487                 "LEFT-TO-RIGHT OVERRIDE": "LRO",
 488                 "RIGHT-TO-LEFT OVERRIDE": "RLO",
 489         }
 490
 491         @staticmethod
 492         def pretty_name (u):
 493                 try:
 494                         s = unicodedata.name (u)
 495                 except ValueError:
 496                         return "XXX"
 497                 s = re.sub (".* LETTER ", "", s)
 498                 s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
 499                 s = re.sub (".* SIGN ", "", s)
 500                 s = re.sub (".* COMBINING ", "", s)
 501                 if re.match (".* VIRAMA", s):
 502                         s = "HALANT"
 503                 if s in Unicode.shorthands:
 504                         s = Unicode.shorthands[s]
 505                 return s
 506
 507         @staticmethod
 508         def pretty_names (s):
 509                 s = re.sub (r"[<+>\\uU]", " ", s)
 510                 s = re.sub (r"0[xX]", " ", s)
 511                 s = [unichr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
 512                 return u' + '.join (Unicode.pretty_name (x) for x in s).encode ('utf-8')
 513
 514
 515 class FileHelpers:
 516
 517         @staticmethod
 518         def open_file_or_stdin (f):
 519                 if f == '-':
 520                         return sys.stdin
 521                 return open (f)
 522
 523
 524 class Manifest:
 525
 526         @staticmethod
 527         def read (s, strict = True):
 528
 529                 if not os.path.exists (s):
 530                         if strict:
 531                                 print ("%s: %s does not exist" % (sys.argv[0], s), file=sys.stderr)
 532                                 sys.exit (1)
 533                         return
 534
 535                 s = os.path.normpath (s)
 536
 537                 if os.path.isdir (s):
 538
 539                         try:
 540                                 m = open (os.path.join (s, "MANIFEST"))
 541                                 items = [x.strip () for x in m.readlines ()]
 542                                 for f in items:
 543                                         for p in Manifest.read (os.path.join (s, f)):
 544                                                 yield p
 545                         except IOError:
 546                                 if strict:
 547                                         print ("%s: %s does not exist" % (sys.argv[0], os.path.join (s, "MANIFEST")), file=sys.stderr)
 548                                         sys.exit (1)
 549                                 return
 550                 else:
 551                         yield s
 552
 553         @staticmethod
 554         def update_recursive (s):
 555
 556                 for dirpath, dirnames, filenames in os.walk (s, followlinks=True):
 557
 558                         for f in ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]:
 559                                 if f in dirnames:
 560                                         dirnames.remove (f)
 561                                 if f in filenames:
 562                                         filenames.remove (f)
 563                         dirnames.sort ()
 564                         filenames.sort ()
 565                         ms = os.path.join (dirpath, "MANIFEST")
 566                         print ("  GEN    %s" % ms)
 567                         m = open (ms, "w")
 568                         for f in filenames:
 569                                 print (f, file=m)
 570                         for f in dirnames:
 571                                 print (f, file=m)
 572                         for f in dirnames:
 573                                 Manifest.update_recursive (os.path.join (dirpath, f))
 574
# Running this module directly does nothing; it only provides definitions.
if __name__ == '__main__':
        pass