test/shaping/hb_test_tools.py

   1 #!/usr/bin/env python
   2
   3 from __future__ import print_function, division, absolute_import
   4
   5 import sys, os, re, difflib, unicodedata, errno, cgi
   6 from itertools import *
   7
# Marker characters for diff output.  ZipDiffer.diff_files prefixes a
# differing line with the symbol whose index equals the index of the file the
# line came from; DiffColorizer looks the leading character up in this string.
diff_symbols = "-+=*&^%$#@!~/"
# Color names handed to the formatter's start_color(), indexed the same way
# as diff_symbols.
diff_colors = ['red', 'green', 'blue']
  10
  11 def codepoints(s):
  12         return (ord (u) for u in s)
  13
# Python 2/3 compatibility for unichr() and codepoints().
# On Python 3 the name `unichr` is undefined, so the self-assignment below
# raises NameError and the except clause at the bottom aliases it to chr().
try:
        unichr = unichr

        if sys.maxunicode < 0x10FFFF:
                # workarounds for Python 2 "narrow" builds with UCS2-only support.

                # Keep a handle on the builtin before it is shadowed below.
                _narrow_unichr = unichr

                def unichr(i):
                        """
                        Return the unicode character whose Unicode code is the integer 'i'.
                        The valid range is 0 to 0x10FFFF inclusive.

                        >>> _narrow_unichr(0xFFFF + 1)
                        Traceback (most recent call last):
                          File "<stdin>", line 1, in ?
                        ValueError: unichr() arg not in range(0x10000) (narrow Python build)
                        >>> unichr(0xFFFF + 1) == u'\U00010000'
                        True
                        >>> unichr(1114111) == u'\U0010FFFF'
                        True
                        >>> unichr(0x10FFFF + 1)
                        Traceback (most recent call last):
                          File "<stdin>", line 1, in ?
                        ValueError: unichr() arg not in range(0x110000)
                        """
                        try:
                                return _narrow_unichr(i)
                        except ValueError:
                                # The builtin rejected the value: build a "\UXXXXXXXX"
                                # escape and let the unicode-escape codec decode it.
                                try:
                                        padded_hex_str = hex(i)[2:].zfill(8)
                                        escape_str = "\\U" + padded_hex_str
                                        return escape_str.decode("unicode-escape")
                                except UnicodeDecodeError:
                                        raise ValueError('unichr() arg not in range(0x110000)')

                # Replaces the module-level codepoints() defined earlier: combines
                # surrogate pairs into a single code point and yields 0xFFFC in
                # place of any unpaired surrogate.
                def codepoints(s):
                        high_surrogate = None
                        for u in s:
                                cp = ord (u)
                                if 0xDC00 <= cp <= 0xDFFF:
                                        # Low surrogate: valid only right after a high one.
                                        if high_surrogate:
                                                yield 0x10000 + (high_surrogate - 0xD800) * 0x400 + (cp - 0xDC00)
                                                high_surrogate = None
                                        else:
                                                yield 0xFFFC
                                else:
                                        # A pending high surrogate was not followed by a low one.
                                        if high_surrogate:
                                                yield 0xFFFC
                                                high_surrogate = None
                                        if 0xD800 <= cp <= 0xDBFF:
                                                # Hold the high surrogate until the next item is seen.
                                                high_surrogate = cp
                                        else:
                                                yield cp
                                                high_surrogate = None
                        # Input ended while a high surrogate was still pending.
                        if high_surrogate:
                                yield 0xFFFC

except NameError:
        unichr = chr
  74
# Alias `unicode` to the text type.  Where the name is undefined the
# self-assignment raises NameError and `str` is used instead.
try:
        unicode = unicode
except NameError:
        unicode = str
  79
  80 def tounicode(s, encoding='ascii', errors='strict'):
  81         if not isinstance(s, unicode):
  82                 return s.decode(encoding, errors)
  83         else:
  84                 return s
  85
  86 class ColorFormatter:
  87
  88         class Null:
  89                 @staticmethod
  90                 def start_color (c): return ''
  91                 @staticmethod
  92                 def end_color (): return ''
  93                 @staticmethod
  94                 def escape (s): return s
  95                 @staticmethod
  96                 def newline (): return '\n'
  97
  98         class ANSI:
  99                 @staticmethod
 100                 def start_color (c):
 101                         return {
 102                                 'red': '\033[41;37;1m',
 103                                 'green': '\033[42;37;1m',
 104                                 'blue': '\033[44;37;1m',
 105                         }[c]
 106                 @staticmethod
 107                 def end_color ():
 108                         return '\033[m'
 109                 @staticmethod
 110                 def escape (s): return s
 111                 @staticmethod
 112                 def newline (): return '\n'
 113
 114         class HTML:
 115                 @staticmethod
 116                 def start_color (c):
 117                         return '<span style="background:%s">' % c
 118                 @staticmethod
 119                 def end_color ():
 120                         return '</span>'
 121                 @staticmethod
 122                 def escape (s): return cgi.escape (s)
 123                 @staticmethod
 124                 def newline (): return '<br/>\n'
 125
 126         @staticmethod
 127         def Auto (argv = [], out = sys.stdout):
 128                 format = ColorFormatter.ANSI
 129                 if "--format" in argv:
 130                         argv.remove ("--format")
 131                         format = ColorFormatter.ANSI
 132                 if "--format=ansi" in argv:
 133                         argv.remove ("--format=ansi")
 134                         format = ColorFormatter.ANSI
 135                 if "--format=html" in argv:
 136                         argv.remove ("--format=html")
 137                         format = ColorFormatter.HTML
 138                 if "--no-format" in argv:
 139                         argv.remove ("--no-format")
 140                         format = ColorFormatter.Null
 141                 return format
 142
 143
 144 class DiffColorizer:
 145
 146         diff_regex = re.compile ('([a-za-z0-9_]*)([^a-za-z0-9_]?)')
 147
 148         def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
 149                 self.formatter = formatter
 150                 self.colors = colors
 151                 self.symbols = symbols
 152
 153         def colorize_lines (self, lines):
 154                 lines = (l if l else '' for l in lines)
 155                 ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
 156                 oo = ["",""]
 157                 st = [False, False]
 158                 for l in difflib.Differ().compare (*ss):
 159                         if l[0] == '?':
 160                                 continue
 161                         if l[0] == ' ':
 162                                 for i in range(2):
 163                                         if st[i]:
 164                                                 oo[i] += self.formatter.end_color ()
 165                                                 st[i] = False
 166                                 oo = [o + self.formatter.escape (l[2:]) for o in oo]
 167                                 continue
 168                         if l[0] in self.symbols:
 169                                 i = self.symbols.index (l[0])
 170                                 if not st[i]:
 171                                         oo[i] += self.formatter.start_color (self.colors[i])
 172                                         st[i] = True
 173                                 oo[i] += self.formatter.escape (l[2:])
 174                                 continue
 175                 for i in range(2):
 176                         if st[i]:
 177                                 oo[i] += self.formatter.end_color ()
 178                                 st[i] = False
 179                 oo = [o.replace ('\n', '') for o in oo]
 180                 return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]
 181
 182         def colorize_diff (self, f):
 183                 lines = [None, None]
 184                 for l in f:
 185                         if l[0] not in self.symbols:
 186                                 yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
 187                                 continue
 188                         i = self.symbols.index (l[0])
 189                         if lines[i]:
 190                                 # Flush
 191                                 for line in self.colorize_lines (lines):
 192                                         yield line
 193                                 lines = [None, None]
 194                         lines[i] = l[1:]
 195                         if (all (lines)):
 196                                 # Flush
 197                                 for line in self.colorize_lines (lines):
 198                                         yield line
 199                                 lines = [None, None]
 200                 if (any (lines)):
 201                         # Flush
 202                         for line in self.colorize_lines (lines):
 203                                 yield line
 204
 205
 206 class ZipDiffer:
 207
 208         @staticmethod
 209         def diff_files (files, symbols=diff_symbols):
 210                 files = tuple (files) # in case it's a generator, copy it
 211                 try:
 212                         for lines in izip_longest (*files):
 213                                 if all (lines[0] == line for line in lines[1:]):
 214                                         sys.stdout.writelines ([" ", lines[0]])
 215                                         continue
 216
 217                                 for i, l in enumerate (lines):
 218                                         if l:
 219                                                 sys.stdout.writelines ([symbols[i], l])
 220                 except IOError as e:
 221                         if e.errno != errno.EPIPE:
 222                                 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
 223                                 sys.exit (1)
 224
 225
 226 class DiffFilters:
 227
 228         @staticmethod
 229         def filter_failures (f):
 230                 for key, lines in DiffHelpers.separate_test_cases (f):
 231                         lines = list (lines)
 232                         if not DiffHelpers.test_passed (lines):
 233                                 for l in lines: yield l
 234
 235 class Stat:
 236
 237         def __init__ (self):
 238                 self.count = 0
 239                 self.freq = 0
 240
 241         def add (self, test):
 242                 self.count += 1
 243                 self.freq += test.freq
 244
 245 class Stats:
 246
 247         def __init__ (self):
 248                 self.passed = Stat ()
 249                 self.failed = Stat ()
 250                 self.total  = Stat ()
 251
 252         def add (self, test):
 253                 self.total.add (test)
 254                 if test.passed:
 255                         self.passed.add (test)
 256                 else:
 257                         self.failed.add (test)
 258
 259         def mean (self):
 260                 return float (self.passed.count) / self.total.count
 261
 262         def variance (self):
 263                 return (float (self.passed.count) / self.total.count) * \
 264                        (float (self.failed.count) / self.total.count)
 265
 266         def stddev (self):
 267                 return self.variance () ** .5
 268
 269         def zscore (self, population):
 270                 """Calculate the standard score.
 271                    Population is the Stats for population.
 272                    Self is Stats for sample.
 273                    Returns larger absolute value if sample is highly unlikely to be random.
 274                    Anything outside of -3..+3 is very unlikely to be random.
 275                    See: http://en.wikipedia.org/wiki/Standard_score"""
 276
 277                 return (self.mean () - population.mean ()) / population.stddev ()
 278
 279
 280
 281
 282 class DiffSinks:
 283
 284         @staticmethod
 285         def print_stat (f):
 286                 passed = 0
 287                 failed = 0
 288                 # XXX port to Stats, but that would really slow us down here
 289                 for key, lines in DiffHelpers.separate_test_cases (f):
 290                         if DiffHelpers.test_passed (lines):
 291                                 passed += 1
 292                         else:
 293                                 failed += 1
 294                 total = passed + failed
 295                 print ("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, 100. * failed / total))
 296
 297
 298 class Test:
 299
 300         def __init__ (self, lines):
 301                 self.freq = 1
 302                 self.passed = True
 303                 self.identifier = None
 304                 self.text = None
 305                 self.unicodes = None
 306                 self.glyphs = None
 307                 for l in lines:
 308                         symbol = l[0]
 309                         if symbol != ' ':
 310                                 self.passed = False
 311                         i = 1
 312                         if ':' in l:
 313                                 i = l.index (':')
 314                                 if not self.identifier:
 315                                         self.identifier = l[1:i]
 316                                 i = i + 2 # Skip colon and space
 317                         j = -1
 318                         if l[j] == '\n':
 319                                 j -= 1
 320                         brackets = l[i] + l[j]
 321                         l = l[i+1:-2]
 322                         if brackets == '()':
 323                                 self.text = l
 324                         elif brackets == '<>':
 325                                 self.unicodes = Unicode.parse (l)
 326                         elif brackets == '[]':
 327                                 # XXX we don't handle failed tests here
 328                                 self.glyphs = l
 329
 330
 331 class DiffHelpers:
 332
 333         @staticmethod
 334         def separate_test_cases (f):
 335                 '''Reads lines from f, and if the lines have identifiers, ie.
 336                    have a colon character, groups them by identifier,
 337                    yielding lists of all lines with the same identifier.'''
 338
 339                 def identifier (l):
 340                         if ':' in l[1:]:
 341                                 return l[1:l.index (':')]
 342                         return l
 343                 return groupby (f, key=identifier)
 344
 345         @staticmethod
 346         def test_passed (lines):
 347                 lines = list (lines)
 348                 # XXX This is a hack, but does the job for now.
 349                 if any (l.find("space+0|space+0") >= 0 for l in lines if l[0] == '+'): return True
 350                 if any (l.find("uni25CC") >= 0 for l in lines if l[0] == '+'): return True
 351                 if any (l.find("dottedcircle") >= 0 for l in lines if l[0] == '+'): return True
 352                 if any (l.find("glyph0") >= 0 for l in lines if l[0] == '+'): return True
 353                 if any (l.find("gid0") >= 0 for l in lines if l[0] == '+'): return True
 354                 if any (l.find("notdef") >= 0 for l in lines if l[0] == '+'): return True
 355                 return all (l[0] == ' ' for l in lines)
 356
 357
 358 class FilterHelpers:
 359
 360         @staticmethod
 361         def filter_printer_function (filter_callback):
 362                 def printer (f):
 363                         for line in filter_callback (f):
 364                                 print (line)
 365                 return printer
 366
 367         @staticmethod
 368         def filter_printer_function_no_newline (filter_callback):
 369                 def printer (f):
 370                         for line in filter_callback (f):
 371                                 sys.stdout.writelines ([line])
 372                 return printer
 373
 374
 375 class Ngram:
 376
 377         @staticmethod
 378         def generator (n):
 379
 380                 def gen (f):
 381                         l = []
 382                         for x in f:
 383                                 l.append (x)
 384                                 if len (l) == n:
 385                                         yield tuple (l)
 386                                         l[:1] = []
 387
 388                 gen.n = n
 389                 return gen
 390
 391
 392 class UtilMains:
 393
 394         @staticmethod
 395         def process_multiple_files (callback, mnemonic = "FILE"):
 396
 397                 if "--help" in sys.argv:
 398                         print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
 399                         sys.exit (1)
 400
 401                 try:
 402                         files = sys.argv[1:] if len (sys.argv) > 1 else ['-']
 403                         for s in files:
 404                                 callback (FileHelpers.open_file_or_stdin (s))
 405                 except IOError as e:
 406                         if e.errno != errno.EPIPE:
 407                                 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
 408                                 sys.exit (1)
 409
 410         @staticmethod
 411         def process_multiple_args (callback, mnemonic):
 412
 413                 if len (sys.argv) == 1 or "--help" in sys.argv:
 414                         print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
 415                         sys.exit (1)
 416
 417                 try:
 418                         for s in sys.argv[1:]:
 419                                 callback (s)
 420                 except IOError as e:
 421                         if e.errno != errno.EPIPE:
 422                                 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
 423                                 sys.exit (1)
 424
 425         @staticmethod
 426         def filter_multiple_strings_or_stdin (callback, mnemonic, \
 427                                               separator = " ", \
 428                                               concat_separator = False):
 429
 430                 if "--help" in sys.argv:
 431                         print ("Usage:\n  %s %s...\nor:\n  %s\n\nWhen called with no arguments, input is read from standard input." \
 432                               % (sys.argv[0], mnemonic, sys.argv[0]))
 433                         sys.exit (1)
 434
 435                 try:
 436                         if len (sys.argv) == 1:
 437                                 while (1):
 438                                         line = sys.stdin.readline ()
 439                                         if not len (line):
 440                                                 break
 441                                         if line[-1] == '\n':
 442                                                 line = line[:-1]
 443                                         print (callback (line))
 444                         else:
 445                                 args = sys.argv[1:]
 446                                 if concat_separator != False:
 447                                         args = [concat_separator.join (args)]
 448                                 print (separator.join (callback (x) for x in (args)))
 449                 except IOError as e:
 450                         if e.errno != errno.EPIPE:
 451                                 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
 452                                 sys.exit (1)
 453
 454
 455 class Unicode:
 456
 457         @staticmethod
 458         def decode (s):
 459                 return u','.join ("U+%04X" % cp for cp in codepoints (tounicode (s, 'utf-8')))
 460
 461         @staticmethod
 462         def parse (s):
 463                 s = re.sub (r"0[xX]", " ", s)
 464                 s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n\t]", " ", s)
 465                 return [int (x, 16) for x in s.split ()]
 466
 467         @staticmethod
 468         def encode (s):
 469                 s = u''.join (unichr (x) for x in Unicode.parse (s))
 470                 if sys.version_info[0] == 2: s = s.encode ('utf-8')
 471                 return s
 472
 473         shorthands = {
 474                 "ZERO WIDTH NON-JOINER": "ZWNJ",
 475                 "ZERO WIDTH JOINER": "ZWJ",
 476                 "NARROW NO-BREAK SPACE": "NNBSP",
 477                 "COMBINING GRAPHEME JOINER": "CGJ",
 478                 "LEFT-TO-RIGHT MARK": "LRM",
 479                 "RIGHT-TO-LEFT MARK": "RLM",
 480                 "LEFT-TO-RIGHT EMBEDDING": "LRE",
 481                 "RIGHT-TO-LEFT EMBEDDING": "RLE",
 482                 "POP DIRECTIONAL FORMATTING": "PDF",
 483                 "LEFT-TO-RIGHT OVERRIDE": "LRO",
 484                 "RIGHT-TO-LEFT OVERRIDE": "RLO",
 485         }
 486
 487         @staticmethod
 488         def pretty_name (u):
 489                 try:
 490                         s = unicodedata.name (u)
 491                 except ValueError:
 492                         return "XXX"
 493                 s = re.sub (".* LETTER ", "", s)
 494                 s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
 495                 s = re.sub (".* SIGN ", "", s)
 496                 s = re.sub (".* COMBINING ", "", s)
 497                 if re.match (".* VIRAMA", s):
 498                         s = "HALANT"
 499                 if s in Unicode.shorthands:
 500                         s = Unicode.shorthands[s]
 501                 return s
 502
 503         @staticmethod
 504         def pretty_names (s):
 505                 s = re.sub (r"[<+>\\uU]", " ", s)
 506                 s = re.sub (r"0[xX]", " ", s)
 507                 s = [unichr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
 508                 return u' + '.join (Unicode.pretty_name (x) for x in s).encode ('utf-8')
 509
 510
 511 class FileHelpers:
 512
 513         @staticmethod
 514         def open_file_or_stdin (f):
 515                 if f == '-':
 516                         return sys.stdin
 517                 return open (f)
 518
 519
 520 class Manifest:
 521
 522         @staticmethod
 523         def read (s, strict = True):
 524
 525                 if not os.path.exists (s):
 526                         if strict:
 527                                 print ("%s: %s does not exist" % (sys.argv[0], s), file=sys.stderr)
 528                                 sys.exit (1)
 529                         return
 530
 531                 s = os.path.normpath (s)
 532
 533                 if os.path.isdir (s):
 534
 535                         try:
 536                                 m = open (os.path.join (s, "MANIFEST"))
 537                                 items = [x.strip () for x in m.readlines ()]
 538                                 for f in items:
 539                                         for p in Manifest.read (os.path.join (s, f)):
 540                                                 yield p
 541                         except IOError:
 542                                 if strict:
 543                                         print ("%s: %s does not exist" % (sys.argv[0], os.path.join (s, "MANIFEST")), file=sys.stderr)
 544                                         sys.exit (1)
 545                                 return
 546                 else:
 547                         yield s
 548
 549         @staticmethod
 550         def update_recursive (s):
 551
 552                 for dirpath, dirnames, filenames in os.walk (s, followlinks=True):
 553
 554                         for f in ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]:
 555                                 if f in dirnames:
 556                                         dirnames.remove (f)
 557                                 if f in filenames:
 558                                         filenames.remove (f)
 559                         dirnames.sort ()
 560                         filenames.sort ()
 561                         ms = os.path.join (dirpath, "MANIFEST")
 562                         print ("  GEN    %s" % ms)
 563                         m = open (ms, "w")
 564                         for f in filenames:
 565                                 print (f, file=m)
 566                         for f in dirnames:
 567                                 print (f, file=m)
 568                         for f in dirnames:
 569                                 Manifest.update_recursive (os.path.join (dirpath, f))
 570
# Running this module directly does nothing; it only defines helpers meant to
# be imported.
if __name__ == '__main__':
        pass