test/shaping/hb_test_tools.py

   1 #!/usr/bin/env python
   2
   3 from __future__ import print_function
   4 import sys, os, re, difflib, unicodedata, errno, cgi
   5 from itertools import *
   6
# One-character markers that prefix each line of diff output; the symbol at
# index i tags lines that came from the i-th input being compared.
diff_symbols = "-+=*&^%$#@!~/"
# Highlight color names, indexed in parallel with diff_symbols.
diff_colors = ['red', 'green', 'blue']
   9
  10 def codepoints(s):
  11         return (ord (u) for u in s)
  12
try:
        # Raises NameError where no `unichr` builtin exists; the handler at the
        # bottom then aliases `unichr` to `chr`.
        unichr = unichr

        if sys.maxunicode < 0x10FFFF:
                # workarounds for Python 2 "narrow" builds with UCS2-only support.

                # Keep a reference to the builtin before it is shadowed below.
                _narrow_unichr = unichr

                def unichr(i):
                        """
                        Return the unicode character whose Unicode code is the integer 'i'.
                        The valid range is 0 to 0x10FFFF inclusive.

                        >>> _narrow_unichr(0xFFFF + 1)
                        Traceback (most recent call last):
                          File "<stdin>", line 1, in ?
                        ValueError: unichr() arg not in range(0x10000) (narrow Python build)
                        >>> unichr(0xFFFF + 1) == u'\U00010000'
                        True
                        >>> unichr(1114111) == u'\U0010FFFF'
                        True
                        >>> unichr(0x10FFFF + 1)
                        Traceback (most recent call last):
                          File "<stdin>", line 1, in ?
                        ValueError: unichr() arg not in range(0x110000)
                        """
                        try:
                                return _narrow_unichr(i)
                        except ValueError:
                                # The builtin rejected `i`: spell it as a
                                # "\UXXXXXXXX" escape and decode that instead.
                                try:
                                        padded_hex_str = hex(i)[2:].zfill(8)
                                        escape_str = "\\U" + padded_hex_str
                                        return escape_str.decode("unicode-escape")
                                except UnicodeDecodeError:
                                        raise ValueError('unichr() arg not in range(0x110000)')

                def codepoints(s):
                        """Yield the code points of `s`, merging surrogate pairs.

                        A high surrogate (0xD800-0xDBFF) immediately followed by a low
                        surrogate (0xDC00-0xDFFF) is combined into a single code point
                        at or above 0x10000.  Any unpaired surrogate yields 0xFFFC.
                        """
                        # Pending high surrogate waiting for its low half, or None.
                        high_surrogate = None
                        for u in s:
                                cp = ord (u)
                                if 0xDC00 <= cp <= 0xDFFF:
                                        # Low surrogate: valid only right after a high one.
                                        if high_surrogate:
                                                yield 0x10000 + (high_surrogate - 0xD800) * 0x400 + (cp - 0xDC00)
                                                high_surrogate = None
                                        else:
                                                yield 0xFFFC
                                else:
                                        # Anything else leaves a pending high surrogate unpaired.
                                        if high_surrogate:
                                                yield 0xFFFC
                                                high_surrogate = None
                                        if 0xD800 <= cp <= 0xDBFF:
                                                high_surrogate = cp
                                        else:
                                                yield cp
                                                high_surrogate = None
                        # Input ended while a high surrogate was still pending.
                        if high_surrogate:
                                yield 0xFFFC

except NameError:
        # No `unichr` builtin is available: use `chr` in its place.
        unichr = chr
  73
# Module-level `unicode` alias: the builtin when it exists, otherwise `str`.
try:
        unicode = unicode
except NameError:
        # No `unicode` builtin is available: fall back to `str`.
        unicode = str
  78
  79 def tounicode(s, encoding='ascii', errors='strict'):
  80         if not isinstance(s, unicode):
  81                 return s.decode(encoding, errors)
  82         else:
  83                 return s
  84
  85 class ColorFormatter:
  86
  87         class Null:
  88                 @staticmethod
  89                 def start_color (c): return ''
  90                 @staticmethod
  91                 def end_color (): return ''
  92                 @staticmethod
  93                 def escape (s): return s
  94                 @staticmethod
  95                 def newline (): return '\n'
  96
  97         class ANSI:
  98                 @staticmethod
  99                 def start_color (c):
 100                         return {
 101                                 'red': '\033[41;37;1m',
 102                                 'green': '\033[42;37;1m',
 103                                 'blue': '\033[44;37;1m',
 104                         }[c]
 105                 @staticmethod
 106                 def end_color ():
 107                         return '\033[m'
 108                 @staticmethod
 109                 def escape (s): return s
 110                 @staticmethod
 111                 def newline (): return '\n'
 112
 113         class HTML:
 114                 @staticmethod
 115                 def start_color (c):
 116                         return '<span style="background:%s">' % c
 117                 @staticmethod
 118                 def end_color ():
 119                         return '</span>'
 120                 @staticmethod
 121                 def escape (s): return cgi.escape (s)
 122                 @staticmethod
 123                 def newline (): return '<br/>\n'
 124
 125         @staticmethod
 126         def Auto (argv = [], out = sys.stdout):
 127                 format = ColorFormatter.ANSI
 128                 if "--format" in argv:
 129                         argv.remove ("--format")
 130                         format = ColorFormatter.ANSI
 131                 if "--format=ansi" in argv:
 132                         argv.remove ("--format=ansi")
 133                         format = ColorFormatter.ANSI
 134                 if "--format=html" in argv:
 135                         argv.remove ("--format=html")
 136                         format = ColorFormatter.HTML
 137                 if "--no-format" in argv:
 138                         argv.remove ("--no-format")
 139                         format = ColorFormatter.Null
 140                 return format
 141
 142
 143 class DiffColorizer:
 144
 145         diff_regex = re.compile ('([a-za-z0-9_]*)([^a-za-z0-9_]?)')
 146
 147         def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
 148                 self.formatter = formatter
 149                 self.colors = colors
 150                 self.symbols = symbols
 151
 152         def colorize_lines (self, lines):
 153                 lines = (l if l else '' for l in lines)
 154                 ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
 155                 oo = ["",""]
 156                 st = [False, False]
 157                 for l in difflib.Differ().compare (*ss):
 158                         if l[0] == '?':
 159                                 continue
 160                         if l[0] == ' ':
 161                                 for i in range(2):
 162                                         if st[i]:
 163                                                 oo[i] += self.formatter.end_color ()
 164                                                 st[i] = False
 165                                 oo = [o + self.formatter.escape (l[2:]) for o in oo]
 166                                 continue
 167                         if l[0] in self.symbols:
 168                                 i = self.symbols.index (l[0])
 169                                 if not st[i]:
 170                                         oo[i] += self.formatter.start_color (self.colors[i])
 171                                         st[i] = True
 172                                 oo[i] += self.formatter.escape (l[2:])
 173                                 continue
 174                 for i in range(2):
 175                         if st[i]:
 176                                 oo[i] += self.formatter.end_color ()
 177                                 st[i] = False
 178                 oo = [o.replace ('\n', '') for o in oo]
 179                 return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]
 180
 181         def colorize_diff (self, f):
 182                 lines = [None, None]
 183                 for l in f:
 184                         if l[0] not in self.symbols:
 185                                 yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
 186                                 continue
 187                         i = self.symbols.index (l[0])
 188                         if lines[i]:
 189                                 # Flush
 190                                 for line in self.colorize_lines (lines):
 191                                         yield line
 192                                 lines = [None, None]
 193                         lines[i] = l[1:]
 194                         if (all (lines)):
 195                                 # Flush
 196                                 for line in self.colorize_lines (lines):
 197                                         yield line
 198                                 lines = [None, None]
 199                 if (any (lines)):
 200                         # Flush
 201                         for line in self.colorize_lines (lines):
 202                                 yield line
 203
 204
 205 class ZipDiffer:
 206
 207         @staticmethod
 208         def diff_files (files, symbols=diff_symbols):
 209                 files = tuple (files) # in case it's a generator, copy it
 210                 try:
 211                         for lines in izip_longest (*files):
 212                                 if all (lines[0] == line for line in lines[1:]):
 213                                         sys.stdout.writelines ([" ", lines[0]])
 214                                         continue
 215
 216                                 for i, l in enumerate (lines):
 217                                         if l:
 218                                                 sys.stdout.writelines ([symbols[i], l])
 219                 except IOError as e:
 220                         if e.errno != errno.EPIPE:
 221                                 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
 222                                 sys.exit (1)
 223
 224
 225 class DiffFilters:
 226
 227         @staticmethod
 228         def filter_failures (f):
 229                 for key, lines in DiffHelpers.separate_test_cases (f):
 230                         lines = list (lines)
 231                         if not DiffHelpers.test_passed (lines):
 232                                 for l in lines: yield l
 233
 234 class Stat:
 235
 236         def __init__ (self):
 237                 self.count = 0
 238                 self.freq = 0
 239
 240         def add (self, test):
 241                 self.count += 1
 242                 self.freq += test.freq
 243
 244 class Stats:
 245
 246         def __init__ (self):
 247                 self.passed = Stat ()
 248                 self.failed = Stat ()
 249                 self.total  = Stat ()
 250
 251         def add (self, test):
 252                 self.total.add (test)
 253                 if test.passed:
 254                         self.passed.add (test)
 255                 else:
 256                         self.failed.add (test)
 257
 258         def mean (self):
 259                 return float (self.passed.count) / self.total.count
 260
 261         def variance (self):
 262                 return (float (self.passed.count) / self.total.count) * \
 263                        (float (self.failed.count) / self.total.count)
 264
 265         def stddev (self):
 266                 return self.variance () ** .5
 267
 268         def zscore (self, population):
 269                 """Calculate the standard score.
 270                    Population is the Stats for population.
 271                    Self is Stats for sample.
 272                    Returns larger absolute value if sample is highly unlikely to be random.
 273                    Anything outside of -3..+3 is very unlikely to be random.
 274                    See: http://en.wikipedia.org/wiki/Standard_score"""
 275
 276                 return (self.mean () - population.mean ()) / population.stddev ()
 277
 278
 279
 280
 281 class DiffSinks:
 282
 283         @staticmethod
 284         def print_stat (f):
 285                 passed = 0
 286                 failed = 0
 287                 # XXX port to Stats, but that would really slow us down here
 288                 for key, lines in DiffHelpers.separate_test_cases (f):
 289                         if DiffHelpers.test_passed (lines):
 290                                 passed += 1
 291                         else:
 292                                 failed += 1
 293                 total = passed + failed
 294                 print ("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, 100. * failed / total))
 295
 296
 297 class Test:
 298
 299         def __init__ (self, lines):
 300                 self.freq = 1
 301                 self.passed = True
 302                 self.identifier = None
 303                 self.text = None
 304                 self.unicodes = None
 305                 self.glyphs = None
 306                 for l in lines:
 307                         symbol = l[0]
 308                         if symbol != ' ':
 309                                 self.passed = False
 310                         i = 1
 311                         if ':' in l:
 312                                 i = l.index (':')
 313                                 if not self.identifier:
 314                                         self.identifier = l[1:i]
 315                                 i = i + 2 # Skip colon and space
 316                         j = -1
 317                         if l[j] == '\n':
 318                                 j -= 1
 319                         brackets = l[i] + l[j]
 320                         l = l[i+1:-2]
 321                         if brackets == '()':
 322                                 self.text = l
 323                         elif brackets == '<>':
 324                                 self.unicodes = Unicode.parse (l)
 325                         elif brackets == '[]':
 326                                 # XXX we don't handle failed tests here
 327                                 self.glyphs = l
 328
 329
 330 class DiffHelpers:
 331
 332         @staticmethod
 333         def separate_test_cases (f):
 334                 '''Reads lines from f, and if the lines have identifiers, ie.
 335                    have a colon character, groups them by identifier,
 336                    yielding lists of all lines with the same identifier.'''
 337
 338                 def identifier (l):
 339                         if ':' in l[1:]:
 340                                 return l[1:l.index (':')]
 341                         return l
 342                 return groupby (f, key=identifier)
 343
 344         @staticmethod
 345         def test_passed (lines):
 346                 lines = list (lines)
 347                 # XXX This is a hack, but does the job for now.
 348                 if any (l.find("space+0|space+0") >= 0 for l in lines if l[0] == '+'): return True
 349                 if any (l.find("uni25CC") >= 0 for l in lines if l[0] == '+'): return True
 350                 if any (l.find("dottedcircle") >= 0 for l in lines if l[0] == '+'): return True
 351                 if any (l.find("glyph0") >= 0 for l in lines if l[0] == '+'): return True
 352                 if any (l.find("gid0") >= 0 for l in lines if l[0] == '+'): return True
 353                 if any (l.find("notdef") >= 0 for l in lines if l[0] == '+'): return True
 354                 return all (l[0] == ' ' for l in lines)
 355
 356
 357 class FilterHelpers:
 358
 359         @staticmethod
 360         def filter_printer_function (filter_callback):
 361                 def printer (f):
 362                         for line in filter_callback (f):
 363                                 print (line)
 364                 return printer
 365
 366         @staticmethod
 367         def filter_printer_function_no_newline (filter_callback):
 368                 def printer (f):
 369                         for line in filter_callback (f):
 370                                 sys.stdout.writelines ([line])
 371                 return printer
 372
 373
 374 class Ngram:
 375
 376         @staticmethod
 377         def generator (n):
 378
 379                 def gen (f):
 380                         l = []
 381                         for x in f:
 382                                 l.append (x)
 383                                 if len (l) == n:
 384                                         yield tuple (l)
 385                                         l[:1] = []
 386
 387                 gen.n = n
 388                 return gen
 389
 390
 391 class UtilMains:
 392
 393         @staticmethod
 394         def process_multiple_files (callback, mnemonic = "FILE"):
 395
 396                 if "--help" in sys.argv:
 397                         print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
 398                         sys.exit (1)
 399
 400                 try:
 401                         files = sys.argv[1:] if len (sys.argv) > 1 else ['-']
 402                         for s in files:
 403                                 callback (FileHelpers.open_file_or_stdin (s))
 404                 except IOError as e:
 405                         if e.errno != errno.EPIPE:
 406                                 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
 407                                 sys.exit (1)
 408
 409         @staticmethod
 410         def process_multiple_args (callback, mnemonic):
 411
 412                 if len (sys.argv) == 1 or "--help" in sys.argv:
 413                         print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
 414                         sys.exit (1)
 415
 416                 try:
 417                         for s in sys.argv[1:]:
 418                                 callback (s)
 419                 except IOError as e:
 420                         if e.errno != errno.EPIPE:
 421                                 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
 422                                 sys.exit (1)
 423
 424         @staticmethod
 425         def filter_multiple_strings_or_stdin (callback, mnemonic, \
 426                                               separator = " ", \
 427                                               concat_separator = False):
 428
 429                 if "--help" in sys.argv:
 430                         print ("Usage:\n  %s %s...\nor:\n  %s\n\nWhen called with no arguments, input is read from standard input." \
 431                               % (sys.argv[0], mnemonic, sys.argv[0]))
 432                         sys.exit (1)
 433
 434                 try:
 435                         if len (sys.argv) == 1:
 436                                 while (1):
 437                                         line = sys.stdin.readline ()
 438                                         if not len (line):
 439                                                 break
 440                                         if line[-1] == '\n':
 441                                                 line = line[:-1]
 442                                         print (callback (line))
 443                         else:
 444                                 args = sys.argv[1:]
 445                                 if concat_separator != False:
 446                                         args = [concat_separator.join (args)]
 447                                 print (separator.join (callback (x) for x in (args)))
 448                 except IOError as e:
 449                         if e.errno != errno.EPIPE:
 450                                 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
 451                                 sys.exit (1)
 452
 453
 454 class Unicode:
 455
 456         @staticmethod
 457         def decode (s):
 458                 return u','.join ("U+%04X" % cp for cp in codepoints (tounicode (s, 'utf-8')))
 459
 460         @staticmethod
 461         def parse (s):
 462                 s = re.sub (r"0[xX]", " ", s)
 463                 s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n\t]", " ", s)
 464                 return [int (x, 16) for x in s.split ()]
 465
 466         @staticmethod
 467         def encode (s):
 468                 s = u''.join (unichr (x) for x in Unicode.parse (s))
 469                 if sys.version_info[0] == 2: s = s.encode ('utf-8')
 470                 return s
 471
 472         shorthands = {
 473                 "ZERO WIDTH NON-JOINER": "ZWNJ",
 474                 "ZERO WIDTH JOINER": "ZWJ",
 475                 "NARROW NO-BREAK SPACE": "NNBSP",
 476                 "COMBINING GRAPHEME JOINER": "CGJ",
 477                 "LEFT-TO-RIGHT MARK": "LRM",
 478                 "RIGHT-TO-LEFT MARK": "RLM",
 479                 "LEFT-TO-RIGHT EMBEDDING": "LRE",
 480                 "RIGHT-TO-LEFT EMBEDDING": "RLE",
 481                 "POP DIRECTIONAL FORMATTING": "PDF",
 482                 "LEFT-TO-RIGHT OVERRIDE": "LRO",
 483                 "RIGHT-TO-LEFT OVERRIDE": "RLO",
 484         }
 485
 486         @staticmethod
 487         def pretty_name (u):
 488                 try:
 489                         s = unicodedata.name (u)
 490                 except ValueError:
 491                         return "XXX"
 492                 s = re.sub (".* LETTER ", "", s)
 493                 s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
 494                 s = re.sub (".* SIGN ", "", s)
 495                 s = re.sub (".* COMBINING ", "", s)
 496                 if re.match (".* VIRAMA", s):
 497                         s = "HALANT"
 498                 if s in Unicode.shorthands:
 499                         s = Unicode.shorthands[s]
 500                 return s
 501
 502         @staticmethod
 503         def pretty_names (s):
 504                 s = re.sub (r"[<+>\\uU]", " ", s)
 505                 s = re.sub (r"0[xX]", " ", s)
 506                 s = [unichr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
 507                 return u' + '.join (Unicode.pretty_name (x) for x in s).encode ('utf-8')
 508
 509
 510 class FileHelpers:
 511
 512         @staticmethod
 513         def open_file_or_stdin (f):
 514                 if f == '-':
 515                         return sys.stdin
 516                 return file (f)
 517
 518
 519 class Manifest:
 520
 521         @staticmethod
 522         def read (s, strict = True):
 523
 524                 if not os.path.exists (s):
 525                         if strict:
 526                                 print ("%s: %s does not exist" % (sys.argv[0], s), file=sys.stderr)
 527                                 sys.exit (1)
 528                         return
 529
 530                 s = os.path.normpath (s)
 531
 532                 if os.path.isdir (s):
 533
 534                         try:
 535                                 m = file (os.path.join (s, "MANIFEST"))
 536                                 items = [x.strip () for x in m.readlines ()]
 537                                 for f in items:
 538                                         for p in Manifest.read (os.path.join (s, f)):
 539                                                 yield p
 540                         except IOError:
 541                                 if strict:
 542                                         print ("%s: %s does not exist" % (sys.argv[0], os.path.join (s, "MANIFEST")), file=sys.stderr)
 543                                         sys.exit (1)
 544                                 return
 545                 else:
 546                         yield s
 547
 548         @staticmethod
 549         def update_recursive (s):
 550
 551                 for dirpath, dirnames, filenames in os.walk (s, followlinks=True):
 552
 553                         for f in ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]:
 554                                 if f in dirnames:
 555                                         dirnames.remove (f)
 556                                 if f in filenames:
 557                                         filenames.remove (f)
 558                         dirnames.sort ()
 559                         filenames.sort ()
 560                         ms = os.path.join (dirpath, "MANIFEST")
 561                         print ("  GEN    %s" % ms)
 562                         m = open (ms, "w")
 563                         for f in filenames:
 564                                 print (f, file=m)
 565                         for f in dirnames:
 566                                 print (f, file=m)
 567                         for f in dirnames:
 568                                 Manifest.update_recursive (os.path.join (dirpath, f))
 569
# Running this module directly does nothing; the body is only a placeholder.
if __name__ == '__main__':
        pass