test/shaping/hb_test_tools.py

   1 #!/usr/bin/env python
   2
   3 from __future__ import print_function
   4 import sys, os, re, difflib, unicodedata, errno, cgi
   5 from itertools import *
   6
   7 diff_symbols = "-+=*&^%$#@!~/"
   8 diff_colors = ['red', 'green', 'blue']
   9
  10 if sys.version_info[0] >= 3:
  11         unichr = chr
  12
  13 class ColorFormatter:
  14
  15         class Null:
  16                 @staticmethod
  17                 def start_color (c): return ''
  18                 @staticmethod
  19                 def end_color (): return ''
  20                 @staticmethod
  21                 def escape (s): return s
  22                 @staticmethod
  23                 def newline (): return '\n'
  24
  25         class ANSI:
  26                 @staticmethod
  27                 def start_color (c):
  28                         return {
  29                                 'red': '\033[41;37;1m',
  30                                 'green': '\033[42;37;1m',
  31                                 'blue': '\033[44;37;1m',
  32                         }[c]
  33                 @staticmethod
  34                 def end_color ():
  35                         return '\033[m'
  36                 @staticmethod
  37                 def escape (s): return s
  38                 @staticmethod
  39                 def newline (): return '\n'
  40
  41         class HTML:
  42                 @staticmethod
  43                 def start_color (c):
  44                         return '<span style="background:%s">' % c
  45                 @staticmethod
  46                 def end_color ():
  47                         return '</span>'
  48                 @staticmethod
  49                 def escape (s): return cgi.escape (s)
  50                 @staticmethod
  51                 def newline (): return '<br/>\n'
  52
  53         @staticmethod
  54         def Auto (argv = [], out = sys.stdout):
  55                 format = ColorFormatter.ANSI
  56                 if "--format" in argv:
  57                         argv.remove ("--format")
  58                         format = ColorFormatter.ANSI
  59                 if "--format=ansi" in argv:
  60                         argv.remove ("--format=ansi")
  61                         format = ColorFormatter.ANSI
  62                 if "--format=html" in argv:
  63                         argv.remove ("--format=html")
  64                         format = ColorFormatter.HTML
  65                 if "--no-format" in argv:
  66                         argv.remove ("--no-format")
  67                         format = ColorFormatter.Null
  68                 return format
  69
  70
  71 class DiffColorizer:
  72
  73         diff_regex = re.compile ('([a-za-z0-9_]*)([^a-za-z0-9_]?)')
  74
  75         def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
  76                 self.formatter = formatter
  77                 self.colors = colors
  78                 self.symbols = symbols
  79
  80         def colorize_lines (self, lines):
  81                 lines = (l if l else '' for l in lines)
  82                 ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
  83                 oo = ["",""]
  84                 st = [False, False]
  85                 for l in difflib.Differ().compare (*ss):
  86                         if l[0] == '?':
  87                                 continue
  88                         if l[0] == ' ':
  89                                 for i in range(2):
  90                                         if st[i]:
  91                                                 oo[i] += self.formatter.end_color ()
  92                                                 st[i] = False
  93                                 oo = [o + self.formatter.escape (l[2:]) for o in oo]
  94                                 continue
  95                         if l[0] in self.symbols:
  96                                 i = self.symbols.index (l[0])
  97                                 if not st[i]:
  98                                         oo[i] += self.formatter.start_color (self.colors[i])
  99                                         st[i] = True
 100                                 oo[i] += self.formatter.escape (l[2:])
 101                                 continue
 102                 for i in range(2):
 103                         if st[i]:
 104                                 oo[i] += self.formatter.end_color ()
 105                                 st[i] = False
 106                 oo = [o.replace ('\n', '') for o in oo]
 107                 return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]
 108
 109         def colorize_diff (self, f):
 110                 lines = [None, None]
 111                 for l in f:
 112                         if l[0] not in self.symbols:
 113                                 yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
 114                                 continue
 115                         i = self.symbols.index (l[0])
 116                         if lines[i]:
 117                                 # Flush
 118                                 for line in self.colorize_lines (lines):
 119                                         yield line
 120                                 lines = [None, None]
 121                         lines[i] = l[1:]
 122                         if (all (lines)):
 123                                 # Flush
 124                                 for line in self.colorize_lines (lines):
 125                                         yield line
 126                                 lines = [None, None]
 127                 if (any (lines)):
 128                         # Flush
 129                         for line in self.colorize_lines (lines):
 130                                 yield line
 131
 132
 133 class ZipDiffer:
 134
 135         @staticmethod
 136         def diff_files (files, symbols=diff_symbols):
 137                 files = tuple (files) # in case it's a generator, copy it
 138                 try:
 139                         for lines in izip_longest (*files):
 140                                 if all (lines[0] == line for line in lines[1:]):
 141                                         sys.stdout.writelines ([" ", lines[0]])
 142                                         continue
 143
 144                                 for i, l in enumerate (lines):
 145                                         if l:
 146                                                 sys.stdout.writelines ([symbols[i], l])
 147                 except IOError as e:
 148                         if e.errno != errno.EPIPE:
 149                                 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
 150                                 sys.exit (1)
 151
 152
 153 class DiffFilters:
 154
 155         @staticmethod
 156         def filter_failures (f):
 157                 for key, lines in DiffHelpers.separate_test_cases (f):
 158                         lines = list (lines)
 159                         if not DiffHelpers.test_passed (lines):
 160                                 for l in lines: yield l
 161
 162 class Stat:
 163
 164         def __init__ (self):
 165                 self.count = 0
 166                 self.freq = 0
 167
 168         def add (self, test):
 169                 self.count += 1
 170                 self.freq += test.freq
 171
 172 class Stats:
 173
 174         def __init__ (self):
 175                 self.passed = Stat ()
 176                 self.failed = Stat ()
 177                 self.total  = Stat ()
 178
 179         def add (self, test):
 180                 self.total.add (test)
 181                 if test.passed:
 182                         self.passed.add (test)
 183                 else:
 184                         self.failed.add (test)
 185
 186         def mean (self):
 187                 return float (self.passed.count) / self.total.count
 188
 189         def variance (self):
 190                 return (float (self.passed.count) / self.total.count) * \
 191                        (float (self.failed.count) / self.total.count)
 192
 193         def stddev (self):
 194                 return self.variance () ** .5
 195
 196         def zscore (self, population):
 197                 """Calculate the standard score.
 198                    Population is the Stats for population.
 199                    Self is Stats for sample.
 200                    Returns larger absolute value if sample is highly unlikely to be random.
 201                    Anything outside of -3..+3 is very unlikely to be random.
 202                    See: http://en.wikipedia.org/wiki/Standard_score"""
 203
 204                 return (self.mean () - population.mean ()) / population.stddev ()
 205
 206
 207
 208
 209 class DiffSinks:
 210
 211         @staticmethod
 212         def print_stat (f):
 213                 passed = 0
 214                 failed = 0
 215                 # XXX port to Stats, but that would really slow us down here
 216                 for key, lines in DiffHelpers.separate_test_cases (f):
 217                         if DiffHelpers.test_passed (lines):
 218                                 passed += 1
 219                         else:
 220                                 failed += 1
 221                 total = passed + failed
 222                 print ("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, 100. * failed / total))
 223
 224         @staticmethod
 225         def print_ngrams (f, ns=(1,2,3)):
 226                 gens = tuple (Ngram.generator (n) for n in ns)
 227                 allstats = Stats ()
 228                 allgrams = {}
 229                 for key, lines in DiffHelpers.separate_test_cases (f):
 230                         test = Test (lines)
 231                         allstats.add (test)
 232
 233                         for gen in gens:
 234                                 for ngram in gen (test.unicodes):
 235                                         if ngram not in allgrams:
 236                                                 allgrams[ngram] = Stats ()
 237                                         allgrams[ngram].add (test)
 238
 239                 importantgrams = {}
 240                 for ngram, stats in allgrams.iteritems ():
 241                         if stats.failed.count >= 30: # for statistical reasons
 242                                 importantgrams[ngram] = stats
 243                 allgrams = importantgrams
 244                 del importantgrams
 245
 246                 for ngram, stats in allgrams.iteritems ():
 247                         print ("zscore: %9f failed: %6d passed: %6d ngram: <%s>" % (stats.zscore (allstats), stats.failed.count, stats.passed.count, ','.join ("U+%04X" % u for u in ngram)))
 248
 249
 250
 251 class Test:
 252
 253         def __init__ (self, lines):
 254                 self.freq = 1
 255                 self.passed = True
 256                 self.identifier = None
 257                 self.text = None
 258                 self.unicodes = None
 259                 self.glyphs = None
 260                 for l in lines:
 261                         symbol = l[0]
 262                         if symbol != ' ':
 263                                 self.passed = False
 264                         i = 1
 265                         if ':' in l:
 266                                 i = l.index (':')
 267                                 if not self.identifier:
 268                                         self.identifier = l[1:i]
 269                                 i = i + 2 # Skip colon and space
 270                         j = -1
 271                         if l[j] == '\n':
 272                                 j -= 1
 273                         brackets = l[i] + l[j]
 274                         l = l[i+1:-2]
 275                         if brackets == '()':
 276                                 self.text = l
 277                         elif brackets == '<>':
 278                                 self.unicodes = Unicode.parse (l)
 279                         elif brackets == '[]':
 280                                 # XXX we don't handle failed tests here
 281                                 self.glyphs = l
 282
 283
 284 class DiffHelpers:
 285
 286         @staticmethod
 287         def separate_test_cases (f):
 288                 '''Reads lines from f, and if the lines have identifiers, ie.
 289                    have a colon character, groups them by identifier,
 290                    yielding lists of all lines with the same identifier.'''
 291
 292                 def identifier (l):
 293                         if ':' in l[1:]:
 294                                 return l[1:l.index (':')]
 295                         return l
 296                 return groupby (f, key=identifier)
 297
 298         @staticmethod
 299         def test_passed (lines):
 300                 lines = list (lines)
 301                 # XXX This is a hack, but does the job for now.
 302                 if any (l.find("space+0|space+0") >= 0 for l in lines if l[0] == '+'): return True
 303                 if any (l.find("uni25CC") >= 0 for l in lines if l[0] == '+'): return True
 304                 if any (l.find("dottedcircle") >= 0 for l in lines if l[0] == '+'): return True
 305                 if any (l.find("glyph0") >= 0 for l in lines if l[0] == '+'): return True
 306                 if any (l.find("gid0") >= 0 for l in lines if l[0] == '+'): return True
 307                 if any (l.find("notdef") >= 0 for l in lines if l[0] == '+'): return True
 308                 return all (l[0] == ' ' for l in lines)
 309
 310
 311 class FilterHelpers:
 312
 313         @staticmethod
 314         def filter_printer_function (filter_callback):
 315                 def printer (f):
 316                         for line in filter_callback (f):
 317                                 print (line)
 318                 return printer
 319
 320         @staticmethod
 321         def filter_printer_function_no_newline (filter_callback):
 322                 def printer (f):
 323                         for line in filter_callback (f):
 324                                 sys.stdout.writelines ([line])
 325                 return printer
 326
 327
 328 class Ngram:
 329
 330         @staticmethod
 331         def generator (n):
 332
 333                 def gen (f):
 334                         l = []
 335                         for x in f:
 336                                 l.append (x)
 337                                 if len (l) == n:
 338                                         yield tuple (l)
 339                                         l[:1] = []
 340
 341                 gen.n = n
 342                 return gen
 343
 344
 345 class UtilMains:
 346
 347         @staticmethod
 348         def process_multiple_files (callback, mnemonic = "FILE"):
 349
 350                 if "--help" in sys.argv:
 351                         print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
 352                         sys.exit (1)
 353
 354                 try:
 355                         files = sys.argv[1:] if len (sys.argv) > 1 else ['-']
 356                         for s in files:
 357                                 callback (FileHelpers.open_file_or_stdin (s))
 358                 except IOError as e:
 359                         if e.errno != errno.EPIPE:
 360                                 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
 361                                 sys.exit (1)
 362
 363         @staticmethod
 364         def process_multiple_args (callback, mnemonic):
 365
 366                 if len (sys.argv) == 1 or "--help" in sys.argv:
 367                         print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
 368                         sys.exit (1)
 369
 370                 try:
 371                         for s in sys.argv[1:]:
 372                                 callback (s)
 373                 except IOError as e:
 374                         if e.errno != errno.EPIPE:
 375                                 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
 376                                 sys.exit (1)
 377
 378         @staticmethod
 379         def filter_multiple_strings_or_stdin (callback, mnemonic, \
 380                                               separator = " ", \
 381                                               concat_separator = False):
 382
 383                 if "--help" in sys.argv:
 384                         print ("Usage:\n  %s %s...\nor:\n  %s\n\nWhen called with no arguments, input is read from standard input." \
 385                               % (sys.argv[0], mnemonic, sys.argv[0]))
 386                         sys.exit (1)
 387
 388                 try:
 389                         if len (sys.argv) == 1:
 390                                 while (1):
 391                                         line = sys.stdin.readline ()
 392                                         if not len (line):
 393                                                 break
 394                                         if line[-1] == '\n':
 395                                                 line = line[:-1]
 396                                         print (callback (line))
 397                         else:
 398                                 args = sys.argv[1:]
 399                                 if concat_separator != False:
 400                                         args = [concat_separator.join (args)]
 401                                 print (separator.join (callback (x) for x in (args)))
 402                 except IOError as e:
 403                         if e.errno != errno.EPIPE:
 404                                 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
 405                                 sys.exit (1)
 406
 407
 408 class Unicode:
 409
 410         @staticmethod
 411         def decode (s):
 412                 return u','.join ("U+%04X" % ord (u) for u in unicode (s, 'utf-8')).encode ('utf-8')
 413
 414         @staticmethod
 415         def parse (s):
 416                 s = re.sub (r"0[xX]", " ", s)
 417                 s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n    ]", " ", s)
 418                 return [int (x, 16) for x in s.split ()]
 419
 420         @staticmethod
 421         def encode (s):
 422                 s = u''.join (unichr (x) for x in Unicode.parse (s))
 423                 if sys.version_info[0] == 2: s = s.encode ('utf-8')
 424                 return s
 425
 426         shorthands = {
 427                 "ZERO WIDTH NON-JOINER": "ZWNJ",
 428                 "ZERO WIDTH JOINER": "ZWJ",
 429                 "NARROW NO-BREAK SPACE": "NNBSP",
 430                 "COMBINING GRAPHEME JOINER": "CGJ",
 431                 "LEFT-TO-RIGHT MARK": "LRM",
 432                 "RIGHT-TO-LEFT MARK": "RLM",
 433                 "LEFT-TO-RIGHT EMBEDDING": "LRE",
 434                 "RIGHT-TO-LEFT EMBEDDING": "RLE",
 435                 "POP DIRECTIONAL FORMATTING": "PDF",
 436                 "LEFT-TO-RIGHT OVERRIDE": "LRO",
 437                 "RIGHT-TO-LEFT OVERRIDE": "RLO",
 438         }
 439
 440         @staticmethod
 441         def pretty_name (u):
 442                 try:
 443                         s = unicodedata.name (u)
 444                 except ValueError:
 445                         return "XXX"
 446                 s = re.sub (".* LETTER ", "", s)
 447                 s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
 448                 s = re.sub (".* SIGN ", "", s)
 449                 s = re.sub (".* COMBINING ", "", s)
 450                 if re.match (".* VIRAMA", s):
 451                         s = "HALANT"
 452                 if s in Unicode.shorthands:
 453                         s = Unicode.shorthands[s]
 454                 return s
 455
 456         @staticmethod
 457         def pretty_names (s):
 458                 s = re.sub (r"[<+>\\uU]", " ", s)
 459                 s = re.sub (r"0[xX]", " ", s)
 460                 s = [unichr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
 461                 return u' + '.join (Unicode.pretty_name (x) for x in s).encode ('utf-8')
 462
 463
 464 class FileHelpers:
 465
 466         @staticmethod
 467         def open_file_or_stdin (f):
 468                 if f == '-':
 469                         return sys.stdin
 470                 return file (f)
 471
 472
 473 class Manifest:
 474
 475         @staticmethod
 476         def read (s, strict = True):
 477
 478                 if not os.path.exists (s):
 479                         if strict:
 480                                 print ("%s: %s does not exist" % (sys.argv[0], s), file=sys.stderr)
 481                                 sys.exit (1)
 482                         return
 483
 484                 s = os.path.normpath (s)
 485
 486                 if os.path.isdir (s):
 487
 488                         try:
 489                                 m = file (os.path.join (s, "MANIFEST"))
 490                                 items = [x.strip () for x in m.readlines ()]
 491                                 for f in items:
 492                                         for p in Manifest.read (os.path.join (s, f)):
 493                                                 yield p
 494                         except IOError:
 495                                 if strict:
 496                                         print ("%s: %s does not exist" % (sys.argv[0], os.path.join (s, "MANIFEST")), file=sys.stderr)
 497                                         sys.exit (1)
 498                                 return
 499                 else:
 500                         yield s
 501
 502         @staticmethod
 503         def update_recursive (s):
 504
 505                 for dirpath, dirnames, filenames in os.walk (s, followlinks=True):
 506
 507                         for f in ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]:
 508                                 if f in dirnames:
 509                                         dirnames.remove (f)
 510                                 if f in filenames:
 511                                         filenames.remove (f)
 512                         dirnames.sort ()
 513                         filenames.sort ()
 514                         ms = os.path.join (dirpath, "MANIFEST")
 515                         print ("  GEN    %s" % ms)
 516                         m = open (ms, "w")
 517                         for f in filenames:
 518                                 print (f, file=m)
 519                         for f in dirnames:
 520                                 print (f, file=m)
 521                         for f in dirnames:
 522                                 Manifest.update_recursive (os.path.join (dirpath, f))
 523
if __name__ == '__main__':
        # Nothing is run when this file is executed directly.
        pass