test/shaping/hb_test_tools.py

   1 #!/usr/bin/python
   2
   3 import sys, os, re, difflib, unicodedata, errno, cgi
   4 from itertools import *
   5
   6 diff_symbols = "-+=*&^%$#@!~/"
   7 diff_colors = ['red', 'green', 'blue']
   8
   9 class ColorFormatter:
  10
  11         class Null:
  12                 @staticmethod
  13                 def start_color (c): return ''
  14                 @staticmethod
  15                 def end_color (): return ''
  16                 @staticmethod
  17                 def escape (s): return s
  18                 @staticmethod
  19                 def newline (): return '\n'
  20
  21         class ANSI:
  22                 @staticmethod
  23                 def start_color (c):
  24                         return {
  25                                 'red': '\033[41;37;1m',
  26                                 'green': '\033[42;37;1m',
  27                                 'blue': '\033[44;37;1m',
  28                         }[c]
  29                 @staticmethod
  30                 def end_color ():
  31                         return '\033[m'
  32                 @staticmethod
  33                 def escape (s): return s
  34                 @staticmethod
  35                 def newline (): return '\n'
  36
  37         class HTML:
  38                 @staticmethod
  39                 def start_color (c):
  40                         return '<span style="background:%s">' % c
  41                 @staticmethod
  42                 def end_color ():
  43                         return '</span>'
  44                 @staticmethod
  45                 def escape (s): return cgi.escape (s)
  46                 @staticmethod
  47                 def newline (): return '<br/>\n'
  48
  49         @staticmethod
  50         def Auto (argv = [], out = sys.stdout):
  51                 format = ColorFormatter.ANSI
  52                 if "--format" in argv:
  53                         argv.remove ("--format")
  54                         format = ColorFormatter.ANSI
  55                 if "--format=ansi" in argv:
  56                         argv.remove ("--format=ansi")
  57                         format = ColorFormatter.ANSI
  58                 if "--format=html" in argv:
  59                         argv.remove ("--format=html")
  60                         format = ColorFormatter.HTML
  61                 if "--no-format" in argv:
  62                         argv.remove ("--no-format")
  63                         format = ColorFormatter.Null
  64                 return format
  65
  66
  67 class DiffColorizer:
  68
  69         diff_regex = re.compile ('([a-za-z0-9_]*)([^a-za-z0-9_]?)')
  70
  71         def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
  72                 self.formatter = formatter
  73                 self.colors = colors
  74                 self.symbols = symbols
  75
  76         def colorize_lines (self, lines):
  77                 lines = (l if l else '' for l in lines)
  78                 ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
  79                 oo = ["",""]
  80                 st = [False, False]
  81                 for l in difflib.Differ().compare (*ss):
  82                         if l[0] == '?':
  83                                 continue
  84                         if l[0] == ' ':
  85                                 for i in range(2):
  86                                         if st[i]:
  87                                                 oo[i] += self.formatter.end_color ()
  88                                                 st[i] = False
  89                                 oo = [o + self.formatter.escape (l[2:]) for o in oo]
  90                                 continue
  91                         if l[0] in self.symbols:
  92                                 i = self.symbols.index (l[0])
  93                                 if not st[i]:
  94                                         oo[i] += self.formatter.start_color (self.colors[i])
  95                                         st[i] = True
  96                                 oo[i] += self.formatter.escape (l[2:])
  97                                 continue
  98                 for i in range(2):
  99                         if st[i]:
 100                                 oo[i] += self.formatter.end_color ()
 101                                 st[i] = False
 102                 oo = [o.replace ('\n', '') for o in oo]
 103                 return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]
 104
 105         def colorize_diff (self, f):
 106                 lines = [None, None]
 107                 for l in f:
 108                         if l[0] not in self.symbols:
 109                                 yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
 110                                 continue
 111                         i = self.symbols.index (l[0])
 112                         if lines[i]:
 113                                 # Flush
 114                                 for line in self.colorize_lines (lines):
 115                                         yield line
 116                                 lines = [None, None]
 117                         lines[i] = l[1:]
 118                         if (all (lines)):
 119                                 # Flush
 120                                 for line in self.colorize_lines (lines):
 121                                         yield line
 122                                 lines = [None, None]
 123                 if (any (lines)):
 124                         # Flush
 125                         for line in self.colorize_lines (lines):
 126                                 yield line
 127
 128
 129 class ZipDiffer:
 130
 131         @staticmethod
 132         def diff_files (files, symbols=diff_symbols):
 133                 files = tuple (files) # in case it's a generator, copy it
 134                 try:
 135                         for lines in izip_longest (*files):
 136                                 if all (lines[0] == line for line in lines[1:]):
 137                                         sys.stdout.writelines ([" ", lines[0]])
 138                                         continue
 139
 140                                 for i, l in enumerate (lines):
 141                                         if l:
 142                                                 sys.stdout.writelines ([symbols[i], l])
 143                 except IOError as e:
 144                         if e.errno != errno.EPIPE:
 145                                 print >> sys.stderr, "%s: %s: %s" % (sys.argv[0], e.filename, e.strerror)
 146                                 sys.exit (1)
 147
 148
 149 class DiffFilters:
 150
 151         @staticmethod
 152         def filter_failures (f):
 153                 for key, lines in DiffHelpers.separate_test_cases (f):
 154                         lines = list (lines)
 155                         if not DiffHelpers.test_passed (lines):
 156                                 for l in lines: yield l
 157
 158 class Stat:
 159
 160         def __init__ (self):
 161                 self.count = 0
 162                 self.freq = 0
 163
 164         def add (self, test):
 165                 self.count += 1
 166                 self.freq += test.freq
 167
 168 class Stats:
 169
 170         def __init__ (self):
 171                 self.passed = Stat ()
 172                 self.failed = Stat ()
 173                 self.total  = Stat ()
 174
 175         def add (self, test):
 176                 self.total.add (test)
 177                 if test.passed:
 178                         self.passed.add (test)
 179                 else:
 180                         self.failed.add (test)
 181
 182         def mean (self):
 183                 return float (self.passed.count) / self.total.count
 184
 185         def variance (self):
 186                 return (float (self.passed.count) / self.total.count) * \
 187                        (float (self.failed.count) / self.total.count)
 188
 189         def stddev (self):
 190                 return self.variance () ** .5
 191
 192         def zscore (self, population):
 193                 """Calculate the standard score.
 194                    Population is the Stats for population.
 195                    Self is Stats for sample.
 196                    Returns larger absolute value if sample is highly unlikely to be random.
 197                    Anything outside of -3..+3 is very unlikely to be random.
 198                    See: http://en.wikipedia.org/wiki/Standard_score"""
 199
 200                 return (self.mean () - population.mean ()) / population.stddev ()
 201
 202
 203
 204
 205 class DiffSinks:
 206
 207         @staticmethod
 208         def print_stat (f):
 209                 passed = 0
 210                 failed = 0
 211                 # XXX port to Stats, but that would really slow us down here
 212                 for key, lines in DiffHelpers.separate_test_cases (f):
 213                         if DiffHelpers.test_passed (lines):
 214                                 passed += 1
 215                         else:
 216                                 failed += 1
 217                 total = passed + failed
 218                 print "%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, 100. * failed / total)
 219
 220         @staticmethod
 221         def print_ngrams (f, ns=(1,2,3)):
 222                 gens = tuple (Ngram.generator (n) for n in ns)
 223                 allstats = Stats ()
 224                 allgrams = {}
 225                 for key, lines in DiffHelpers.separate_test_cases (f):
 226                         test = Test (lines)
 227                         allstats.add (test)
 228
 229                         for gen in gens:
 230                                 for ngram in gen (test.unicodes):
 231                                         if ngram not in allgrams:
 232                                                 allgrams[ngram] = Stats ()
 233                                         allgrams[ngram].add (test)
 234
 235                 importantgrams = {}
 236                 for ngram, stats in allgrams.iteritems ():
 237                         if stats.failed.count >= 30: # for statistical reasons
 238                                 importantgrams[ngram] = stats
 239                 allgrams = importantgrams
 240                 del importantgrams
 241
 242                 for ngram, stats in allgrams.iteritems ():
 243                         print "zscore: %9f failed: %6d passed: %6d ngram: <%s>" % (stats.zscore (allstats), stats.failed.count, stats.passed.count, ','.join ("U+%04X" % u for u in ngram))
 244
 245
 246
 247 class Test:
 248
 249         def __init__ (self, lines):
 250                 self.freq = 1
 251                 self.passed = True
 252                 self.identifier = None
 253                 self.text = None
 254                 self.unicodes = None
 255                 self.glyphs = None
 256                 for l in lines:
 257                         symbol = l[0]
 258                         if symbol != ' ':
 259                                 self.passed = False
 260                         i = 1
 261                         if ':' in l:
 262                                 i = l.index (':')
 263                                 if not self.identifier:
 264                                         self.identifier = l[1:i]
 265                                 i = i + 2 # Skip colon and space
 266                         j = -1
 267                         if l[j] == '\n':
 268                                 j -= 1
 269                         brackets = l[i] + l[j]
 270                         l = l[i+1:-2]
 271                         if brackets == '()':
 272                                 self.text = l
 273                         elif brackets == '<>':
 274                                 self.unicodes = Unicode.parse (l)
 275                         elif brackets == '[]':
 276                                 # XXX we don't handle failed tests here
 277                                 self.glyphs = l
 278
 279
 280 class DiffHelpers:
 281
 282         @staticmethod
 283         def separate_test_cases (f):
 284                 '''Reads lines from f, and if the lines have identifiers, ie.
 285                    have a colon character, groups them by identifier,
 286                    yielding lists of all lines with the same identifier.'''
 287
 288                 def identifier (l):
 289                         if ':' in l[1:]:
 290                                 return l[1:l.index (':')]
 291                         return l
 292                 return groupby (f, key=identifier)
 293
 294         @staticmethod
 295         def test_passed (lines):
 296                 lines = list (lines)
 297                 # XXX This is a hack, but does the job for now.
 298                 if any (l.find("space+0|space+0") >= 0 for l in lines if l[0] == '+'): return True
 299                 if any (l.find("uni25CC") >= 0 for l in lines if l[0] == '+'): return True
 300                 if any (l.find("dottedcircle") >= 0 for l in lines if l[0] == '+'): return True
 301                 if any (l.find("glyph0") >= 0 for l in lines if l[0] == '+'): return True
 302                 if any (l.find("gid0") >= 0 for l in lines if l[0] == '+'): return True
 303                 if any (l.find("notdef") >= 0 for l in lines if l[0] == '+'): return True
 304                 return all (l[0] == ' ' for l in lines)
 305
 306
 307 class FilterHelpers:
 308
 309         @staticmethod
 310         def filter_printer_function (filter_callback):
 311                 def printer (f):
 312                         for line in filter_callback (f):
 313                                 print line
 314                 return printer
 315
 316         @staticmethod
 317         def filter_printer_function_no_newline (filter_callback):
 318                 def printer (f):
 319                         for line in filter_callback (f):
 320                                 sys.stdout.writelines ([line])
 321                 return printer
 322
 323
 324 class Ngram:
 325
 326         @staticmethod
 327         def generator (n):
 328
 329                 def gen (f):
 330                         l = []
 331                         for x in f:
 332                                 l.append (x)
 333                                 if len (l) == n:
 334                                         yield tuple (l)
 335                                         l[:1] = []
 336
 337                 gen.n = n
 338                 return gen
 339
 340
 341 class UtilMains:
 342
 343         @staticmethod
 344         def process_multiple_files (callback, mnemonic = "FILE"):
 345
 346                 if "--help" in sys.argv:
 347                         print "Usage: %s %s..." % (sys.argv[0], mnemonic)
 348                         sys.exit (1)
 349
 350                 try:
 351                         files = sys.argv[1:] if len (sys.argv) > 1 else ['-']
 352                         for s in files:
 353                                 callback (FileHelpers.open_file_or_stdin (s))
 354                 except IOError as e:
 355                         if e.errno != errno.EPIPE:
 356                                 print >> sys.stderr, "%s: %s: %s" % (sys.argv[0], e.filename, e.strerror)
 357                                 sys.exit (1)
 358
 359         @staticmethod
 360         def process_multiple_args (callback, mnemonic):
 361
 362                 if len (sys.argv) == 1 or "--help" in sys.argv:
 363                         print "Usage: %s %s..." % (sys.argv[0], mnemonic)
 364                         sys.exit (1)
 365
 366                 try:
 367                         for s in sys.argv[1:]:
 368                                 callback (s)
 369                 except IOError as e:
 370                         if e.errno != errno.EPIPE:
 371                                 print >> sys.stderr, "%s: %s: %s" % (sys.argv[0], e.filename, e.strerror)
 372                                 sys.exit (1)
 373
 374         @staticmethod
 375         def filter_multiple_strings_or_stdin (callback, mnemonic, \
 376                                               separator = " ", \
 377                                               concat_separator = False):
 378
 379                 if "--help" in sys.argv:
 380                         print "Usage:\n  %s %s...\nor:\n  %s\n\nWhen called with no arguments, input is read from standard input." \
 381                               % (sys.argv[0], mnemonic, sys.argv[0])
 382                         sys.exit (1)
 383
 384                 try:
 385                         if len (sys.argv) == 1:
 386                                 while (1):
 387                                         line = sys.stdin.readline ()
 388                                         if not len (line):
 389                                                 break
 390                                         if line[-1] == '\n':
 391                                                 line = line[:-1]
 392                                         print callback (line)
 393                         else:
 394                                 args = sys.argv[1:]
 395                                 if concat_separator != False:
 396                                         args = [concat_separator.join (args)]
 397                                 print separator.join (callback (x) for x in (args))
 398                 except IOError as e:
 399                         if e.errno != errno.EPIPE:
 400                                 print >> sys.stderr, "%s: %s: %s" % (sys.argv[0], e.filename, e.strerror)
 401                                 sys.exit (1)
 402
 403
 404 class Unicode:
 405
 406         @staticmethod
 407         def decode (s):
 408                 return '<' + u','.join ("U+%04X" % ord (u) for u in unicode (s, 'utf-8')).encode ('utf-8') + '>'
 409
 410         @staticmethod
 411         def parse (s):
 412                 s = re.sub (r"0[xX]", " ", s)
 413                 s = re.sub (r"[<+>,;&#\\xXuU\n  ]", " ", s)
 414                 return [int (x, 16) for x in s.split (' ') if len (x)]
 415
 416         @staticmethod
 417         def encode (s):
 418                 return u''.join (unichr (x) for x in Unicode.parse (s)).encode ('utf-8')
 419
 420         shorthands = {
 421                 "ZERO WIDTH NON-JOINER": "ZWNJ",
 422                 "ZERO WIDTH JOINER": "ZWJ",
 423                 "NARROW NO-BREAK SPACE": "NNBSP",
 424                 "COMBINING GRAPHEME JOINER": "CGJ",
 425                 "LEFT-TO-RIGHT MARK": "LRM",
 426                 "RIGHT-TO-LEFT MARK": "RLM",
 427                 "LEFT-TO-RIGHT EMBEDDING": "LRE",
 428                 "RIGHT-TO-LEFT EMBEDDING": "RLE",
 429                 "POP DIRECTIONAL FORMATTING": "PDF",
 430                 "LEFT-TO-RIGHT OVERRIDE": "LRO",
 431                 "RIGHT-TO-LEFT OVERRIDE": "RLO",
 432         }
 433
 434         @staticmethod
 435         def pretty_name (u):
 436                 try:
 437                         s = unicodedata.name (u)
 438                 except ValueError:
 439                         return "XXX"
 440                 s = re.sub (".* LETTER ", "", s)
 441                 s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
 442                 s = re.sub (".* SIGN ", "", s)
 443                 s = re.sub (".* COMBINING ", "", s)
 444                 if re.match (".* VIRAMA", s):
 445                         s = "HALANT"
 446                 if s in Unicode.shorthands:
 447                         s = Unicode.shorthands[s]
 448                 return s
 449
 450         @staticmethod
 451         def pretty_names (s):
 452                 s = re.sub (r"[<+>\\uU]", " ", s)
 453                 s = re.sub (r"0[xX]", " ", s)
 454                 s = [unichr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
 455                 return u' + '.join (Unicode.pretty_name (x) for x in s).encode ('utf-8')
 456
 457
 458 class FileHelpers:
 459
 460         @staticmethod
 461         def open_file_or_stdin (f):
 462                 if f == '-':
 463                         return sys.stdin
 464                 return file (f)
 465
 466
 467 class Manifest:
 468
 469         @staticmethod
 470         def read (s, strict = True):
 471
 472                 if not os.path.exists (s):
 473                         if strict:
 474                                 print >> sys.stderr, "%s: %s does not exist" % (sys.argv[0], s)
 475                                 sys.exit (1)
 476                         return
 477
 478                 s = os.path.normpath (s)
 479
 480                 if os.path.isdir (s):
 481
 482                         try:
 483                                 m = file (os.path.join (s, "MANIFEST"))
 484                                 items = [x.strip () for x in m.readlines ()]
 485                                 for f in items:
 486                                         for p in Manifest.read (os.path.join (s, f)):
 487                                                 yield p
 488                         except IOError:
 489                                 if strict:
 490                                         print >> sys.stderr, "%s: %s does not exist" % (sys.argv[0], os.path.join (s, "MANIFEST"))
 491                                         sys.exit (1)
 492                                 return
 493                 else:
 494                         yield s
 495
 496         @staticmethod
 497         def update_recursive (s):
 498
 499                 for dirpath, dirnames, filenames in os.walk (s, followlinks=True):
 500
 501                         for f in ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]:
 502                                 if f in dirnames:
 503                                         dirnames.remove (f)
 504                                 if f in filenames:
 505                                         filenames.remove (f)
 506                         dirnames.sort ()
 507                         filenames.sort ()
 508                         ms = os.path.join (dirpath, "MANIFEST")
 509                         print "  GEN    %s" % ms
 510                         m = open (ms, "w")
 511                         for f in filenames:
 512                                 print >> m, f
 513                         for f in dirnames:
 514                                 print >> m, f
 515                         for f in dirnames:
 516                                 Manifest.update_recursive (os.path.join (dirpath, f))
 517
# This file only provides helpers for other scripts; running it directly
# does nothing.
if __name__ == '__main__':
        pass