test/shaping/hb_test_tools.py

   1 #!/usr/bin/python
   2
   3 import sys, os, re, difflib, unicodedata, errno, cgi
   4 from itertools import *
   5
# Line-prefix characters used in diff output.  ZipDiffer.diff_files tags a
# line coming from the i-th input with diff_symbols[i], and DiffColorizer
# looks the first character of each line up in this string.
diff_symbols = "-+=*&^%$#@!~/"
# Highlight colors.  DiffColorizer uses diff_colors[i] for the side whose
# prefix is diff_symbols[i].
diff_colors = ['red', 'green', 'blue']
   8
   9 class ColorFormatter:
  10
  11         class Null:
  12                 @staticmethod
  13                 def start_color (c): return ''
  14                 @staticmethod
  15                 def end_color (): return ''
  16                 @staticmethod
  17                 def escape (s): return s
  18                 @staticmethod
  19                 def newline (): return '\n'
  20
  21         class ANSI:
  22                 @staticmethod
  23                 def start_color (c):
  24                         return {
  25                                 'red': '\033[41;37;1m',
  26                                 'green': '\033[42;37;1m',
  27                                 'blue': '\033[44;37;1m',
  28                         }[c]
  29                 @staticmethod
  30                 def end_color ():
  31                         return '\033[m'
  32                 @staticmethod
  33                 def escape (s): return s
  34                 @staticmethod
  35                 def newline (): return '\n'
  36
  37         class HTML:
  38                 @staticmethod
  39                 def start_color (c):
  40                         return '<span style="background:%s">' % c
  41                 @staticmethod
  42                 def end_color ():
  43                         return '</span>'
  44                 @staticmethod
  45                 def escape (s): return cgi.escape (s)
  46                 @staticmethod
  47                 def newline (): return '<br/>\n'
  48
  49         @staticmethod
  50         def Auto (argv = [], out = sys.stdout):
  51                 format = ColorFormatter.ANSI
  52                 if "--format" in argv:
  53                         argv.remove ("--format")
  54                         format = ColorFormatter.ANSI
  55                 if "--format=ansi" in argv:
  56                         argv.remove ("--format=ansi")
  57                         format = ColorFormatter.ANSI
  58                 if "--format=html" in argv:
  59                         argv.remove ("--format=html")
  60                         format = ColorFormatter.HTML
  61                 if "--no-format" in argv:
  62                         argv.remove ("--no-format")
  63                         format = ColorFormatter.Null
  64                 return format
  65
  66
  67 class DiffColorizer:
  68
  69         diff_regex = re.compile ('([a-za-z0-9_]*)([^a-za-z0-9_]?)')
  70
  71         def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
  72                 self.formatter = formatter
  73                 self.colors = colors
  74                 self.symbols = symbols
  75
  76         def colorize_lines (self, lines):
  77                 lines = (l if l else '' for l in lines)
  78                 ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
  79                 oo = ["",""]
  80                 st = [False, False]
  81                 for l in difflib.Differ().compare (*ss):
  82                         if l[0] == '?':
  83                                 continue
  84                         if l[0] == ' ':
  85                                 for i in range(2):
  86                                         if st[i]:
  87                                                 oo[i] += self.formatter.end_color ()
  88                                                 st[i] = False
  89                                 oo = [o + self.formatter.escape (l[2:]) for o in oo]
  90                                 continue
  91                         if l[0] in self.symbols:
  92                                 i = self.symbols.index (l[0])
  93                                 if not st[i]:
  94                                         oo[i] += self.formatter.start_color (self.colors[i])
  95                                         st[i] = True
  96                                 oo[i] += self.formatter.escape (l[2:])
  97                                 continue
  98                 for i in range(2):
  99                         if st[i]:
 100                                 oo[i] += self.formatter.end_color ()
 101                                 st[i] = False
 102                 oo = [o.replace ('\n', '') for o in oo]
 103                 return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]
 104
 105         def colorize_diff (self, f):
 106                 lines = [None, None]
 107                 for l in f:
 108                         if l[0] not in self.symbols:
 109                                 yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
 110                                 continue
 111                         i = self.symbols.index (l[0])
 112                         if lines[i]:
 113                                 # Flush
 114                                 for line in self.colorize_lines (lines):
 115                                         yield line
 116                                 lines = [None, None]
 117                         lines[i] = l[1:]
 118                         if (all (lines)):
 119                                 # Flush
 120                                 for line in self.colorize_lines (lines):
 121                                         yield line
 122                                 lines = [None, None]
 123                 if (any (lines)):
 124                         # Flush
 125                         for line in self.colorize_lines (lines):
 126                                 yield line
 127
 128
 129 class ZipDiffer:
 130
 131         @staticmethod
 132         def diff_files (files, symbols=diff_symbols):
 133                 files = tuple (files) # in case it's a generator, copy it
 134                 try:
 135                         for lines in izip_longest (*files):
 136                                 if all (lines[0] == line for line in lines[1:]):
 137                                         sys.stdout.writelines ([" ", lines[0]])
 138                                         continue
 139
 140                                 for i, l in enumerate (lines):
 141                                         if l:
 142                                                 sys.stdout.writelines ([symbols[i], l])
 143                 except IOError as e:
 144                         if e.errno != errno.EPIPE:
 145                                 print >> sys.stderr, "%s: %s: %s" % (sys.argv[0], e.filename, e.strerror)
 146                                 sys.exit (1)
 147
 148
 149 class DiffFilters:
 150
 151         @staticmethod
 152         def filter_failures (f):
 153                 for key, lines in DiffHelpers.separate_test_cases (f):
 154                         lines = list (lines)
 155                         if not DiffHelpers.test_passed (lines):
 156                                 for l in lines: yield l
 157
 158 class Stat:
 159
 160         def __init__ (self):
 161                 self.count = 0
 162                 self.freq = 0
 163
 164         def add (self, test):
 165                 self.count += 1
 166                 self.freq += test.freq
 167
 168 class Stats:
 169
 170         def __init__ (self):
 171                 self.passed = Stat ()
 172                 self.failed = Stat ()
 173                 self.total  = Stat ()
 174
 175         def add (self, test):
 176                 self.total.add (test)
 177                 if test.passed:
 178                         self.passed.add (test)
 179                 else:
 180                         self.failed.add (test)
 181
 182         def mean (self):
 183                 return float (self.passed.count) / self.total.count
 184
 185         def variance (self):
 186                 return (float (self.passed.count) / self.total.count) * \
 187                        (float (self.failed.count) / self.total.count)
 188
 189         def stddev (self):
 190                 return self.variance () ** .5
 191
 192         def zscore (self, population):
 193                 """Calculate the standard score.
 194                    Population is the Stats for population.
 195                    Self is Stats for sample.
 196                    Returns larger absolute value if sample is highly unlikely to be random.
 197                    Anything outside of -3..+3 is very unlikely to be random.
 198                    See: http://en.wikipedia.org/wiki/Standard_score"""
 199
 200                 return (self.mean () - population.mean ()) / population.stddev ()
 201
 202
 203
 204
 205 class DiffSinks:
 206
 207         @staticmethod
 208         def print_stat (f):
 209                 passed = 0
 210                 failed = 0
 211                 # XXX port to Stats, but that would really slow us down here
 212                 for key, lines in DiffHelpers.separate_test_cases (f):
 213                         if DiffHelpers.test_passed (lines):
 214                                 passed += 1
 215                         else:
 216                                 failed += 1
 217                 total = passed + failed
 218                 print "%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, 100. * failed / total)
 219
 220         @staticmethod
 221         def print_ngrams (f, ns=(1,2,3)):
 222                 gens = tuple (Ngram.generator (n) for n in ns)
 223                 allstats = Stats ()
 224                 allgrams = {}
 225                 for key, lines in DiffHelpers.separate_test_cases (f):
 226                         test = Test (lines)
 227                         allstats.add (test)
 228
 229                         for gen in gens:
 230                                 for ngram in gen (test.unicodes):
 231                                         if ngram not in allgrams:
 232                                                 allgrams[ngram] = Stats ()
 233                                         allgrams[ngram].add (test)
 234
 235                 importantgrams = {}
 236                 for ngram, stats in allgrams.iteritems ():
 237                         if stats.failed.count >= 30: # for statistical reasons
 238                                 importantgrams[ngram] = stats
 239                 allgrams = importantgrams
 240                 del importantgrams
 241
 242                 for ngram, stats in allgrams.iteritems ():
 243                         print "zscore: %9f failed: %6d passed: %6d ngram: <%s>" % (stats.zscore (allstats), stats.failed.count, stats.passed.count, ','.join ("U+%04X" % u for u in ngram))
 244
 245
 246
 247 class Test:
 248
 249         def __init__ (self, lines):
 250                 self.freq = 1
 251                 self.passed = True
 252                 self.identifier = None
 253                 self.text = None
 254                 self.unicodes = None
 255                 self.glyphs = None
 256                 for l in lines:
 257                         symbol = l[0]
 258                         if symbol != ' ':
 259                                 self.passed = False
 260                         i = 1
 261                         if ':' in l:
 262                                 i = l.index (':')
 263                                 if not self.identifier:
 264                                         self.identifier = l[1:i]
 265                                 i = i + 2 # Skip colon and space
 266                         j = -1
 267                         if l[j] == '\n':
 268                                 j -= 1
 269                         brackets = l[i] + l[j]
 270                         l = l[i+1:-2]
 271                         if brackets == '()':
 272                                 self.text = l
 273                         elif brackets == '<>':
 274                                 self.unicodes = Unicode.parse (l)
 275                         elif brackets == '[]':
 276                                 # XXX we don't handle failed tests here
 277                                 self.glyphs = l
 278
 279
 280 class DiffHelpers:
 281
 282         @staticmethod
 283         def separate_test_cases (f):
 284                 '''Reads lines from f, and if the lines have identifiers, ie.
 285                    have a colon character, groups them by identifier,
 286                    yielding lists of all lines with the same identifier.'''
 287
 288                 def identifier (l):
 289                         if ':' in l[1:]:
 290                                 return l[1:l.index (':')]
 291                         return l
 292                 return groupby (f, key=identifier)
 293
 294         @staticmethod
 295         def test_passed (lines):
 296                 lines = list (lines)
 297                 # XXX This is a hack, but does the job for now.
 298                 if any (l.find("space|space") >= 0 for l in lines): return True
 299                 if any (l.find("uni25CC") >= 0 for l in lines): return True
 300                 if any (l.find("dottedcircle") >= 0 for l in lines): return True
 301                 return all (l[0] == ' ' for l in lines)
 302
 303
 304 class FilterHelpers:
 305
 306         @staticmethod
 307         def filter_printer_function (filter_callback):
 308                 def printer (f):
 309                         for line in filter_callback (f):
 310                                 print line
 311                 return printer
 312
 313         @staticmethod
 314         def filter_printer_function_no_newline (filter_callback):
 315                 def printer (f):
 316                         for line in filter_callback (f):
 317                                 sys.stdout.writelines ([line])
 318                 return printer
 319
 320
 321 class Ngram:
 322
 323         @staticmethod
 324         def generator (n):
 325
 326                 def gen (f):
 327                         l = []
 328                         for x in f:
 329                                 l.append (x)
 330                                 if len (l) == n:
 331                                         yield tuple (l)
 332                                         l[:1] = []
 333
 334                 gen.n = n
 335                 return gen
 336
 337
 338 class UtilMains:
 339
 340         @staticmethod
 341         def process_multiple_files (callback, mnemonic = "FILE"):
 342
 343                 if "--help" in sys.argv:
 344                         print "Usage: %s %s..." % (sys.argv[0], mnemonic)
 345                         sys.exit (1)
 346
 347                 try:
 348                         files = sys.argv[1:] if len (sys.argv) > 1 else ['-']
 349                         for s in files:
 350                                 callback (FileHelpers.open_file_or_stdin (s))
 351                 except IOError as e:
 352                         if e.errno != errno.EPIPE:
 353                                 print >> sys.stderr, "%s: %s: %s" % (sys.argv[0], e.filename, e.strerror)
 354                                 sys.exit (1)
 355
 356         @staticmethod
 357         def process_multiple_args (callback, mnemonic):
 358
 359                 if len (sys.argv) == 1 or "--help" in sys.argv:
 360                         print "Usage: %s %s..." % (sys.argv[0], mnemonic)
 361                         sys.exit (1)
 362
 363                 try:
 364                         for s in sys.argv[1:]:
 365                                 callback (s)
 366                 except IOError as e:
 367                         if e.errno != errno.EPIPE:
 368                                 print >> sys.stderr, "%s: %s: %s" % (sys.argv[0], e.filename, e.strerror)
 369                                 sys.exit (1)
 370
 371         @staticmethod
 372         def filter_multiple_strings_or_stdin (callback, mnemonic, \
 373                                               separator = " ", \
 374                                               concat_separator = False):
 375
 376                 if "--help" in sys.argv:
 377                         print "Usage:\n  %s %s...\nor:\n  %s\n\nWhen called with no arguments, input is read from standard input." \
 378                               % (sys.argv[0], mnemonic, sys.argv[0])
 379                         sys.exit (1)
 380
 381                 try:
 382                         if len (sys.argv) == 1:
 383                                 while (1):
 384                                         line = sys.stdin.readline ()
 385                                         if not len (line):
 386                                                 break
 387                                         if line[-1] == '\n':
 388                                                 line = line[:-1]
 389                                         print callback (line)
 390                         else:
 391                                 args = sys.argv[1:]
 392                                 if concat_separator != False:
 393                                         args = [concat_separator.join (args)]
 394                                 print separator.join (callback (x) for x in (args))
 395                 except IOError as e:
 396                         if e.errno != errno.EPIPE:
 397                                 print >> sys.stderr, "%s: %s: %s" % (sys.argv[0], e.filename, e.strerror)
 398                                 sys.exit (1)
 399
 400
 401 class Unicode:
 402
 403         @staticmethod
 404         def decode (s):
 405                 return '<' + u','.join ("U+%04X" % ord (u) for u in unicode (s, 'utf-8')).encode ('utf-8') + '>'
 406
 407         @staticmethod
 408         def parse (s):
 409                 s = re.sub (r"[<+>,\\uU\n       ]", " ", s)
 410                 s = re.sub (r"0[xX]", " ", s)
 411                 return [int (x, 16) for x in s.split (' ') if len (x)]
 412
 413         @staticmethod
 414         def encode (s):
 415                 return u''.join (unichr (x) for x in Unicode.parse (s)).encode ('utf-8')
 416
 417         shorthands = {
 418                 "ZERO WIDTH NON-JOINER": "ZWNJ",
 419                 "ZERO WIDTH JOINER": "ZWJ",
 420                 "NARROW NO-BREAK SPACE": "NNBSP",
 421                 "COMBINING GRAPHEME JOINER": "CGJ",
 422                 "LEFT-TO-RIGHT MARK": "LRM",
 423                 "RIGHT-TO-LEFT MARK": "RLM",
 424                 "LEFT-TO-RIGHT EMBEDDING": "LRE",
 425                 "RIGHT-TO-LEFT EMBEDDING": "RLE",
 426                 "POP DIRECTIONAL FORMATTING": "PDF",
 427                 "LEFT-TO-RIGHT OVERRIDE": "LRO",
 428                 "RIGHT-TO-LEFT OVERRIDE": "RLO",
 429         }
 430
 431         @staticmethod
 432         def pretty_name (u):
 433                 try:
 434                         s = unicodedata.name (u)
 435                 except ValueError:
 436                         return "XXX"
 437                 s = re.sub (".* LETTER ", "", s)
 438                 s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
 439                 s = re.sub (".* SIGN ", "", s)
 440                 s = re.sub (".* COMBINING ", "", s)
 441                 if re.match (".* VIRAMA", s):
 442                         s = "HALANT"
 443                 if s in Unicode.shorthands:
 444                         s = Unicode.shorthands[s]
 445                 return s
 446
 447         @staticmethod
 448         def pretty_names (s):
 449                 s = re.sub (r"[<+>\\uU]", " ", s)
 450                 s = re.sub (r"0[xX]", " ", s)
 451                 s = [unichr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
 452                 return u' + '.join (Unicode.pretty_name (x) for x in s).encode ('utf-8')
 453
 454
 455 class FileHelpers:
 456
 457         @staticmethod
 458         def open_file_or_stdin (f):
 459                 if f == '-':
 460                         return sys.stdin
 461                 return file (f)
 462
 463
 464 class Manifest:
 465
 466         @staticmethod
 467         def read (s, strict = True):
 468
 469                 if not os.path.exists (s):
 470                         if strict:
 471                                 print >> sys.stderr, "%s: %s does not exist" % (sys.argv[0], s)
 472                                 sys.exit (1)
 473                         return
 474
 475                 s = os.path.normpath (s)
 476
 477                 if os.path.isdir (s):
 478
 479                         try:
 480                                 m = file (os.path.join (s, "MANIFEST"))
 481                                 items = [x.strip () for x in m.readlines ()]
 482                                 for f in items:
 483                                         for p in Manifest.read (os.path.join (s, f)):
 484                                                 yield p
 485                         except IOError:
 486                                 if strict:
 487                                         print >> sys.stderr, "%s: %s does not exist" % (sys.argv[0], os.path.join (s, "MANIFEST"))
 488                                         sys.exit (1)
 489                                 return
 490                 else:
 491                         yield s
 492
 493         @staticmethod
 494         def update_recursive (s):
 495
 496                 for dirpath, dirnames, filenames in os.walk (s, followlinks=True):
 497
 498                         for f in ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]:
 499                                 if f in dirnames:
 500                                         dirnames.remove (f)
 501                                 if f in filenames:
 502                                         filenames.remove (f)
 503                         dirnames.sort ()
 504                         filenames.sort ()
 505                         ms = os.path.join (dirpath, "MANIFEST")
 506                         print "  GEN    %s" % ms
 507                         m = open (ms, "w")
 508                         for f in filenames:
 509                                 print >> m, f
 510                         for f in dirnames:
 511                                 print >> m, f
 512                         for f in dirnames:
 513                                 Manifest.update_recursive (os.path.join (dirpath, f))
 514
# Running this module directly does nothing; it only provides the helper
# classes defined above.
if __name__ == '__main__':
        pass