test/shaping/hb_test_tools.py

   1 #!/usr/bin/env python3
   2
   3 import sys, os, re, difflib, unicodedata, errno, cgi, itertools
   4 from itertools import *
   5
# One marker character per input: ZipDiffer.diff_files prefixes a line that
# came from input number i with diff_symbols[i].  DiffColorizer only pairs up
# the first two markers ('-' and '+').
diff_symbols = "-+=*&^%$#@!~/"
# Highlight colors; DiffColorizer indexes this list with the same index it
# uses for diff_symbols.
diff_colors = ['red', 'green', 'blue']
   8
   9 def codepoints(s):
  10         return (ord (u) for u in s)
  11
  12 class ColorFormatter:
  13
  14         class Null:
  15                 @staticmethod
  16                 def start_color (c): return ''
  17                 @staticmethod
  18                 def end_color (): return ''
  19                 @staticmethod
  20                 def escape (s): return s
  21                 @staticmethod
  22                 def newline (): return '\n'
  23
  24         class ANSI:
  25                 @staticmethod
  26                 def start_color (c):
  27                         return {
  28                                 'red': '\033[41;37;1m',
  29                                 'green': '\033[42;37;1m',
  30                                 'blue': '\033[44;37;1m',
  31                         }[c]
  32                 @staticmethod
  33                 def end_color ():
  34                         return '\033[m'
  35                 @staticmethod
  36                 def escape (s): return s
  37                 @staticmethod
  38                 def newline (): return '\n'
  39
  40         class HTML:
  41                 @staticmethod
  42                 def start_color (c):
  43                         return '<span style="background:%s">' % c
  44                 @staticmethod
  45                 def end_color ():
  46                         return '</span>'
  47                 @staticmethod
  48                 def escape (s): return cgi.escape (s)
  49                 @staticmethod
  50                 def newline (): return '<br/>\n'
  51
  52         @staticmethod
  53         def Auto (argv = [], out = sys.stdout):
  54                 format = ColorFormatter.ANSI
  55                 if "--format" in argv:
  56                         argv.remove ("--format")
  57                         format = ColorFormatter.ANSI
  58                 if "--format=ansi" in argv:
  59                         argv.remove ("--format=ansi")
  60                         format = ColorFormatter.ANSI
  61                 if "--format=html" in argv:
  62                         argv.remove ("--format=html")
  63                         format = ColorFormatter.HTML
  64                 if "--no-format" in argv:
  65                         argv.remove ("--no-format")
  66                         format = ColorFormatter.Null
  67                 return format
  68
  69
  70 class DiffColorizer:
  71
  72         diff_regex = re.compile ('([a-za-z0-9_]*)([^a-za-z0-9_]?)')
  73
  74         def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
  75                 self.formatter = formatter
  76                 self.colors = colors
  77                 self.symbols = symbols
  78
  79         def colorize_lines (self, lines):
  80                 lines = (l if l else '' for l in lines)
  81                 ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
  82                 oo = ["",""]
  83                 st = [False, False]
  84                 for l in difflib.Differ().compare (*ss):
  85                         if l[0] == '?':
  86                                 continue
  87                         if l[0] == ' ':
  88                                 for i in range(2):
  89                                         if st[i]:
  90                                                 oo[i] += self.formatter.end_color ()
  91                                                 st[i] = False
  92                                 oo = [o + self.formatter.escape (l[2:]) for o in oo]
  93                                 continue
  94                         if l[0] in self.symbols:
  95                                 i = self.symbols.index (l[0])
  96                                 if not st[i]:
  97                                         oo[i] += self.formatter.start_color (self.colors[i])
  98                                         st[i] = True
  99                                 oo[i] += self.formatter.escape (l[2:])
 100                                 continue
 101                 for i in range(2):
 102                         if st[i]:
 103                                 oo[i] += self.formatter.end_color ()
 104                                 st[i] = False
 105                 oo = [o.replace ('\n', '') for o in oo]
 106                 return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]
 107
 108         def colorize_diff (self, f):
 109                 lines = [None, None]
 110                 for l in f:
 111                         if l[0] not in self.symbols:
 112                                 yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
 113                                 continue
 114                         i = self.symbols.index (l[0])
 115                         if lines[i]:
 116                                 # Flush
 117                                 for line in self.colorize_lines (lines):
 118                                         yield line
 119                                 lines = [None, None]
 120                         lines[i] = l[1:]
 121                         if (all (lines)):
 122                                 # Flush
 123                                 for line in self.colorize_lines (lines):
 124                                         yield line
 125                                 lines = [None, None]
 126                 if (any (lines)):
 127                         # Flush
 128                         for line in self.colorize_lines (lines):
 129                                 yield line
 130
 131
 132 class ZipDiffer:
 133
 134         @staticmethod
 135         def diff_files (files, symbols=diff_symbols):
 136                 files = tuple (files) # in case it's a generator, copy it
 137                 try:
 138                         for lines in itertools.zip_longest (*files):
 139                                 if all (lines[0] == line for line in lines[1:]):
 140                                         sys.stdout.writelines ([" ", lines[0]])
 141                                         continue
 142
 143                                 for i, l in enumerate (lines):
 144                                         if l:
 145                                                 sys.stdout.writelines ([symbols[i], l])
 146                 except IOError as e:
 147                         if e.errno != errno.EPIPE:
 148                                 sys.exit ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))
 149
 150
 151 class DiffFilters:
 152
 153         @staticmethod
 154         def filter_failures (f):
 155                 for key, lines in DiffHelpers.separate_test_cases (f):
 156                         lines = list (lines)
 157                         if not DiffHelpers.test_passed (lines):
 158                                 for l in lines: yield l
 159
 160 class Stat:
 161
 162         def __init__ (self):
 163                 self.count = 0
 164                 self.freq = 0
 165
 166         def add (self, test):
 167                 self.count += 1
 168                 self.freq += test.freq
 169
 170 class Stats:
 171
 172         def __init__ (self):
 173                 self.passed = Stat ()
 174                 self.failed = Stat ()
 175                 self.total  = Stat ()
 176
 177         def add (self, test):
 178                 self.total.add (test)
 179                 if test.passed:
 180                         self.passed.add (test)
 181                 else:
 182                         self.failed.add (test)
 183
 184         def mean (self):
 185                 return float (self.passed.count) / self.total.count
 186
 187         def variance (self):
 188                 return (float (self.passed.count) / self.total.count) * \
 189                        (float (self.failed.count) / self.total.count)
 190
 191         def stddev (self):
 192                 return self.variance () ** .5
 193
 194         def zscore (self, population):
 195                 """Calculate the standard score.
 196                    Population is the Stats for population.
 197                    Self is Stats for sample.
 198                    Returns larger absolute value if sample is highly unlikely to be random.
 199                    Anything outside of -3..+3 is very unlikely to be random.
 200                    See: https://en.wikipedia.org/wiki/Standard_score"""
 201
 202                 return (self.mean () - population.mean ()) / population.stddev ()
 203
 204
 205
 206
 207 class DiffSinks:
 208
 209         @staticmethod
 210         def print_stat (f):
 211                 passed = 0
 212                 failed = 0
 213                 # XXX port to Stats, but that would really slow us down here
 214                 for key, lines in DiffHelpers.separate_test_cases (f):
 215                         if DiffHelpers.test_passed (lines):
 216                                 passed += 1
 217                         else:
 218                                 failed += 1
 219                 total = passed + failed
 220                 print ("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, 100. * failed / total))
 221
 222
 223 class Test:
 224
 225         def __init__ (self, lines):
 226                 self.freq = 1
 227                 self.passed = True
 228                 self.identifier = None
 229                 self.text = None
 230                 self.unicodes = None
 231                 self.glyphs = None
 232                 for l in lines:
 233                         symbol = l[0]
 234                         if symbol != ' ':
 235                                 self.passed = False
 236                         i = 1
 237                         if ':' in l:
 238                                 i = l.index (':')
 239                                 if not self.identifier:
 240                                         self.identifier = l[1:i]
 241                                 i = i + 2 # Skip colon and space
 242                         j = -1
 243                         if l[j] == '\n':
 244                                 j -= 1
 245                         brackets = l[i] + l[j]
 246                         l = l[i+1:-2]
 247                         if brackets == '()':
 248                                 self.text = l
 249                         elif brackets == '<>':
 250                                 self.unicodes = Unicode.parse (l)
 251                         elif brackets == '[]':
 252                                 # XXX we don't handle failed tests here
 253                                 self.glyphs = l
 254
 255
 256 class DiffHelpers:
 257
 258         @staticmethod
 259         def separate_test_cases (f):
 260                 '''Reads lines from f, and if the lines have identifiers, ie.
 261                    have a colon character, groups them by identifier,
 262                    yielding lists of all lines with the same identifier.'''
 263
 264                 def identifier (l):
 265                         if ':' in l[1:]:
 266                                 return l[1:l.index (':')]
 267                         return l
 268                 return groupby (f, key=identifier)
 269
 270         @staticmethod
 271         def test_passed (lines):
 272                 lines = list (lines)
 273                 # XXX This is a hack, but does the job for now.
 274                 if any (l.find("space+0|space+0") >= 0 for l in lines if l[0] == '+'): return True
 275                 if any (l.find("uni25CC") >= 0 for l in lines if l[0] == '+'): return True
 276                 if any (l.find("dottedcircle") >= 0 for l in lines if l[0] == '+'): return True
 277                 if any (l.find("glyph0") >= 0 for l in lines if l[0] == '+'): return True
 278                 if any (l.find("gid0") >= 0 for l in lines if l[0] == '+'): return True
 279                 if any (l.find("notdef") >= 0 for l in lines if l[0] == '+'): return True
 280                 return all (l[0] == ' ' for l in lines)
 281
 282
 283 class FilterHelpers:
 284
 285         @staticmethod
 286         def filter_printer_function (filter_callback):
 287                 def printer (f):
 288                         for line in filter_callback (f):
 289                                 print (line)
 290                 return printer
 291
 292         @staticmethod
 293         def filter_printer_function_no_newline (filter_callback):
 294                 def printer (f):
 295                         for line in filter_callback (f):
 296                                 sys.stdout.writelines ([line])
 297                 return printer
 298
 299
 300 class Ngram:
 301
 302         @staticmethod
 303         def generator (n):
 304
 305                 def gen (f):
 306                         l = []
 307                         for x in f:
 308                                 l.append (x)
 309                                 if len (l) == n:
 310                                         yield tuple (l)
 311                                         l[:1] = []
 312
 313                 gen.n = n
 314                 return gen
 315
 316
 317 class UtilMains:
 318
 319         @staticmethod
 320         def process_multiple_files (callback, mnemonic = "FILE"):
 321
 322                 if "--help" in sys.argv:
 323                         sys.exit ("Usage: %s %s..." % (sys.argv[0], mnemonic))
 324
 325                 try:
 326                         files = sys.argv[1:] if len (sys.argv) > 1 else ['-']
 327                         for s in files:
 328                                 callback (FileHelpers.open_file_or_stdin (s))
 329                 except IOError as e:
 330                         if e.errno != errno.EPIPE:
 331                                 sys.exit ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))
 332
 333         @staticmethod
 334         def process_multiple_args (callback, mnemonic):
 335
 336                 if len (sys.argv) == 1 or "--help" in sys.argv:
 337                         sys.exit ("Usage: %s %s..." % (sys.argv[0], mnemonic))
 338
 339                 try:
 340                         for s in sys.argv[1:]:
 341                                 callback (s)
 342                 except IOError as e:
 343                         if e.errno != errno.EPIPE:
 344                                 sys.exit ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))
 345
 346         @staticmethod
 347         def filter_multiple_strings_or_stdin (callback, mnemonic, \
 348                                               separator = " ", \
 349                                               concat_separator = False):
 350
 351                 if "--help" in sys.argv:
 352                         sys.exit ("""Usage:
 353   %s %s...
 354 or:
 355   %s
 356 When called with no arguments, input is read from standard input.
 357 """ % (sys.argv[0], mnemonic, sys.argv[0]))
 358
 359                 try:
 360                         if len (sys.argv) == 1:
 361                                 while (1):
 362                                         line = sys.stdin.readline ()
 363                                         if not len (line):
 364                                                 break
 365                                         if line[-1] == '\n':
 366                                                 line = line[:-1]
 367                                         print (callback (line))
 368                         else:
 369                                 args = sys.argv[1:]
 370                                 if concat_separator != False:
 371                                         args = [concat_separator.join (args)]
 372                                 print (separator.join (callback (x) for x in (args)))
 373                 except IOError as e:
 374                         if e.errno != errno.EPIPE:
 375                                 sys.exit ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))
 376
 377
 378 class Unicode:
 379
 380         @staticmethod
 381         def decode (s):
 382                 return ','.join ("U+%04X" % cp for cp in codepoints (s))
 383
 384         @staticmethod
 385         def parse (s):
 386                 s = re.sub (r"0[xX]", " ", s)
 387                 s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n\t]", " ", s)
 388                 return [int (x, 16) for x in s.split ()]
 389
 390         @staticmethod
 391         def encode (s):
 392                 return ''.join (chr (x) for x in Unicode.parse (s))
 393
 394         shorthands = {
 395                 "ZERO WIDTH NON-JOINER": "ZWNJ",
 396                 "ZERO WIDTH JOINER": "ZWJ",
 397                 "NARROW NO-BREAK SPACE": "NNBSP",
 398                 "COMBINING GRAPHEME JOINER": "CGJ",
 399                 "LEFT-TO-RIGHT MARK": "LRM",
 400                 "RIGHT-TO-LEFT MARK": "RLM",
 401                 "LEFT-TO-RIGHT EMBEDDING": "LRE",
 402                 "RIGHT-TO-LEFT EMBEDDING": "RLE",
 403                 "POP DIRECTIONAL FORMATTING": "PDF",
 404                 "LEFT-TO-RIGHT OVERRIDE": "LRO",
 405                 "RIGHT-TO-LEFT OVERRIDE": "RLO",
 406         }
 407
 408         @staticmethod
 409         def pretty_name (u):
 410                 try:
 411                         s = unicodedata.name (u)
 412                 except ValueError:
 413                         return "XXX"
 414                 s = re.sub (".* LETTER ", "", s)
 415                 s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
 416                 s = re.sub (".* SIGN ", "", s)
 417                 s = re.sub (".* COMBINING ", "", s)
 418                 if re.match (".* VIRAMA", s):
 419                         s = "HALANT"
 420                 if s in Unicode.shorthands:
 421                         s = Unicode.shorthands[s]
 422                 return s
 423
 424         @staticmethod
 425         def pretty_names (s):
 426                 s = re.sub (r"[<+>\\uU]", " ", s)
 427                 s = re.sub (r"0[xX]", " ", s)
 428                 s = [chr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
 429                 return ' + '.join (Unicode.pretty_name (x) for x in s)
 430
 431
 432 class FileHelpers:
 433
 434         @staticmethod
 435         def open_file_or_stdin (f):
 436                 if f == '-':
 437                         return sys.stdin
 438                 return open (f)
 439
 440
 441 class Manifest:
 442
 443         @staticmethod
 444         def read (s, strict = True):
 445
 446                 if not os.path.exists (s):
 447                         if strict:
 448                                 sys.exit ("%s: %s does not exist" % (sys.argv[0], s))
 449                         return
 450
 451                 s = os.path.normpath (s)
 452
 453                 if os.path.isdir (s):
 454
 455                         try:
 456                                 m = open (os.path.join (s, "MANIFEST"))
 457                                 items = [x.strip () for x in m.readlines ()]
 458                                 for f in items:
 459                                         for p in Manifest.read (os.path.join (s, f)):
 460                                                 yield p
 461                         except IOError:
 462                                 if strict:
 463                                         sys.exit ("%s: %s does not exist" % (sys.argv[0], os.path.join (s, "MANIFEST")))
 464                                 return
 465                 else:
 466                         yield s
 467
 468         @staticmethod
 469         def update_recursive (s):
 470
 471                 for dirpath, dirnames, filenames in os.walk (s, followlinks=True):
 472
 473                         for f in ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]:
 474                                 if f in dirnames:
 475                                         dirnames.remove (f)
 476                                 if f in filenames:
 477                                         filenames.remove (f)
 478                         dirnames.sort ()
 479                         filenames.sort ()
 480                         ms = os.path.join (dirpath, "MANIFEST")
 481                         print ("  GEN    %s" % ms)
 482                         m = open (ms, "w")
 483                         for f in filenames:
 484                                 print (f, file=m)
 485                         for f in dirnames:
 486                                 print (f, file=m)
 487                         for f in dirnames:
 488                                 Manifest.update_recursive (os.path.join (dirpath, f))
 489
# This module only provides helpers for other scripts; running it directly
# does nothing.
if __name__ == '__main__':
        pass