ccb0e1c320f578ba4ec4e160775b634838402a9d
[platform/upstream/harfbuzz.git] / test / shaping / hb_test_tools.py
1 #!/usr/bin/python
2
3 import sys, os, re, difflib, unicodedata, errno, cgi
4 from itertools import *
5
# Line-prefix symbols: symbols[i] marks a line that comes from the i-th input
# file (see ZipDiffer.diff_files); a leading space marks a line common to all.
diff_symbols = "-+=*&^%$#@!~/"
# Highlight colors, looked up with the same index as the symbol of the line
# being colored (see DiffColorizer.colorize_lines).
diff_colors = ['red', 'green', 'blue']
8
class ColorFormatter:
    """Namespace for the output formatters used when colorizing diffs.

    Every nested formatter class offers the same four static methods:
    ``start_color(c)`` and ``end_color()`` return the markup that opens and
    closes a highlighted span, ``escape(s)`` makes text safe for the output
    format, and ``newline()`` returns the line terminator.
    """

    class Null:
        """Plain text: no color markup and no escaping."""

        @staticmethod
        def start_color(c):
            return ''

        @staticmethod
        def end_color():
            return ''

        @staticmethod
        def escape(s):
            return s

        @staticmethod
        def newline():
            return '\n'

    class ANSI:
        """Terminal output highlighted with ANSI escape sequences."""

        @staticmethod
        def start_color(c):
            """Return the escape sequence for color *c*.

            Raises KeyError for anything other than 'red', 'green' or 'blue'.
            """
            return {
                'red': '\033[41;37;1m',
                'green': '\033[42;37;1m',
                'blue': '\033[44;37;1m',
            }[c]

        @staticmethod
        def end_color():
            return '\033[m'

        @staticmethod
        def escape(s):
            return s

        @staticmethod
        def newline():
            return '\n'

    class HTML:
        """HTML output highlighted with ``<span>`` background styles."""

        @staticmethod
        def start_color(c):
            return '<span style="background:%s">' % c

        @staticmethod
        def end_color():
            return '</span>'

        @staticmethod
        def escape(s):
            """Escape ``&``, ``<`` and ``>`` in *s* (quotes are left alone)."""
            # Done inline instead of through cgi.escape, which no longer
            # exists on current Python 3.  '&' has to go first so that the
            # entities produced below are not escaped a second time.
            return s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

        @staticmethod
        def newline():
            return '<br/>\n'

    @staticmethod
    def Auto(argv=None, out=sys.stdout):
        """Choose a formatter class from command-line style flags.

        Recognized flags are ``--format`` and ``--format=ansi`` (ANSI),
        ``--format=html`` (HTML) and ``--no-format`` (Null).  Each recognized
        flag is removed from *argv* in place (first occurrence only).  When
        several are present, the one checked last wins; with none, ANSI is
        returned.

        *out* is accepted for interface compatibility but is not consulted.
        """
        if argv is None:
            argv = []
        # Checked in this order; a later match overrides an earlier one.
        flags = (
            ("--format", ColorFormatter.ANSI),
            ("--format=ansi", ColorFormatter.ANSI),
            ("--format=html", ColorFormatter.HTML),
            ("--no-format", ColorFormatter.Null),
        )
        chosen = ColorFormatter.ANSI
        for flag, formatter in flags:
            if flag in argv:
                argv.remove(flag)
                chosen = formatter
        return chosen
65
66
class DiffColorizer:
    """Highlights the tokens that differ between paired lines of a diff.

    Input lines are expected to start with one of *symbols*; the first two
    symbols identify the two sides being compared.
    """

    # One token: a (possibly empty) run of identifier characters followed by
    # at most one non-identifier character.  Both letter cases belong to the
    # identifier run, so names are not split at an uppercase letter.
    diff_regex = re.compile('([a-zA-Z0-9_]*)([^a-zA-Z0-9_]?)')

    def __init__(self, formatter, colors=diff_colors, symbols=diff_symbols):
        """Store the formatter plus the color and symbol tables to use."""
        self.formatter = formatter
        self.colors = colors
        self.symbols = symbols

    def _close_open_colors(self, oo, st):
        """Append end-of-color markup to every side that has a span open."""
        for i in range(2):
            if st[i]:
                oo[i] += self.formatter.end_color()
                st[i] = False

    def colorize_lines(self, lines):
        """Return the two *lines* with their differing tokens highlighted.

        *lines* holds the two sides without their leading symbol; a missing
        side (None) is treated as an empty string.  The result is a list of
        formatted output lines, each prefixed with its symbol; a side whose
        output is empty is omitted.
        """
        lines = (l if l else '' for l in lines)
        # Put every token on a line of its own so difflib compares tokens.
        ss = [self.diff_regex.sub(r'\1\n\2\n', l).splitlines(True) for l in lines]
        oo = ["", ""]         # output accumulated for each side
        st = [False, False]   # whether a color span is currently open
        for l in difflib.Differ().compare(*ss):
            tag = l[0]
            if tag == '?':
                # difflib's intraline hint lines carry no content.
                continue
            if tag == ' ':
                # Common token: close any open span, then emit on both sides.
                self._close_open_colors(oo, st)
                oo = [o + self.formatter.escape(l[2:]) for o in oo]
                continue
            if tag in self.symbols:
                i = self.symbols.index(tag)
                if not st[i]:
                    oo[i] += self.formatter.start_color(self.colors[i])
                    st[i] = True
                oo[i] += self.formatter.escape(l[2:])
                continue
        self._close_open_colors(oo, st)
        # Drop the newlines that were inserted to tokenize the input.
        oo = [o.replace('\n', '') for o in oo]
        return [s1 + s2 + self.formatter.newline()
                for (s1, s2) in zip(self.symbols, oo) if s2]

    def colorize_diff(self, f):
        """Yield the lines of diff *f* with differing line pairs colorized.

        Lines that do not start with one of the symbols are escaped and
        passed through.  Symbol-prefixed lines are buffered per side and
        flushed through colorize_lines() once both sides are present, or
        when another line arrives for a side that is already buffered.
        """
        lines = [None, None]
        for l in f:
            if l[0] not in self.symbols:
                yield self.formatter.escape(l).replace('\n', self.formatter.newline())
                continue
            i = self.symbols.index(l[0])
            if lines[i]:
                # This side is already occupied: flush before replacing it.
                for line in self.colorize_lines(lines):
                    yield line
                lines = [None, None]
            lines[i] = l[1:]
            if all(lines):
                # Both sides present: flush the pair.
                for line in self.colorize_lines(lines):
                    yield line
                lines = [None, None]
        if any(lines):
            # Flush whatever is left over at end of input.
            for line in self.colorize_lines(lines):
                yield line
127
128
class ZipDiffer:
    """Line-by-line comparison of several inputs read in lockstep."""

    @staticmethod
    def diff_files(files, symbols=diff_symbols):
        """Write a positional diff of *files* to standard output.

        The inputs are iterated in parallel.  When every input has the same
        line at a position, that line is written once prefixed with a space;
        otherwise each non-empty line is written prefixed with the symbol of
        the input it came from (``symbols[i]`` for the i-th input).  Inputs
        that end early simply contribute nothing for the remaining positions.

        A broken pipe is ignored; any other IOError is reported on standard
        error and the process exits with status 1.
        """
        # The name differs between Python 2 and Python 3.
        try:
            from itertools import izip_longest as zip_longest
        except ImportError:
            from itertools import zip_longest

        files = tuple(files)  # in case it's a generator, copy it
        try:
            for lines in zip_longest(*files):
                first = lines[0]
                if all(first == other for other in lines[1:]):
                    sys.stdout.writelines([" ", first])
                    continue

                for i, l in enumerate(lines):
                    if l:
                        sys.stdout.writelines([symbols[i], l])
        except IOError as e:
            if e.errno != errno.EPIPE:
                sys.stderr.write("%s: %s: %s\n" % (sys.argv[0], e.filename, e.strerror))
                sys.exit(1)
147
148
class DiffFilters:
    """Generators that pass through only part of a diff."""

    @staticmethod
    def filter_failures(f):
        """Yield the lines of every test case in *f* that did not pass.

        Cases are the groups produced by DiffHelpers.separate_test_cases();
        a case is kept when DiffHelpers.test_passed() is false for it.
        """
        for _key, group in DiffHelpers.separate_test_cases(f):
            # Materialize the group: it is inspected first and emitted after.
            case = list(group)
            if DiffHelpers.test_passed(case):
                continue
            for line in case:
                yield line
157
class Stat:
    """Running tally: how many tests were added and the sum of their ``freq``."""

    def __init__(self):
        self.count, self.freq = 0, 0

    def add(self, test):
        """Count *test* once and accumulate its ``freq`` attribute."""
        self.count = self.count + 1
        self.freq = self.freq + test.freq
167
class Stats:
    """Pass/fail tallies for a set of tests plus simple summary statistics."""

    def __init__(self):
        self.passed = Stat()
        self.failed = Stat()
        self.total = Stat()

    def add(self, test):
        """Record *test* in the total and in the passed or failed tally."""
        self.total.add(test)
        bucket = self.passed if test.passed else self.failed
        bucket.add(test)

    def mean(self):
        """Return the fraction of recorded tests that passed."""
        return float(self.passed.count) / self.total.count

    def variance(self):
        """Return pass-fraction times fail-fraction."""
        n = self.total.count
        pass_fraction = float(self.passed.count) / n
        fail_fraction = float(self.failed.count) / n
        return pass_fraction * fail_fraction

    def stddev(self):
        """Return the square root of variance()."""
        return self.variance() ** .5

    def zscore(self, population):
        """Calculate the standard score.
           Population is the Stats for population.
           Self is Stats for sample.
           Returns larger absolute value if sample is highly unlikely to be random.
           Anything outside of -3..+3 is very unlikely to be random.
           See: http://en.wikipedia.org/wiki/Standard_score"""
        deviation = self.mean() - population.mean()
        return deviation / population.stddev()
201
202
203
204
class DiffSinks:
    """Consumers that read a whole diff and print a summary to standard output."""

    @staticmethod
    def print_stat(f):
        """Print how many test cases in diff *f* passed and how many failed.

        Cases are the groups produced by DiffHelpers.separate_test_cases()
        and are judged by DiffHelpers.test_passed().  An input without any
        case reports a failure rate of 0%.
        """
        passed = 0
        failed = 0
        # XXX port to Stats, but that would really slow us down here
        for key, lines in DiffHelpers.separate_test_cases(f):
            if DiffHelpers.test_passed(lines):
                passed += 1
            else:
                failed += 1
        total = passed + failed
        # Guard the percentage so an empty diff does not divide by zero.
        percent = 100. * failed / total if total else 0.
        print("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, percent))

    @staticmethod
    def print_ngrams(f, ns=(1,2,3)):
        """Print a z-score line for every frequently failing code point n-gram.

        Every test case in diff *f* is parsed into a Test, and each n-gram
        (for every n in *ns*) of its ``unicodes`` is tallied in a Stats.
        N-grams with at least 30 failures are printed with their z-score
        relative to the Stats of all cases.
        """
        gens = tuple(Ngram.generator(n) for n in ns)
        allstats = Stats()
        allgrams = {}
        for key, lines in DiffHelpers.separate_test_cases(f):
            test = Test(lines)
            allstats.add(test)

            # A case without a <...> line leaves unicodes as None; it then
            # simply contributes no n-grams.
            unicodes = test.unicodes or ()
            for gen in gens:
                for ngram in gen(unicodes):
                    if ngram not in allgrams:
                        allgrams[ngram] = Stats()
                    allgrams[ngram].add(test)

        for ngram, stats in allgrams.items():
            if stats.failed.count < 30:  # for statistical reasons
                continue
            print("zscore: %9f failed: %6d passed: %6d ngram: <%s>" % (stats.zscore(allstats), stats.failed.count, stats.passed.count, ','.join("U+%04X" % u for u in ngram)))
244
245
246
class Test:
    """One test case assembled from the diff lines that belong to it.

    Attributes set by the constructor:
      freq        always 1
      passed      False as soon as any line starts with something other
                  than a space
      identifier  the text between the leading symbol and the first colon
                  of the first line that has a colon, else None
      text        payload of the last ``(...)`` line, else None
      unicodes    code points parsed from the last ``<...>`` line, else None
      glyphs      payload of the last ``[...]`` line, else None
    """

    def __init__(self, lines):
        self.freq = 1
        self.passed = True
        self.identifier = None
        self.text = None
        self.unicodes = None
        self.glyphs = None
        for l in lines:
            symbol = l[0]
            if symbol != ' ':
                self.passed = False
            # i: index of the opening bracket of the payload.
            i = 1
            if ':' in l:
                i = l.index(':')
                if not self.identifier:
                    self.identifier = l[1:i]
                i = i + 2  # Skip colon and space
            # j: index of the closing bracket, skipping a trailing newline.
            j = -1
            if l[j] == '\n':
                j -= 1
            brackets = l[i] + l[j]
            # Slice up to the closing bracket wherever it actually is, so a
            # line without a trailing newline keeps its last character.
            l = l[i+1:j]
            if brackets == '()':
                self.text = l
            elif brackets == '<>':
                self.unicodes = Unicode.parse(l)
            elif brackets == '[]':
                # XXX we don't handle failed tests here
                self.glyphs = l
278
279
class DiffHelpers:
    """Helpers for grouping and judging the lines of a diff."""

    @staticmethod
    def separate_test_cases(f):
        '''Group consecutive lines of f by identifier.

           The identifier of a line is the text between its first character
           and its first colon; a line with no colon after its first
           character is its own identifier.  Returns the groupby iterator
           of (identifier, lines) pairs.'''

        def identifier(line):
            if ':' not in line[1:]:
                return line
            return line[1:line.index(':')]

        return groupby(f, key=identifier)

    @staticmethod
    def test_passed(lines):
        '''Tell whether the test case made of lines counts as passed.

           A case passes when every line starts with a space.  It is also
           treated as passed when any "+" line mentions one of a few
           known placeholder glyph names.'''
        lines = list(lines)
        # XXX This is a hack, but does the job for now.
        forgiven = (
            "space+0|space+0",
            "uni25CC",
            "dottedcircle",
            "glyph0",
            "gid0",
            "notdef",
        )
        for marker in forgiven:
            if any(marker in l for l in lines if l[0] == '+'):
                return True
        return all(l[0] == ' ' for l in lines)
305
306
class FilterHelpers:
    """Adapters that turn a line filter into a function printing its output."""

    @staticmethod
    def filter_printer_function(filter_callback):
        """Return a function that prints each item of filter_callback(f).

        Every item is printed on a line of its own (a newline is appended).
        """
        def printer(f):
            for line in filter_callback(f):
                # Single-argument form: behaves the same whether print is a
                # statement (Python 2) or a function (Python 3).
                print(line)
        return printer

    @staticmethod
    def filter_printer_function_no_newline(filter_callback):
        """Return a function that writes each item of filter_callback(f) as is.

        Nothing is appended, so the items are expected to carry their own
        line endings.
        """
        def printer(f):
            for line in filter_callback(f):
                sys.stdout.writelines([line])
        return printer
322
323
class Ngram:
    """Factory for sliding-window n-gram generators."""

    @staticmethod
    def generator(n):
        """Return a generator function yielding every run of *n* consecutive items.

        The returned function takes an iterable and yields tuples of length
        *n*, advancing by one item each time.  It also carries *n* as its
        ``n`` attribute.
        """

        def gen(f):
            window = []
            for item in f:
                window.append(item)
                if len(window) != n:
                    continue
                yield tuple(window)
                # Slide the window forward by dropping its oldest item.
                del window[:1]

        gen.n = n
        return gen
339
340
class UtilMains:
    """Command-line drivers that feed ``sys.argv`` or standard input to a callback.

    All three entry points print a usage message and exit with status 1 when
    ``--help`` is among the arguments.  A broken pipe is ignored; any other
    IOError is reported on standard error and ends the process with status 1.
    """

    @staticmethod
    def _report_io_error(e):
        """Report IOError *e* and exit with status 1, unless it is a broken pipe."""
        if e.errno != errno.EPIPE:
            sys.stderr.write("%s: %s: %s\n" % (sys.argv[0], e.filename, e.strerror))
            sys.exit(1)

    @staticmethod
    def process_multiple_files(callback, mnemonic = "FILE"):
        """Call *callback* with an open file for every command-line argument.

        Without arguments, standard input ('-') is processed instead.
        """
        if "--help" in sys.argv:
            print("Usage: %s %s..." % (sys.argv[0], mnemonic))
            sys.exit(1)

        try:
            files = sys.argv[1:] if len(sys.argv) > 1 else ['-']
            for s in files:
                callback(FileHelpers.open_file_or_stdin(s))
        except IOError as e:
            UtilMains._report_io_error(e)

    @staticmethod
    def process_multiple_args(callback, mnemonic):
        """Call *callback* with every command-line argument, in order.

        At least one argument is required; otherwise the usage message is
        printed and the process exits with status 1.
        """
        if len(sys.argv) == 1 or "--help" in sys.argv:
            print("Usage: %s %s..." % (sys.argv[0], mnemonic))
            sys.exit(1)

        try:
            for s in sys.argv[1:]:
                callback(s)
        except IOError as e:
            UtilMains._report_io_error(e)

    @staticmethod
    def filter_multiple_strings_or_stdin(callback, mnemonic, \
                                         separator = " ", \
                                         concat_separator = False):
        """Print *callback* applied to the arguments, or to each line of stdin.

        With arguments, the results of the callback are joined with
        *separator* and printed as one line; when *concat_separator* is not
        False, the arguments are first joined with it into a single string.
        Without arguments, every line of standard input (minus its trailing
        newline) is passed to the callback and the result printed.
        """
        if "--help" in sys.argv:
            print("Usage:\n  %s %s...\nor:\n  %s\n\nWhen called with no arguments, input is read from standard input." \
                  % (sys.argv[0], mnemonic, sys.argv[0]))
            sys.exit(1)

        try:
            if len(sys.argv) == 1:
                # readline() rather than file iteration, so that each line is
                # handled as soon as it arrives.
                for line in iter(sys.stdin.readline, ''):
                    if line.endswith('\n'):
                        line = line[:-1]
                    print(callback(line))
            else:
                args = sys.argv[1:]
                if concat_separator != False:
                    args = [concat_separator.join(args)]
                print(separator.join(callback(x) for x in args))
        except IOError as e:
            UtilMains._report_io_error(e)
402
403
class Unicode:
    """Conversions between text, hexadecimal code points and character names.

    Results are returned as the interpreter's native ``str``: UTF-8 encoded
    bytes on Python 2 and text on Python 3.
    """

    @staticmethod
    def _chr(codepoint):
        """Return the one-character text string for *codepoint*."""
        try:
            return unichr(codepoint)
        except NameError:
            # Python 3: chr() already covers all of Unicode.
            return chr(codepoint)

    @staticmethod
    def _native(text):
        """Return text string *text* as the native ``str`` type."""
        # On Python 2 ``str`` is the byte type; keep handing out UTF-8 bytes.
        if str is bytes:
            return text.encode('utf-8')
        return text

    @staticmethod
    def decode(s):
        """Return *s* as ``<U+XXXX,U+XXXX,...>``.

        *s* may be UTF-8 encoded bytes or a text string.
        """
        if isinstance(s, bytes):
            s = s.decode('utf-8')
        return '<' + ','.join("U+%04X" % ord(u) for u in s) + '>'

    @staticmethod
    def parse(s):
        """Return the list of integers named by the hexadecimal numbers in *s*.

        ``0x`` prefixes and the characters ``< + > , ; & # \\ x X u U`` are
        treated as separators, as are newlines, tabs and spaces.  Raises
        ValueError if a remaining token is not hexadecimal.
        """
        s = re.sub(r"0[xX]", " ", s)
        # Whitespace is spelled out explicitly so it cannot be lost again.
        s = re.sub(r"[<+>,;&#\\xXuU\n\t ]", " ", s)
        return [int(x, 16) for x in s.split(' ') if len(x)]

    @staticmethod
    def encode(s):
        """Return the characters whose code points are listed in *s* (see parse)."""
        text = u''.join(Unicode._chr(x) for x in Unicode.parse(s))
        return Unicode._native(text)

    shorthands = {
        "ZERO WIDTH NON-JOINER": "ZWNJ",
        "ZERO WIDTH JOINER": "ZWJ",
        "NARROW NO-BREAK SPACE": "NNBSP",
        "COMBINING GRAPHEME JOINER": "CGJ",
        "LEFT-TO-RIGHT MARK": "LRM",
        "RIGHT-TO-LEFT MARK": "RLM",
        "LEFT-TO-RIGHT EMBEDDING": "LRE",
        "RIGHT-TO-LEFT EMBEDDING": "RLE",
        "POP DIRECTIONAL FORMATTING": "PDF",
        "LEFT-TO-RIGHT OVERRIDE": "LRO",
        "RIGHT-TO-LEFT OVERRIDE": "RLO",
    }

    @staticmethod
    def pretty_name(u):
        """Return a shortened form of the Unicode name of character *u*.

        Everything up to " LETTER ", " SIGN " or " COMBINING " is dropped,
        vowel signs become "<name>-MATRA", and names listed in ``shorthands``
        are abbreviated.  Characters without a name yield "XXX".
        """
        try:
            s = unicodedata.name(u)
        except ValueError:
            return "XXX"
        s = re.sub(".* LETTER ", "", s)
        s = re.sub(".* VOWEL SIGN (.*)", r"\1-MATRA", s)
        s = re.sub(".* SIGN ", "", s)
        s = re.sub(".* COMBINING ", "", s)
        if re.match(".* VIRAMA", s):
            s = "HALANT"
        return Unicode.shorthands.get(s, s)

    @staticmethod
    def pretty_names(s):
        """Return the pretty names of the code points in *s*, joined by " + "."""
        s = re.sub(r"[<+>\\uU]", " ", s)
        s = re.sub(r"0[xX]", " ", s)
        chars = [Unicode._chr(int(x, 16)) for x in re.split('[, \n]', s) if len(x)]
        return Unicode._native(u' + '.join(Unicode.pretty_name(c) for c in chars))
456
457
class FileHelpers:
    """Small helpers for opening input files."""

    @staticmethod
    def open_file_or_stdin(f):
        """Return ``sys.stdin`` when *f* is '-', else *f* opened for reading.

        Raises IOError if the file cannot be opened.  Closing the returned
        file is left to the caller.
        """
        if f == '-':
            return sys.stdin
        # open() is available on every Python version; the file() builtin
        # used previously exists only on Python 2.
        return open(f)
465
466
class Manifest:
    """Reads and regenerates the per-directory ``MANIFEST`` listings."""

    @staticmethod
    def read(s, strict = True):
        """Yield the files reachable from path *s* through MANIFEST files.

        A path that is not a directory is yielded as is (normalized).  For a
        directory, every entry named in its ``MANIFEST`` file is read in
        turn; blank lines in the manifest are ignored.  Entries found in a
        manifest are always read strictly.

        When *s* or a directory's MANIFEST is missing: with *strict* the
        problem is reported on standard error and the process exits with
        status 1 (once the generator is iterated); otherwise nothing is
        yielded for it.
        """
        if not os.path.exists(s):
            if strict:
                sys.stderr.write("%s: %s does not exist\n" % (sys.argv[0], s))
                sys.exit(1)
            return

        s = os.path.normpath(s)

        if not os.path.isdir(s):
            yield s
            return

        manifest = os.path.join(s, "MANIFEST")
        try:
            # Read the listing up front so the file is closed before recursing.
            with open(manifest) as m:
                items = [x.strip() for x in m]
        except IOError:
            if strict:
                sys.stderr.write("%s: %s does not exist\n" % (sys.argv[0], manifest))
                sys.exit(1)
            return

        for f in items:
            if not f:
                # An empty entry would resolve to this same directory and
                # recurse without end.
                continue
            for p in Manifest.read(os.path.join(s, f)):
                yield p

    @staticmethod
    def update_recursive(s):
        """Write a ``MANIFEST`` file into *s* and every directory below it.

        Each manifest lists the directory's files and then its
        subdirectories, each group sorted, one name per line.  Entries named
        MANIFEST, README, LICENSE, COPYING, AUTHORS, SOURCES or ChangeLog
        are left out, and directories with those names are not descended
        into.  A "GEN" progress line is printed for each manifest written.
        """
        skipped = ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]

        # os.walk() itself descends into whatever is left in dirnames, so
        # every directory is visited exactly once without further recursion.
        for dirpath, dirnames, filenames in os.walk(s, followlinks=True):

            for f in skipped:
                if f in dirnames:
                    dirnames.remove(f)
                if f in filenames:
                    filenames.remove(f)
            dirnames.sort()
            filenames.sort()
            ms = os.path.join(dirpath, "MANIFEST")
            print("  GEN    %s" % ms)
            with open(ms, "w") as m:
                for f in filenames:
                    m.write(f + "\n")
                for f in dirnames:
                    m.write(f + "\n")
517
# Nothing to do when this file is executed directly.
if __name__ == '__main__':
        pass