47fa6eb9fd7f9b5945096c73b793b8cd3140701c
[framework/uifw/harfbuzz.git] / test / shaping / hb_test_tools.py
1 #!/usr/bin/python
2
3 import sys, os, re, difflib, unicodedata, errno, cgi
4 from itertools import *
5
# One marker character per input file of a zipped diff: ZipDiffer prefixes
# lines of file i with diff_symbols[i], and DiffColorizer maps a line's first
# character back to its index in this string.
diff_symbols = "-+=*&^%$#@!~/"
# Highlight colours; DiffColorizer uses diff_colors[i] for the side whose
# marker is diff_symbols[i].
diff_colors = ['red', 'green', 'blue']
8
class ColorFormatter:
        """Namespace of interchangeable output formatters.

        Every nested formatter class offers the same four static methods:
        start_color (c), end_color (), escape (s) and newline ().
        """

        class Null:
                """Plain text: no colour markup and no escaping."""
                @staticmethod
                def start_color (c): return ''
                @staticmethod
                def end_color (): return ''
                @staticmethod
                def escape (s): return s
                @staticmethod
                def newline (): return '\n'

        class ANSI:
                """Terminal output highlighted with ANSI escape sequences."""
                @staticmethod
                def start_color (c):
                        # Raises KeyError for a colour name not listed here.
                        return {
                                'red': '\033[41;37;1m',
                                'green': '\033[42;37;1m',
                                'blue': '\033[44;37;1m',
                        }[c]
                @staticmethod
                def end_color ():
                        return '\033[m'
                @staticmethod
                def escape (s): return s
                @staticmethod
                def newline (): return '\n'

        class HTML:
                """HTML output: coloured <span>s, escaped text, <br/> line ends."""
                @staticmethod
                def start_color (c):
                        return '<span style="background:%s">' % c
                @staticmethod
                def end_color ():
                        return '</span>'
                @staticmethod
                def escape (s):
                        # cgi.escape() no longer exists in Python 3.8+; html.escape()
                        # with quote=False escapes the same characters (&, <, >).
                        try:
                                from html import escape as html_escape
                        except ImportError: # Python 2
                                return cgi.escape (s)
                        return html_escape (s, quote=False)
                @staticmethod
                def newline (): return '<br/>\n'

        @staticmethod
        def Auto (argv = None, out = sys.stdout):
                """Pick a formatter class from command-line flags.

                Recognised flags are --format, --format=ansi, --format=html and
                --no-format.  The first occurrence of each recognised flag is
                removed from `argv` in place.  When several flags are present,
                the one listed last in the table below wins.  ANSI is the
                default.  `out` is accepted but not used.
                """
                if argv is None:
                        argv = []
                # Order matters: later entries override earlier ones.
                flags = (
                        ("--format", ColorFormatter.ANSI),
                        ("--format=ansi", ColorFormatter.ANSI),
                        ("--format=html", ColorFormatter.HTML),
                        ("--no-format", ColorFormatter.Null),
                )
                chosen = ColorFormatter.ANSI
                for flag, formatter in flags:
                        if flag in argv:
                                argv.remove (flag)
                                chosen = formatter
                return chosen
65
66
class DiffColorizer:
        """Highlights the differing words between paired lines of a diff.

        Only two sides are handled: the lines marked with symbols[0] and
        symbols[1].
        """

        # A run of word characters followed by at most one separator character.
        # The character ranges must include A-Z, otherwise every uppercase
        # letter is treated as a separator and words are split at capitals.
        diff_regex = re.compile ('([a-zA-Z0-9_]*)([^a-zA-Z0-9_]?)')

        def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
                """`formatter` provides start_color/end_color/escape/newline;
                `colors[i]` highlights the side marked with `symbols[i]`."""
                self.formatter = formatter
                self.colors = colors
                self.symbols = symbols

        def colorize_lines (self, lines):
                """Diff a pair of lines token by token.

                `lines` is a two-item sequence; a falsy item (None or '') stands
                for a missing side.  Returns a list with one output line per
                non-empty side, each prefixed with its symbol and terminated
                with the formatter's newline, with differing tokens wrapped in
                colour markup.
                """
                lines = (l if l else '' for l in lines)
                # Put every word and every separator on a line of its own so
                # that difflib compares tokens rather than whole lines.
                tokens = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
                out = ["", ""]
                in_color = [False, False]

                def close_spans ():
                        for i in range (2):
                                if in_color[i]:
                                        out[i] += self.formatter.end_color ()
                                        in_color[i] = False

                for l in difflib.Differ ().compare (*tokens):
                        tag, text = l[0], l[2:]
                        if tag == '?':
                                # difflib's intra-line hint rows carry no content.
                                continue
                        if tag == ' ':
                                # Token common to both sides: end any highlight.
                                close_spans ()
                                escaped = self.formatter.escape (text)
                                for i in range (2):
                                        out[i] += escaped
                                continue
                        if tag in self.symbols:
                                i = self.symbols.index (tag)
                                if not in_color[i]:
                                        out[i] += self.formatter.start_color (self.colors[i])
                                        in_color[i] = True
                                out[i] += self.formatter.escape (text)
                                continue
                close_spans ()
                # Drop the artificial token separators (and the original line end).
                out = [o.replace ('\n', '') for o in out]
                return [sym + text + self.formatter.newline () for (sym, text) in zip (self.symbols, out) if text]

        def colorize_diff (self, f):
                """Generator: colourize the diff lines read from iterable `f`.

                Lines whose first character is not one of the symbols are
                escaped and passed through.  Marked lines are buffered per side
                and emitted through colorize_lines () once both sides are
                present, when a side would be overwritten, or at end of input.
                """
                lines = [None, None]
                for l in f:
                        if not l or l[0] not in self.symbols:
                                yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
                                continue
                        i = self.symbols.index (l[0])
                        if lines[i]:
                                # This side is already pending: flush before replacing it.
                                for line in self.colorize_lines (lines):
                                        yield line
                                lines = [None, None]
                        lines[i] = l[1:]
                        if all (lines):
                                # Both sides collected: flush the pair.
                                for line in self.colorize_lines (lines):
                                        yield line
                                lines = [None, None]
                if any (lines):
                        # Flush a trailing unpaired line.
                        for line in self.colorize_lines (lines):
                                yield line
127
128
class ZipDiffer:
        """Line-by-line ("zipped") comparison of several files."""

        @staticmethod
        def diff_files (files, symbols=diff_symbols):
                """Write a zipped diff of `files` to standard output.

                `files` is an iterable of line iterables that are read in
                lockstep.  A row whose lines are all equal is written once,
                prefixed with a space; otherwise each non-empty line is written
                prefixed with `symbols[i]`, i being the index of its file.  A
                broken pipe is ignored; any other IOError is reported on
                stderr and the process exits with status 1.
                """
                # itertools.izip_longest was renamed zip_longest in Python 3.
                try:
                        from itertools import izip_longest as zip_longest
                except ImportError:
                        from itertools import zip_longest

                files = tuple (files) # in case it's a generator, copy it
                try:
                        for lines in zip_longest (*files):
                                if all (lines[0] == line for line in lines[1:]):
                                        sys.stdout.writelines ([" ", lines[0]])
                                        continue

                                for i, l in enumerate (lines):
                                        # Exhausted files contribute None; skip them.
                                        if l:
                                                sys.stdout.writelines ([symbols[i], l])
                except IOError as e:
                        if e.errno != errno.EPIPE:
                                sys.stderr.write ("%s: %s: %s\n" % (sys.argv[0], e.filename, e.strerror))
                                sys.exit (1)
147
148
class DiffFilters:
        """Generators that narrow a diff down to selected lines."""

        @staticmethod
        def filter_failures (f):
                """Yield only the lines of `f` that belong to failed test cases,
                as judged by DiffHelpers.test_passed ()."""
                for _ident, group in DiffHelpers.separate_test_cases (f):
                        # The group is a one-shot iterator; keep a copy so it can
                        # be both inspected and re-emitted.
                        case = list (group)
                        if DiffHelpers.test_passed (case):
                                continue
                        for entry in case:
                                yield entry
157
class Stat:
        """Running tally: how many tests were added and their summed `freq`."""

        def __init__ (self):
                self.count = self.freq = 0

        def add (self, test):
                """Count one more test and accumulate its `freq` attribute."""
                self.count = self.count + 1
                self.freq = self.freq + test.freq
167
class Stats:
        """Pass/fail tallies for a set of tests, plus simple statistics that
        treat each test as a pass (1) / fail (0) outcome."""

        def __init__ (self):
                self.passed = Stat ()
                self.failed = Stat ()
                self.total  = Stat ()

        def add (self, test):
                """Record `test` in the total and in the passed or failed tally,
                depending on its `passed` attribute."""
                self.total.add (test)
                if test.passed:
                        self.passed.add (test)
                else:
                        self.failed.add (test)

        def mean (self):
                """Fraction of tests that passed.
                Raises ZeroDivisionError if no test has been added."""
                return float (self.passed.count) / self.total.count

        def variance (self):
                """Pass fraction times fail fraction.
                Raises ZeroDivisionError if no test has been added."""
                return (float (self.passed.count) / self.total.count) * \
                       (float (self.failed.count) / self.total.count)

        def stddev (self):
                """Square root of variance ()."""
                return self.variance () ** .5

        def zscore (self, population):
                """Calculate the standard score.
                   Population is the Stats for population.
                   Self is Stats for sample.
                   Returns larger absolute value if sample is highly unlikely to be random.
                   Anything outside of -3..+3 is very unlikely to be random.
                   If the population has zero standard deviation (every test
                   passed, or every test failed) the result is 0.0 when the
                   means agree and a signed infinity otherwise.
                   See: http://en.wikipedia.org/wiki/Standard_score"""

                delta = self.mean () - population.mean ()
                sigma = population.stddev ()
                if sigma == 0:
                        # Avoid dividing by zero for a uniform population.
                        if delta == 0:
                                return 0.0
                        return float ('inf') if delta > 0 else float ('-inf')
                return delta / sigma
201
202
203
204
class DiffSinks:
        """Consumers that read a whole diff and print a summary of it."""

        @staticmethod
        def print_stat (f):
                """Print how many test cases in diff `f` passed and failed.
                An empty diff is reported as 0 out of 0 with 0% failures."""
                passed = 0
                failed = 0
                # XXX port to Stats, but that would really slow us down here
                for key, lines in DiffHelpers.separate_test_cases (f):
                        if DiffHelpers.test_passed (lines):
                                passed += 1
                        else:
                                failed += 1
                total = passed + failed
                # Guard the percentage against an empty input.
                percent = 100. * failed / total if total else 0.
                sys.stdout.write ("%d out of %d tests passed.  %d failed (%g%%)\n" % (passed, total, failed, percent))

        @staticmethod
        def print_ngrams (f, ns=(1,2,3)):
                """Print a z-score line for every n-gram of code points (n in
                `ns`) that occurs in at least 30 failed test cases of diff `f`.
                The z-score compares the n-gram's pass rate with the overall
                pass rate."""
                gens = tuple (Ngram.generator (n) for n in ns)
                allstats = Stats ()
                allgrams = {}
                for key, lines in DiffHelpers.separate_test_cases (f):
                        test = Test (lines)
                        allstats.add (test)

                        # A test case without a <...> line has unicodes == None.
                        unicodes = test.unicodes or ()
                        for gen in gens:
                                for ngram in gen (unicodes):
                                        if ngram not in allgrams:
                                                allgrams[ngram] = Stats ()
                                        allgrams[ngram].add (test)

                # Keep only n-grams with enough failures, for statistical reasons.
                importantgrams = {}
                for ngram, stats in allgrams.items ():
                        if stats.failed.count >= 30:
                                importantgrams[ngram] = stats
                allgrams = importantgrams
                del importantgrams

                for ngram, stats in allgrams.items ():
                        sys.stdout.write ("zscore: %9f failed: %6d passed: %6d ngram: <%s>\n" % (stats.zscore (allstats), stats.failed.count, stats.passed.count, ','.join ("U+%04X" % u for u in ngram)))
244
245
246
class Test:
        """One test case parsed from the diff lines that share an identifier.

        Attributes: freq (always 1), passed (True if every line starts with a
        space), identifier (text between the marker character and the first
        colon), text (payload of a "(...)" line), unicodes (parsed payload of a
        "<...>" line) and glyphs (payload of a "[...]" line).
        """

        def __init__ (self, lines):
                self.freq = 1
                self.passed = True
                self.identifier = None
                self.text = None
                self.unicodes = None
                self.glyphs = None
                for l in lines:
                        if not l:
                                continue
                        symbol = l[0]
                        if symbol != ' ':
                                self.passed = False
                        i = 1
                        if ':' in l:
                                i = l.index (':')
                                if not self.identifier:
                                        self.identifier = l[1:i]
                                i = i + 2 # Skip colon and space
                        # j indexes the closing bracket: the last character, or the
                        # one before a trailing newline.
                        j = -1
                        if l[j] == '\n':
                                j -= 1
                        if i >= len (l) + j:
                                # Too short to hold a bracketed payload.
                                continue
                        brackets = l[i] + l[j]
                        # Slice up to the closing bracket itself, so lines without
                        # a trailing newline are not truncated by one character.
                        l = l[i+1:j]
                        if brackets == '()':
                                self.text = l
                        elif brackets == '<>':
                                self.unicodes = Unicode.parse (l)
                        elif brackets == '[]':
                                # XXX we don't handle failed tests here
                                self.glyphs = l
278
279
class DiffHelpers:
        """Small utilities for working with lines of a zipped diff."""

        @staticmethod
        def separate_test_cases (f):
                '''Group consecutive lines of f by identifier.

                   A line's identifier is the text between its first character
                   and its first colon; a line with no colon after the first
                   character is its own identifier.  Returns an iterator of
                   (identifier, lines-iterator) pairs.'''

                def ident_of (entry):
                        if entry.find (':', 1) == -1:
                                return entry
                        return entry[1:entry.find (':')]

                return groupby (f, key=ident_of)

        @staticmethod
        def test_passed (lines):
                '''True when every line starts with a space, i.e. none of them
                   carries a diff marker.'''
                return not any (entry[0] != ' ' for entry in lines)
297
298
class FilterHelpers:
        """Adapters that turn a line filter into a function printing its output."""

        @staticmethod
        def filter_printer_function (filter_callback):
                """Return a function that runs `filter_callback` on its argument
                and writes each resulting item to stdout followed by a newline."""
                def printer (f):
                        for line in filter_callback (f):
                                # Written via sys.stdout so the code is valid on
                                # both Python 2 and 3.
                                sys.stdout.write ("%s\n" % (line,))
                return printer

        @staticmethod
        def filter_printer_function_no_newline (filter_callback):
                """Like filter_printer_function (), but writes each item as is,
                for filters whose lines already carry their newline."""
                def printer (f):
                        for line in filter_callback (f):
                                sys.stdout.write (line)
                return printer
314
315
class Ngram:
        """Factory for sliding-window n-gram generators."""

        @staticmethod
        def generator (n):
                """Return a generator function that yields, as tuples, every run
                of `n` consecutive items of the iterable it is given.  The
                returned function carries `n` as its `.n` attribute."""

                def gen (f):
                        window = []
                        for item in f:
                                window.append (item)
                                if len (window) != n:
                                        continue
                                yield tuple (window)
                                # Slide the window forward by one item.
                                del window[0]

                gen.n = n
                return gen
331
332
class UtilMains:
        """Ready-made main () bodies for small command-line utilities.
        All of them read their arguments from sys.argv."""

        @staticmethod
        def _report_io_error (e):
                """Report IOError `e` on stderr and exit with status 1, unless
                it is a broken pipe (EPIPE), which is silently ignored."""
                if e.errno != errno.EPIPE:
                        sys.stderr.write ("%s: %s: %s\n" % (sys.argv[0], e.filename, e.strerror))
                        sys.exit (1)

        @staticmethod
        def process_multiple_files (callback, mnemonic = "FILE"):
                """Call `callback` with an open file for each command-line
                argument ('-' or no argument at all means standard input).
                Each file other than stdin is closed once its callback returns.
                With --help, prints usage and exits with status 1."""

                if "--help" in sys.argv:
                        sys.stdout.write ("Usage: %s %s...\n" % (sys.argv[0], mnemonic))
                        sys.exit (1)

                try:
                        files = sys.argv[1:] if len (sys.argv) > 1 else ['-']
                        for s in files:
                                f = FileHelpers.open_file_or_stdin (s)
                                try:
                                        callback (f)
                                finally:
                                        if f is not sys.stdin:
                                                f.close ()
                except IOError as e:
                        UtilMains._report_io_error (e)

        @staticmethod
        def process_multiple_args (callback, mnemonic):
                """Call `callback` with each command-line argument in turn.
                With no arguments or with --help, prints usage and exits with
                status 1."""

                if len (sys.argv) == 1 or "--help" in sys.argv:
                        sys.stdout.write ("Usage: %s %s...\n" % (sys.argv[0], mnemonic))
                        sys.exit (1)

                try:
                        for s in sys.argv[1:]:
                                callback (s)
                except IOError as e:
                        UtilMains._report_io_error (e)

        @staticmethod
        def filter_multiple_strings_or_stdin (callback, mnemonic, \
                                              separator = " ", \
                                              concat_separator = False):
                """Print `callback` applied to the command-line arguments, or to
                each line of standard input when there are no arguments.

                Results for arguments are joined with `separator` and printed
                on one line.  If `concat_separator` is not False, the arguments
                are first joined with it into a single string.  Lines from
                stdin have their trailing newline removed and each result is
                printed on its own line.  With --help, prints usage and exits
                with status 1."""

                if "--help" in sys.argv:
                        sys.stdout.write ("Usage:\n  %s %s...\nor:\n  %s\n\nWhen called with no arguments, input is read from standard input.\n" \
                              % (sys.argv[0], mnemonic, sys.argv[0]))
                        sys.exit (1)

                try:
                        if len (sys.argv) == 1:
                                # readline () returns '' only at end of input.
                                for line in iter (sys.stdin.readline, ''):
                                        if line[-1] == '\n':
                                                line = line[:-1]
                                        sys.stdout.write ("%s\n" % (callback (line),))
                        else:
                                args = sys.argv[1:]
                                if concat_separator != False:
                                        args = [concat_separator.join (args)]
                                sys.stdout.write ("%s\n" % (separator.join (callback (x) for x in args),))
                except IOError as e:
                        UtilMains._report_io_error (e)
394
395
class Unicode:
        """Conversions between text and "U+XXXX" code-point notation."""

        # unichr () exists only on Python 2; chr () is its Python 3 equivalent.
        try:
                _chr = staticmethod (unichr)
        except NameError:
                _chr = staticmethod (chr)

        @staticmethod
        def _native (s):
                """Return text `s` as the native string type: UTF-8 encoded
                bytes on Python 2, unchanged on Python 3."""
                if str is bytes:
                        return s.encode ('utf-8')
                return s

        @staticmethod
        def decode (s):
                """Return `s` (text, or UTF-8 encoded bytes) written as
                "<U+XXXX,U+XXXX,...>"."""
                if isinstance (s, bytes):
                        s = s.decode ('utf-8')
                return '<' + ','.join ("U+%04X" % ord (u) for u in s) + '>'

        @staticmethod
        def parse (s):
                """Return the list of integer code points written in `s`.
                Hex numbers may carry U+, \\u or 0x prefixes and be separated
                by commas, angle brackets, spaces, tabs or newlines.
                Raises ValueError for a token that is not hexadecimal."""
                # Turn prefixes and separators (including tabs) into spaces.
                s = re.sub (r"[<+>,\\uU\n\t ]", " ", s)
                s = re.sub (r"0[xX]", " ", s)
                return [int (x, 16) for x in s.split (' ') if len (x)]

        @staticmethod
        def encode (s):
                """Return the text whose code points are written in `s`
                (see parse ()), as a native string."""
                return Unicode._native (u''.join (Unicode._chr (x) for x in Unicode.parse (s)))

        shorthands = {
                "ZERO WIDTH NON-JOINER": "ZWNJ",
                "ZERO WIDTH JOINER": "ZWJ",
                "NARROW NO-BREAK SPACE": "NNBSP",
                "COMBINING GRAPHEME JOINER": "CGJ",
                "LEFT-TO-RIGHT MARK": "LRM",
                "RIGHT-TO-LEFT MARK": "RLM",
                "LEFT-TO-RIGHT EMBEDDING": "LRE",
                "RIGHT-TO-LEFT EMBEDDING": "RLE",
                "POP DIRECTIONAL FORMATTING": "PDF",
                "LEFT-TO-RIGHT OVERRIDE": "LRO",
                "RIGHT-TO-LEFT OVERRIDE": "RLO",
        }

        @staticmethod
        def pretty_name (u):
                """Return a short name for character `u`, derived from its
                Unicode name, or "XXX" if it has none."""
                try:
                        s = unicodedata.name (u)
                except ValueError:
                        return "XXX"
                s = re.sub (".* LETTER ", "", s)
                s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
                s = re.sub (".* SIGN ", "", s)
                s = re.sub (".* COMBINING ", "", s)
                # "<SCRIPT> SIGN VIRAMA" has already been reduced to a bare
                # "VIRAMA" above, so the prefix has to be optional here.
                if re.match ("(.* )?VIRAMA", s):
                        s = "HALANT"
                if s in Unicode.shorthands:
                        s = Unicode.shorthands[s]
                return s

        @staticmethod
        def pretty_names (s):
                """Return the pretty names of the code points written in `s`,
                joined with " + ", as a native string."""
                s = re.sub (r"[<+>\\uU]", " ", s)
                s = re.sub (r"0[xX]", " ", s)
                s = [Unicode._chr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
                return Unicode._native (u' + '.join (Unicode.pretty_name (x) for x in s))
448
449
class FileHelpers:
        """File-opening helpers for command-line tools."""

        @staticmethod
        def open_file_or_stdin (f):
                """Return sys.stdin if `f` is '-', otherwise open the file
                named `f` for reading in text mode.  The caller is responsible
                for closing the returned file."""
                if f == '-':
                        return sys.stdin
                # open () works on Python 2 and 3; the file () builtin is
                # Python 2 only.
                return open (f)
457
458
class Manifest:
        """Reading and generating MANIFEST files, which list the entries of a
        directory one name per line."""

        @staticmethod
        def read (s, strict = True):
                """Generator yielding the files reachable from path `s`.

                A plain file yields its own normalized path.  A directory is
                expanded, recursively, through the entries listed in its
                MANIFEST file.  When `strict` is true, a missing path or
                MANIFEST is reported on stderr and the process exits with
                status 1; otherwise it is silently skipped.  `strict` also
                applies to nested entries.
                """

                if not os.path.exists (s):
                        if strict:
                                sys.stderr.write ("%s: %s does not exist\n" % (sys.argv[0], s))
                                sys.exit (1)
                        return

                s = os.path.normpath (s)

                if not os.path.isdir (s):
                        yield s
                        return

                manifest = os.path.join (s, "MANIFEST")
                try:
                        # Read the whole list first so the file is closed before
                        # descending into the entries.
                        with open (manifest) as m:
                                items = [x.strip () for x in m]
                except IOError:
                        if strict:
                                sys.stderr.write ("%s: %s does not exist\n" % (sys.argv[0], manifest))
                                sys.exit (1)
                        return

                for f in items:
                        if not f:
                                # A blank line would resolve to the directory
                                # itself and recurse forever.
                                continue
                        for p in Manifest.read (os.path.join (s, f), strict):
                                yield p

        @staticmethod
        def update_recursive (s):
                """(Re)write a MANIFEST file in directory `s` and in every
                directory below it, following symlinks.  Each MANIFEST lists
                the directory's files, then its subdirectories, each sorted,
                leaving out well-known metadata names.  A "GEN" line is printed
                for every file written."""

                skipped = ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]

                # os.walk () already descends into every entry left in
                # `dirnames`, so no explicit recursion is needed.
                for dirpath, dirnames, filenames in os.walk (s, followlinks=True):

                        for f in skipped:
                                if f in dirnames:
                                        dirnames.remove (f)
                                if f in filenames:
                                        filenames.remove (f)
                        dirnames.sort ()
                        filenames.sort ()
                        ms = os.path.join (dirpath, "MANIFEST")
                        sys.stdout.write ("  GEN    %s\n" % ms)
                        with open (ms, "w") as m:
                                for f in filenames:
                                        m.write (f + "\n")
                                for f in dirnames:
                                        m.write (f + "\n")
509
# This module is a library of helpers; running it as a script does nothing.
if __name__ == '__main__':
        pass