Imported Upstream version 0.9.3
[platform/upstream/harfbuzz.git] / test / shaping / hb_test_tools.py
1 #!/usr/bin/python
2
3 import sys, os, re, difflib, unicodedata, errno, cgi
4 from itertools import *
5
# One-character line prefixes used in diff output.  The position of a
# symbol in this string is the index of the input it stands for.
diff_symbols = "-+=*&^%$#@!~/"
# Highlight colors, looked up with the same index as diff_symbols.
diff_colors = ['red', 'green', 'blue']
8
class ColorFormatter:
        """Namespace of output formatters used when highlighting diffs.

        Every nested formatter class offers the same four static methods:
        start_color (c), end_color (), escape (s) and newline ().
        """

        class Null:
                """Formatter that adds no markup and leaves text untouched."""

                @staticmethod
                def start_color (c):
                        return ''

                @staticmethod
                def end_color ():
                        return ''

                @staticmethod
                def escape (s):
                        return s

                @staticmethod
                def newline ():
                        return '\n'

        class ANSI:
                """Formatter that highlights with ANSI terminal escape sequences."""

                @staticmethod
                def start_color (c):
                        # Only these three names are known; any other raises KeyError.
                        sequences = {
                                'red': '\033[41;37;1m',
                                'green': '\033[42;37;1m',
                                'blue': '\033[44;37;1m',
                        }
                        return sequences[c]

                @staticmethod
                def end_color ():
                        return '\033[m'

                @staticmethod
                def escape (s):
                        return s

                @staticmethod
                def newline ():
                        return '\n'

        class HTML:
                """Formatter that highlights with <span> elements."""

                @staticmethod
                def start_color (c):
                        return '<span style="background:%s">' % c

                @staticmethod
                def end_color ():
                        return '</span>'

                @staticmethod
                def escape (s):
                        return cgi.escape (s)

                @staticmethod
                def newline ():
                        return '<br/>\n'

        @staticmethod
        def Auto (argv = [], out = sys.stdout):
                """Pick a formatter class according to the flags found in argv.

                Recognized flags are "--format", "--format=ansi", "--format=html"
                and "--no-format".  Each one that is present is removed from argv
                (first occurrence only) and, when several are given, the one
                checked last wins.  Without any flag the ANSI formatter is
                returned.  `out` is accepted but not used.
                """
                # Checked in this order; a later match overrides an earlier one.
                flag_table = (
                        ("--format", ColorFormatter.ANSI),
                        ("--format=ansi", ColorFormatter.ANSI),
                        ("--format=html", ColorFormatter.HTML),
                        ("--no-format", ColorFormatter.Null),
                )
                chosen = ColorFormatter.ANSI
                for flag, formatter in flag_table:
                        if flag in argv:
                                argv.remove (flag)
                                chosen = formatter
                return chosen
65
66
class DiffColorizer:
        """Highlights the parts of paired diff lines that differ.

        `formatter` supplies start_color/end_color/escape/newline, `symbols`
        are the per-input line prefixes and `colors` the highlight color for
        each input, indexed the same way as `symbols`.
        """

        # A run of word characters followed by at most one non-word character.
        # The upper-case range used to be written as a second "a-z", so capital
        # letters were never grouped into words.
        diff_regex = re.compile ('([a-zA-Z0-9_]*)([^a-zA-Z0-9_]?)')

        def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
                self.formatter = formatter
                self.colors = colors
                self.symbols = symbols

        def colorize_lines (self, lines):
                """Return the given pair of lines with differing tokens colored.

                `lines` holds two entries (either may be None or empty).  The
                result is a list with one string per non-empty side, made of
                that side's symbol, its text with color markup, and the
                formatter's newline.
                """
                lines = (l if l else '' for l in lines)
                # Put every word and every separator on a line of its own so
                # that difflib compares the two inputs token by token.
                ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
                oo = ["",""]            # output text built so far, per side
                st = [False, False]     # whether a color span is open, per side
                for l in difflib.Differ().compare (*ss):
                        if l[0] == '?':
                                # Differ's hint lines carry no content.
                                continue
                        if l[0] == ' ':
                                # Token common to both sides: close open spans first.
                                for i in range(2):
                                        if st[i]:
                                                oo[i] += self.formatter.end_color ()
                                                st[i] = False
                                oo = [o + self.formatter.escape (l[2:]) for o in oo]
                                continue
                        if l[0] in self.symbols:
                                # Token present on one side only: emit it inside a span.
                                i = self.symbols.index (l[0])
                                if not st[i]:
                                        oo[i] += self.formatter.start_color (self.colors[i])
                                        st[i] = True
                                oo[i] += self.formatter.escape (l[2:])
                                continue
                for i in range(2):
                        if st[i]:
                                oo[i] += self.formatter.end_color ()
                                st[i] = False
                # Drop the newlines that were only inserted for tokenizing.
                oo = [o.replace ('\n', '') for o in oo]
                return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]

        def colorize_diff (self, f):
                """Yield the lines of diff `f` with changed fragments highlighted.

                Lines whose first character is not one of the symbols are passed
                through, escaped and with '\\n' replaced by the formatter's
                newline.  Other lines are collected per input and handed to
                colorize_lines once both slots are filled, when a second line
                arrives for an already filled slot, or at the end of input.

                NOTE: only two slots exist, so a line starting with the third or
                a later symbol raises IndexError.
                """
                lines = [None, None]
                for l in f:
                        if l[0] not in self.symbols:
                                yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
                                continue
                        i = self.symbols.index (l[0])
                        if lines[i]:
                                # Flush
                                for line in self.colorize_lines (lines):
                                        yield line
                                lines = [None, None]
                        lines[i] = l[1:]
                        if (all (lines)):
                                # Flush
                                for line in self.colorize_lines (lines):
                                        yield line
                                lines = [None, None]
                if (any (lines)):
                        # Flush
                        for line in self.colorize_lines (lines):
                                yield line
127
128
class ZipDiffer:
        """Line-by-line comparison of several inputs read in lockstep."""

        @staticmethod
        def diff_files (files, symbols=diff_symbols):
                """Write a positional diff of `files` to standard output.

                The inputs are read one line at a time, side by side.  When all
                of them agree, the line is written once with a leading space;
                otherwise each non-empty line is written prefixed with the
                symbol of its input.  A broken pipe ends the output silently,
                any other IOError is reported on stderr and exits with status 1.
                """
                files = tuple (files) # materialize, in case a generator was passed
                try:
                        for row in izip_longest (*files):
                                first = row[0]
                                if not any (first != other for other in row[1:]):
                                        sys.stdout.writelines ([" ", first])
                                        continue

                                for index, text in enumerate (row):
                                        if not text:
                                                continue
                                        sys.stdout.writelines ([symbols[index], text])
                except IOError as e:
                        if e.errno == errno.EPIPE:
                                return
                        message = "%s: %s: %s" % (sys.argv[0], e.filename, e.strerror)
                        sys.stderr.write (message + "\n")
                        sys.exit (1)
147
148
class DiffFilters:
        """Generators that narrow a diff stream down to selected lines."""

        @staticmethod
        def filter_failures (f):
                """Yield only the lines that belong to test cases that did not pass."""
                for _key, group in DiffHelpers.separate_test_cases (f):
                        # The group is walked twice, so it has to be materialized.
                        case = list (group)
                        if DiffHelpers.test_passed (case):
                                continue
                        for entry in case:
                                yield entry
157
class Stat:
        """Running tally: number of tests seen and the sum of their `freq`."""

        def __init__ (self):
                self.count, self.freq = 0, 0

        def add (self, test):
                """Account for one more test, adding its `freq` to the total."""
                self.count = self.count + 1
                self.freq = self.freq + test.freq
167
class Stats:
        """Pass/fail tallies for a group of tests.

        Keeps three Stat counters: tests that passed, tests that failed, and
        all tests.
        """

        def __init__ (self):
                self.passed = Stat ()
                self.failed = Stat ()
                self.total  = Stat ()

        def add (self, test):
                """Record `test` in the total and in the passed or failed tally."""
                self.total.add (test)
                if test.passed:
                        self.passed.add (test)
                else:
                        self.failed.add (test)

        def mean (self):
                """Fraction of tests that passed.

                Raises ZeroDivisionError when no test has been added.
                """
                return float (self.passed.count) / self.total.count

        def variance (self):
                """Pass fraction times fail fraction.

                Raises ZeroDivisionError when no test has been added.
                """
                return (float (self.passed.count) / self.total.count) * \
                       (float (self.failed.count) / self.total.count)

        def stddev (self):
                """Square root of variance ()."""
                return self.variance () ** .5

        def zscore (self, population):
                """Calculate the standard score.
                   Population is the Stats for population.
                   Self is Stats for sample.
                   Returns larger absolute value if sample is highly unlikely to be random.
                   Anything outside of -3..+3 is very unlikely to be random.
                   See: http://en.wikipedia.org/wiki/Standard_score

                   When the population has no spread (all of its tests passed,
                   or all failed) the score is 0 for a sample with the same
                   mean and an infinity of the matching sign otherwise, instead
                   of a division by zero."""

                delta = self.mean () - population.mean ()
                spread = population.stddev ()
                if spread == 0:
                        if delta == 0:
                                return 0.
                        return float ('inf') if delta > 0 else float ('-inf')
                return delta / spread
201
202
203
204
class DiffSinks:
        """Consumers of a diff stream that print a summary to standard output."""

        @staticmethod
        def print_stat (f):
                """Print how many test cases in diff `f` passed and failed.

                Test cases are the groups produced by
                DiffHelpers.separate_test_cases and are judged by
                DiffHelpers.test_passed.
                """
                passed = 0
                failed = 0
                # XXX port to Stats, but that would really slow us down here
                for key, lines in DiffHelpers.separate_test_cases (f):
                        if DiffHelpers.test_passed (lines):
                                passed += 1
                        else:
                                failed += 1
                total = passed + failed
                # Empty input has no failure rate; report 0% instead of
                # dividing by zero.
                percent = 100. * failed / total if total else 0.
                print ("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, percent))

        @staticmethod
        def print_ngrams (f, ns=(1,2,3)):
                """Print a z-score line for every frequently failing n-gram.

                Each test case in diff `f` is parsed into a Test; its code
                points are cut into n-grams for every n in `ns`, and pass/fail
                Stats are kept per n-gram.  Only n-grams that occur in at least
                30 failed tests are printed, each with its z-score against the
                Stats of all tests.
                """
                gens = tuple (Ngram.generator (n) for n in ns)
                allstats = Stats ()
                allgrams = {}
                for key, lines in DiffHelpers.separate_test_cases (f):
                        test = Test (lines)
                        allstats.add (test)

                        # A test case without a <...> line has unicodes set to
                        # None; it then contributes no n-grams.
                        unicodes = test.unicodes or ()
                        for gen in gens:
                                for ngram in gen (unicodes):
                                        if ngram not in allgrams:
                                                allgrams[ngram] = Stats ()
                                        allgrams[ngram].add (test)

                importantgrams = {}
                for ngram, stats in allgrams.iteritems ():
                        if stats.failed.count >= 30: # for statistical reasons
                                importantgrams[ngram] = stats
                allgrams = importantgrams
                del importantgrams

                for ngram, stats in allgrams.iteritems ():
                        print ("zscore: %9f failed: %6d passed: %6d ngram: <%s>" % (stats.zscore (allstats), stats.failed.count, stats.passed.count, ','.join ("U+%04X" % u for u in ngram)))
244
245
246
class Test:
        """One test case parsed from the diff lines that share its identifier.

        Attributes set by the constructor:
          freq        always 1
          passed      False as soon as any line starts with something other
                      than a space
          identifier  text between the first character and the first colon of
                      the first line that has a colon, else None
          text        payload of the last "(...)" line, else None
          unicodes    payload of the last "<...>" line run through
                      Unicode.parse, else None
          glyphs      payload of the last "[...]" line, else None
        """

        def __init__ (self, lines):
                self.freq = 1
                self.passed = True
                self.identifier = None
                self.text = None
                self.unicodes = None
                self.glyphs = None
                for l in lines:
                        symbol = l[0]
                        if symbol != ' ':
                                self.passed = False
                        # i: index of the opening bracket of the payload.
                        i = 1
                        if ':' in l:
                                i = l.index (':')
                                if not self.identifier:
                                        self.identifier = l[1:i]
                                i = i + 2 # Skip colon and space
                        # j: index of the closing bracket, which is the last
                        # character or the one before a trailing newline.
                        j = -1
                        if l[j] == '\n':
                                j -= 1
                        brackets = l[i] + l[j]
                        # Slice up to the closing bracket itself.  A fixed -2
                        # here dropped the last payload character of lines
                        # that do not end in a newline.
                        l = l[i+1:j]
                        if brackets == '()':
                                self.text = l
                        elif brackets == '<>':
                                self.unicodes = Unicode.parse (l)
                        elif brackets == '[]':
                                # XXX we don't handle failed tests here
                                self.glyphs = l
278
279
class DiffHelpers:
        """Helpers for grouping and judging the lines of a diff."""

        @staticmethod
        def separate_test_cases (f):
                '''Reads lines from f, and if the lines have identifiers, ie.
                   have a colon character, groups them by identifier,
                   yielding lists of all lines with the same identifier.

                   Returns the itertools.groupby iterator of (key, lines)
                   pairs; only consecutive lines are grouped.  A line without
                   a colon after its first character is its own key.'''

                def identifier (l):
                        # Look for the colon after the symbol column only, and
                        # use that same colon as the end of the identifier.
                        # Searching from position 0 returned an empty
                        # identifier when the symbol itself was a colon.
                        colon = l.find (':', 1)
                        if colon >= 0:
                                return l[1:colon]
                        return l
                return groupby (f, key=identifier)

        @staticmethod
        def test_passed (lines):
                '''Returns True if the test case made of lines counts as passed:
                   either every line starts with a space, or some line mentions
                   one of the tolerated glyph names.'''
                lines = list (lines)
                # XXX This is a hack, but does the job for now.
                tolerated = ("space|space", "uni25CC", "dottedcircle")
                if any (marker in l for marker in tolerated for l in lines):
                        return True
                return all (l[0] == ' ' for l in lines)
302
303
class FilterHelpers:
        """Adapters that turn a line filter into a function printing its output."""

        @staticmethod
        def filter_printer_function (filter_callback):
                """Wrap filter_callback so that each item it yields is printed.

                The returned function takes the input `f`, runs the filter on
                it and prints every resulting item followed by a newline.
                """
                def printer (f):
                        for item in filter_callback (f):
                                print (item)
                return printer

        @staticmethod
        def filter_printer_function_no_newline (filter_callback):
                """Like filter_printer_function, but writes each item to
                standard output as is, without adding a newline."""
                def printer (f):
                        for item in filter_callback (f):
                                sys.stdout.write (item)
                return printer
319
320
class Ngram:
        """Factory for sliding-window n-gram generators."""

        @staticmethod
        def generator (n):
                """Return a generator function producing the n-grams of an iterable.

                The returned function yields every run of n consecutive items
                as a tuple, in order, and carries n as its `n` attribute.
                """

                def gen (f):
                        window = []
                        for item in f:
                                window.append (item)
                                if len (window) != n:
                                        continue
                                yield tuple (window)
                                # Slide forward by dropping the oldest item.
                                del window[0]

                gen.n = n
                return gen
336
337
class UtilMains:
        """Command-line drivers that feed sys.argv or standard input to a callback.

        All of them print a usage line and exit with status 1 when "--help"
        is among the arguments.  A broken pipe is ignored silently; any other
        IOError is reported on stderr and exits with status 1.
        """

        @staticmethod
        def _fail_unless_broken_pipe (e):
                # Shared IOError policy of the drivers below.
                if e.errno == errno.EPIPE:
                        return
                message = "%s: %s: %s" % (sys.argv[0], e.filename, e.strerror)
                sys.stderr.write (message + "\n")
                sys.exit (1)

        @staticmethod
        def process_multiple_files (callback, mnemonic = "FILE"):
                """Call callback with an open file for each argument.

                Without arguments the single name '-' is used, which
                FileHelpers.open_file_or_stdin maps to standard input.
                """

                if "--help" in sys.argv:
                        print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
                        sys.exit (1)

                try:
                        for name in (sys.argv[1:] or ['-']):
                                callback (FileHelpers.open_file_or_stdin (name))
                except IOError as e:
                        UtilMains._fail_unless_broken_pipe (e)

        @staticmethod
        def process_multiple_args (callback, mnemonic):
                """Call callback with each command-line argument in turn.

                At least one argument is required; otherwise the usage line
                is printed and the program exits with status 1.
                """

                if len (sys.argv) == 1 or "--help" in sys.argv:
                        print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
                        sys.exit (1)

                try:
                        for arg in sys.argv[1:]:
                                callback (arg)
                except IOError as e:
                        UtilMains._fail_unless_broken_pipe (e)

        @staticmethod
        def filter_multiple_strings_or_stdin (callback, mnemonic, \
                                              separator = " ", \
                                              concat_separator = False):
                """Print callback applied to the arguments, or to each stdin line.

                With arguments, the results are joined with `separator` and
                printed on one line; if `concat_separator` is not False the
                arguments are first joined with it into a single string.
                Without arguments, each line of standard input is passed to
                the callback without its trailing newline and the result is
                printed on a line of its own.
                """

                if "--help" in sys.argv:
                        usage = "Usage:\n  %s %s...\nor:\n  %s\n\nWhen called with no arguments, input is read from standard input."
                        print (usage % (sys.argv[0], mnemonic, sys.argv[0]))
                        sys.exit (1)

                try:
                        if len (sys.argv) != 1:
                                args = sys.argv[1:]
                                if concat_separator != False:
                                        args = [concat_separator.join (args)]
                                print (separator.join (callback (x) for x in (args)))
                                return
                        # readline returns '' only at end of input.
                        for line in iter (sys.stdin.readline, ''):
                                if line.endswith ('\n'):
                                        line = line[:-1]
                                print (callback (line))
                except IOError as e:
                        UtilMains._fail_unless_broken_pipe (e)
399
400
class Unicode:
        """Conversions between UTF-8 text and "U+XXXX" code point notation."""

        @staticmethod
        def decode (s):
                """Turn the UTF-8 encoded string s into "<U+XXXX,U+YYYY,...>"."""
                return '<' + u','.join ("U+%04X" % ord (u) for u in unicode (s, 'utf-8')).encode ('utf-8') + '>'

        @staticmethod
        def parse (s):
                """Return the list of code points (ints) written in s.

                Code points are hexadecimal numbers, optionally decorated
                with "U+", "\\u" or "0x" and separated by commas, spaces,
                tabs, newlines or angle brackets.  Raises ValueError for a
                token that is not hexadecimal.
                """
                # Tab and space are spelled out explicitly rather than kept as
                # literal whitespace, which does not survive tab expansion.
                s = re.sub (r"[<+>,\\uU\n\t ]", " ", s)
                s = re.sub (r"0[xX]", " ", s)
                return [int (x, 16) for x in s.split (' ') if len (x)]

        @staticmethod
        def encode (s):
                """Turn code point notation (see parse) into a UTF-8 encoded string."""
                return u''.join (unichr (x) for x in Unicode.parse (s)).encode ('utf-8')

        # Abbreviations used by pretty_name in place of the full character name.
        shorthands = {
                "ZERO WIDTH NON-JOINER": "ZWNJ",
                "ZERO WIDTH JOINER": "ZWJ",
                "NARROW NO-BREAK SPACE": "NNBSP",
                "COMBINING GRAPHEME JOINER": "CGJ",
                "LEFT-TO-RIGHT MARK": "LRM",
                "RIGHT-TO-LEFT MARK": "RLM",
                "LEFT-TO-RIGHT EMBEDDING": "LRE",
                "RIGHT-TO-LEFT EMBEDDING": "RLE",
                "POP DIRECTIONAL FORMATTING": "PDF",
                "LEFT-TO-RIGHT OVERRIDE": "LRO",
                "RIGHT-TO-LEFT OVERRIDE": "RLO",
        }

        @staticmethod
        def pretty_name (u):
                """Return a short readable name for the single character u.

                Starts from the Unicode character name, strips everything up
                to " LETTER ", " SIGN " or " COMBINING ", renders vowel signs
                as "<NAME>-MATRA", and applies the shorthands table.  Returns
                "XXX" for a character that has no name.
                """
                try:
                        s = unicodedata.name (u)
                except ValueError:
                        return "XXX"
                s = re.sub (".* LETTER ", "", s)
                s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
                s = re.sub (".* SIGN ", "", s)
                s = re.sub (".* COMBINING ", "", s)
                # NOTE(review): names of the form "... SIGN VIRAMA" have already
                # been cut down to "VIRAMA" above and so do not match here;
                # confirm whether those were meant to become HALANT as well.
                if re.match (".* VIRAMA", s):
                        s = "HALANT"
                if s in Unicode.shorthands:
                        s = Unicode.shorthands[s]
                return s

        @staticmethod
        def pretty_names (s):
                """Return the pretty names of the code points written in s,
                joined with " + ", as a UTF-8 encoded string."""
                # Same notation as parse, so reuse it instead of a second copy
                # of the substitutions.
                chars = [unichr (x) for x in Unicode.parse (s)]
                return u' + '.join (Unicode.pretty_name (x) for x in chars).encode ('utf-8')
453
454
class FileHelpers:
        """Small helpers for opening input files."""

        @staticmethod
        def open_file_or_stdin (f):
                """Return standard input for the name '-', else f opened for reading."""
                return sys.stdin if f == '-' else open (f)
462
463
class Manifest:
        """Reading and generating the MANIFEST files that list a directory's entries."""

        @staticmethod
        def read (s, strict = True):
                """Yield the paths of all files reachable from s.

                If s is a file, its normalized path is yielded.  If s is a
                directory, its MANIFEST file is read and every entry listed
                there is expanded recursively, in order.

                When s, or the MANIFEST of directory s, is missing: with
                strict set a message is written to stderr and the program
                exits with status 1, otherwise nothing is yielded.

                NOTE: nested entries are always read with the default
                strict=True, as before.
                """

                if not os.path.exists (s):
                        if strict:
                                sys.stderr.write ("%s: %s does not exist\n" % (sys.argv[0], s))
                                sys.exit (1)
                        return

                s = os.path.normpath (s)

                if not os.path.isdir (s):
                        yield s
                        return

                manifest = os.path.join (s, "MANIFEST")
                try:
                        # Read everything up front so the file is closed
                        # before descending into the entries.
                        with open (manifest) as m:
                                items = [x.strip () for x in m]
                except IOError:
                        if strict:
                                sys.stderr.write ("%s: %s does not exist\n" % (sys.argv[0], manifest))
                                sys.exit (1)
                        return

                for f in items:
                        child = os.path.join (s, f)
                        # A blank line (or ".") names the directory itself and
                        # would make this recurse forever.
                        if os.path.normpath (child) == s:
                                continue
                        for p in Manifest.read (child):
                                yield p

        @staticmethod
        def update_recursive (s):
                """(Re)write a MANIFEST file in s and in every directory below it.

                Each MANIFEST lists the directory's files and then its
                subdirectories, both sorted, one name per line.  Entries
                named MANIFEST, README, LICENSE, COPYING, AUTHORS, SOURCES or
                ChangeLog are left out, and directories so named are not
                descended into.  A "  GEN    <path>" line is printed for each
                file written.
                """

                excluded = ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]

                # os.walk already visits every subdirectory, so no explicit
                # recursion is needed; recursing as well regenerated nested
                # manifests once per ancestor.
                for dirpath, dirnames, filenames in os.walk (s, followlinks=True):

                        # dirnames must be changed in place: os.walk reads it
                        # back to decide where to descend and in what order.
                        for f in excluded:
                                if f in dirnames:
                                        dirnames.remove (f)
                                if f in filenames:
                                        filenames.remove (f)
                        dirnames.sort ()
                        filenames.sort ()
                        ms = os.path.join (dirpath, "MANIFEST")
                        print ("  GEN    %s" % ms)
                        with open (ms, "w") as m:
                                for f in filenames + dirnames:
                                        m.write (f + "\n")
514
# Running this file directly does nothing; it only provides the classes
# defined above.
if __name__ == '__main__':
        pass