Imported Upstream version 1.2.7
[platform/upstream/harfbuzz.git] / test / shaping / hb_test_tools.py
1 #!/usr/bin/env python
2
3 from __future__ import print_function
4 import sys, os, re, difflib, unicodedata, errno, cgi
5 from itertools import *
6
# One prefix character per compared input: index 0 tags lines from the
# first input, index 1 lines from the second, and so on.
diff_symbols = "-+=*&^%$#@!~/"
# Highlight colour names, indexed the same way as diff_symbols.
diff_colors = ['red', 'green', 'blue']

# Python 3 dropped the unichr builtin; chr does the same job there.
if sys.version_info >= (3,):
        unichr = chr
12
class ColorFormatter:
        """Namespace of output formatters used to highlight diffs.

        Every nested formatter class offers the same four static methods:
        start_color (c) and end_color () bracket a highlighted run,
        escape (s) makes text safe for the output format, and newline ()
        returns the line terminator to emit."""

        class Null:
                """Plain text: no highlighting and no escaping."""

                @staticmethod
                def start_color (c): return ''
                @staticmethod
                def end_color (): return ''
                @staticmethod
                def escape (s): return s
                @staticmethod
                def newline (): return '\n'

        class ANSI:
                """Terminal output using ANSI escape sequences."""

                # Coloured background, white bold foreground.
                _codes = {
                        'red': '\033[41;37;1m',
                        'green': '\033[42;37;1m',
                        'blue': '\033[44;37;1m',
                }

                @staticmethod
                def start_color (c):
                        # Raises KeyError for a colour name not in the table.
                        return ColorFormatter.ANSI._codes[c]
                @staticmethod
                def end_color ():
                        return '\033[m'
                @staticmethod
                def escape (s): return s
                @staticmethod
                def newline (): return '\n'

        class HTML:
                """HTML output: highlights are <span> elements, lines end in <br/>."""

                @staticmethod
                def start_color (c):
                        return '<span style="background:%s">' % c
                @staticmethod
                def end_color ():
                        return '</span>'
                @staticmethod
                def escape (s):
                        """Escape &, < and > in s; quotes are left untouched."""
                        # cgi.escape was removed in Python 3.8, so prefer
                        # html.escape and only fall back to cgi on Python 2.
                        try:
                                from html import escape as html_escape
                        except ImportError:
                                from cgi import escape as cgi_escape
                                return cgi_escape (s)
                        return html_escape (s, quote=False)
                @staticmethod
                def newline (): return '<br/>\n'

        @staticmethod
        def Auto (argv = [], out = sys.stdout):
                """Pick a formatter class from command-line flags.

                Recognised flags are removed from argv in place (first
                occurrence only).  When several are present, the one listed
                last in the table below wins.  Without any flag the ANSI
                formatter is returned.

                argv: mutable list of arguments to scan.  The shared default
                      list is never modified because it is always empty.
                out:  accepted for interface compatibility; not consulted here.

                Returns one of ColorFormatter.ANSI, .HTML or .Null."""
                chosen = ColorFormatter.ANSI
                for flag, formatter in (
                        ("--format", ColorFormatter.ANSI),
                        ("--format=ansi", ColorFormatter.ANSI),
                        ("--format=html", ColorFormatter.HTML),
                        ("--no-format", ColorFormatter.Null),
                ):
                        if flag in argv:
                                argv.remove (flag)
                                chosen = formatter
                return chosen
69
70
class DiffColorizer:
        """Highlights the words that differ between paired diff lines."""

        # A run of word characters followed by at most one non-word
        # character.  Both letter cases belong to the word class so that a
        # mixed-case token such as "uni25CC" is compared as a single word.
        diff_regex = re.compile ('([a-zA-Z0-9_]*)([^a-zA-Z0-9_]?)')

        def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
                """formatter: object offering start_color, end_color, escape
                              and newline (see ColorFormatter).
                   colors:    colour names, indexed like symbols.
                   symbols:   prefix characters that mark diff lines."""
                self.formatter = formatter
                self.colors = colors
                self.symbols = symbols

        def colorize_lines (self, lines):
                """Highlight the differences between a pair of lines.

                lines holds exactly two entries (difflib compares two
                sequences); a None or empty entry is treated as ''.

                Returns a list of output lines, each made of the side's
                symbol, its text with the differing tokens wrapped in colour
                markers, and the formatter's newline.  A side that ends up
                with no text is left out."""
                fmt = self.formatter
                texts = (l if l else '' for l in lines)
                # One token per line, so difflib can compare token sequences.
                tokens = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in texts]
                out = ["", ""]
                highlighted = [False, False]

                def close_highlights ():
                        for i in range (2):
                                if highlighted[i]:
                                        out[i] += fmt.end_color ()
                                        highlighted[i] = False

                for l in difflib.Differ ().compare (*tokens):
                        tag = l[0]
                        if tag == '?':
                                # difflib's intraline hint lines carry no text.
                                continue
                        text = fmt.escape (l[2:])
                        if tag == ' ':
                                # A token common to both sides ends any open highlight.
                                close_highlights ()
                                out[0] += text
                                out[1] += text
                                continue
                        if tag in self.symbols:
                                i = self.symbols.index (tag)
                                if not highlighted[i]:
                                        out[i] += fmt.start_color (self.colors[i])
                                        highlighted[i] = True
                                out[i] += text
                close_highlights ()
                # Drop the separators that were inserted for tokenising.
                out = [o.replace ('\n', '') for o in out]
                return [s1+s2+fmt.newline () for (s1,s2) in zip (self.symbols, out) if s2]

        def colorize_diff (self, f):
                """Yield the lines of diff f with paired changes highlighted.

                Lines whose first character is not in self.symbols are
                escaped and yielded immediately.  Lines that start with a
                symbol are buffered per side and flushed through
                colorize_lines once both sides are present, when a side would
                be overwritten, or at the end of input.

                NOTE: only two sides are buffered, so a line starting with
                the third or a later symbol raises IndexError."""
                pending = [None, None]
                for l in f:
                        if l[0] not in self.symbols:
                                yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
                                continue
                        i = self.symbols.index (l[0])
                        if pending[i]:
                                # This side is already occupied: flush first.
                                for line in self.colorize_lines (pending):
                                        yield line
                                pending = [None, None]
                        pending[i] = l[1:]
                        if all (pending):
                                # Both sides present: flush the pair.
                                for line in self.colorize_lines (pending):
                                        yield line
                                pending = [None, None]
                if any (pending):
                        # Flush whatever is left at end of input.
                        for line in self.colorize_lines (pending):
                                yield line
131
132
class ZipDiffer:
        """Compares several inputs line by line, reading them in lockstep."""

        @staticmethod
        def diff_files (files, symbols=diff_symbols):
                """Write a position-by-position diff of files to standard output.

                files:   iterable of line iterables (for example open files).
                symbols: prefix characters; symbols[i] tags lines from files[i].

                A line that is identical in every input is written once,
                prefixed with a space.  Otherwise each input's line, when it
                has one, is written prefixed with that input's symbol.

                An IOError with errno EPIPE is ignored; any other IOError is
                reported on standard error and the process exits with status 1."""
                # The function is named izip_longest on Python 2 and
                # zip_longest on Python 3.
                try:
                        from itertools import izip_longest as zip_longest
                except ImportError:
                        from itertools import zip_longest

                files = tuple (files) # in case it's a generator, copy it
                try:
                        for lines in zip_longest (*files):
                                if all (lines[0] == line for line in lines[1:]):
                                        sys.stdout.writelines ([" ", lines[0]])
                                        continue

                                for i, l in enumerate (lines):
                                        # Exhausted inputs are padded with None.
                                        if l:
                                                sys.stdout.writelines ([symbols[i], l])
                except IOError as e:
                        if e.errno != errno.EPIPE:
                                print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
                                sys.exit (1)
151
152
class DiffFilters:
        """Generators that pass through only part of a diff stream."""

        @staticmethod
        def filter_failures (f):
                """Yield the lines of every test case in f that did not pass."""
                for _key, group in DiffHelpers.separate_test_cases (f):
                        # The group is a one-shot iterator; keep a copy so it
                        # can be both judged and re-emitted.
                        case = list (group)
                        if DiffHelpers.test_passed (case):
                                continue
                        for l in case:
                                yield l

161
class Stat:
        """Running tally: how many tests were added and their summed freq."""

        def __init__ (self):
                self.count, self.freq = 0, 0

        def add (self, test):
                """Count one more test and accumulate its freq attribute."""
                self.count += 1
                weight = test.freq
                self.freq += weight
171
class Stats:
        """Passed / failed / total tallies for a set of tests, plus the
           statistics derived from them."""

        def __init__ (self):
                self.passed, self.failed, self.total = Stat (), Stat (), Stat ()

        def add (self, test):
                """Record test in the total and in the tally matching test.passed."""
                self.total.add (test)
                bucket = self.passed if test.passed else self.failed
                bucket.add (test)

        def mean (self):
                """Fraction of recorded tests that passed."""
                passed = float (self.passed.count)
                return passed / self.total.count

        def variance (self):
                """Pass fraction multiplied by fail fraction."""
                total = self.total.count
                pass_rate = float (self.passed.count) / total
                fail_rate = float (self.failed.count) / total
                return pass_rate * fail_rate

        def stddev (self):
                """Square root of variance ()."""
                return pow (self.variance (), .5)

        def zscore (self, population):
                """Calculate the standard score.
                   Population is the Stats for population.
                   Self is Stats for sample.
                   Returns larger absolute value if sample is highly unlikely to be random.
                   Anything outside of -3..+3 is very unlikely to be random.
                   See: http://en.wikipedia.org/wiki/Standard_score"""

                deviation = self.mean () - population.mean ()
                return deviation / population.stddev ()
205
206
207
208
class DiffSinks:
        """Consumers that read a whole diff stream and print a summary."""

        @staticmethod
        def print_stat (f):
                """Print how many test cases in f passed and how many failed.

                Test cases are the groups from DiffHelpers.separate_test_cases;
                each is judged with DiffHelpers.test_passed."""
                passed = 0
                failed = 0
                # XXX port to Stats, but that would really slow us down here
                for key, lines in DiffHelpers.separate_test_cases (f):
                        if DiffHelpers.test_passed (lines):
                                passed += 1
                        else:
                                failed += 1
                total = passed + failed
                # Empty input has no failure rate; report 0% instead of
                # dividing by zero.
                percent = 100. * failed / total if total else 0.
                print ("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, percent))

        @staticmethod
        def print_ngrams (f, ns=(1,2,3)):
                """Print a z-score line for each code point n-gram that is
                   involved in at least 30 failed tests.

                f:  diff stream to analyse.
                ns: n-gram lengths to collect.

                Each n-gram's pass/fail tally is scored against the tally of
                all tests with Stats.zscore."""
                gens = tuple (Ngram.generator (n) for n in ns)
                allstats = Stats ()
                allgrams = {}
                for key, lines in DiffHelpers.separate_test_cases (f):
                        test = Test (lines)
                        allstats.add (test)

                        # A test without a <...> line has no code points.
                        unicodes = test.unicodes or ()
                        for gen in gens:
                                for ngram in gen (unicodes):
                                        if ngram not in allgrams:
                                                allgrams[ngram] = Stats ()
                                        allgrams[ngram].add (test)

                # dict.items () exists on both Python 2 and 3, whereas
                # iteritems () is Python 2 only.
                for ngram, stats in allgrams.items ():
                        if stats.failed.count < 30: # for statistical reasons
                                continue
                        print ("zscore: %9f failed: %6d passed: %6d ngram: <%s>" % (stats.zscore (allstats), stats.failed.count, stats.passed.count, ','.join ("U+%04X" % u for u in ngram)))
248
249
250
class Test:
        """One test case, parsed from the diff lines that belong to it.

        Attributes set by the constructor:
          freq:       always 1.
          passed:     False as soon as any line does not start with a space.
          identifier: text between the first character and the first colon
                      of the first line that has a colon, else None.
          text:       payload of the last "(...)" line, else None.
          unicodes:   Unicode.parse of the payload of the last "<...>" line,
                      else None.
          glyphs:     payload of the last "[...]" line, else None."""

        def __init__ (self, lines):
                self.freq = 1
                self.passed = True
                self.identifier = None
                self.text = None
                self.unicodes = None
                self.glyphs = None
                for l in lines:
                        symbol = l[0]
                        if symbol != ' ':
                                self.passed = False
                        i = 1
                        if ':' in l:
                                i = l.index (':')
                                if not self.identifier:
                                        self.identifier = l[1:i]
                                i = i + 2 # Skip colon and space
                        # j indexes the closing bracket: the last character,
                        # or the one before a trailing newline.
                        j = -1
                        if l[j] == '\n':
                                j -= 1
                        brackets = l[i] + l[j]
                        # Slice up to j rather than a fixed -2, so a final
                        # line without a newline keeps its last character.
                        l = l[i+1:j]
                        if brackets == '()':
                                self.text = l
                        elif brackets == '<>':
                                self.unicodes = Unicode.parse (l)
                        elif brackets == '[]':
                                # XXX we don't handle failed tests here
                                self.glyphs = l
282
283
class DiffHelpers:
        """Helpers for grouping the lines of a test diff and judging them."""

        @staticmethod
        def separate_test_cases (f):
                '''Groups consecutive lines of f by their identifier, i.e. the
                   text between the first character and the first colon.  A
                   line without a colon after its first character is its own
                   key.  Returns the groupby iterator of (key, lines) pairs.'''

                def case_key (line):
                        if ':' not in line[1:]:
                                return line
                        return line[1:line.index (':')]
                return groupby (f, case_key)

        @staticmethod
        def test_passed (lines):
                '''Tells whether the test case made of lines counts as passed:
                   either a "+" line mentions one of the markers below, or
                   every line starts with a space.'''
                lines = list (lines)
                # XXX This is a hack, but does the job for now.
                markers = ("space+0|space+0", "uni25CC", "dottedcircle", "glyph0", "gid0", "notdef")
                for marker in markers:
                        if any (marker in l for l in lines if l[0] == '+'):
                                return True
                return all (l[0] == ' ' for l in lines)
309
310
class FilterHelpers:
        """Adapters that turn a line filter into a function printing its output."""

        @staticmethod
        def filter_printer_function (filter_callback):
                """Return a function that prints every item filter_callback
                   yields for its argument, one per line."""
                def printer (f):
                        for item in filter_callback (f):
                                print (item)
                return printer

        @staticmethod
        def filter_printer_function_no_newline (filter_callback):
                """Same, but items are written to standard output as they
                   are, with no newline added."""
                def printer (f):
                        for item in filter_callback (f):
                                sys.stdout.write (item)
                return printer
326
327
class Ngram:
        """Factory for sliding-window n-gram generators."""

        @staticmethod
        def generator (n):
                """Return a generator function that yields every run of n
                   consecutive items of its argument as a tuple.  The
                   returned function carries n as its .n attribute."""

                def gen (f):
                        window = []
                        for item in f:
                                window.append (item)
                                if len (window) != n:
                                        continue
                                yield tuple (window)
                                # Slide on: forget the oldest item.
                                del window[0]

                gen.n = n
                return gen
343
344
class UtilMains:
        """Ready-made main routines for small command-line utilities.

        Each one reads sys.argv, prints a usage line and exits with status
        1 when "--help" is given, and treats IOError alike: EPIPE is
        ignored, anything else is reported on standard error followed by
        exit status 1."""

        @staticmethod
        def _exit_unless_epipe (e):
                # Shared IOError policy of the three entry points below.
                if e.errno == errno.EPIPE:
                        return
                print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
                sys.exit (1)

        @staticmethod
        def process_multiple_files (callback, mnemonic = "FILE"):
                """Call callback with an open file for each command-line
                   argument; with no arguments, use standard input ('-')."""

                if "--help" in sys.argv:
                        print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
                        sys.exit (1)

                try:
                        names = sys.argv[1:] or ['-']
                        for name in names:
                                callback (FileHelpers.open_file_or_stdin (name))
                except IOError as e:
                        UtilMains._exit_unless_epipe (e)

        @staticmethod
        def process_multiple_args (callback, mnemonic):
                """Call callback with each command-line argument; at least
                   one argument is required."""

                if len (sys.argv) == 1 or "--help" in sys.argv:
                        print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
                        sys.exit (1)

                try:
                        for arg in sys.argv[1:]:
                                callback (arg)
                except IOError as e:
                        UtilMains._exit_unless_epipe (e)

        @staticmethod
        def filter_multiple_strings_or_stdin (callback, mnemonic, \
                                              separator = " ", \
                                              concat_separator = False):
                """Print callback applied to the command-line arguments, or
                   to each line of standard input when there are none.

                   separator joins the per-argument results on one output
                   line.  When concat_separator is not False, the arguments
                   are first joined with it into a single string."""

                if "--help" in sys.argv:
                        print ("Usage:\n  %s %s...\nor:\n  %s\n\nWhen called with no arguments, input is read from standard input." \
                              % (sys.argv[0], mnemonic, sys.argv[0]))
                        sys.exit (1)

                try:
                        if len (sys.argv) != 1:
                                args = sys.argv[1:]
                                if concat_separator != False:
                                        args = [concat_separator.join (args)]
                                print (separator.join (callback (x) for x in args))
                        else:
                                # readline returns '' only at end of input.
                                for line in iter (sys.stdin.readline, ''):
                                        if line.endswith ('\n'):
                                                line = line[:-1]
                                        print (callback (line))
                except IOError as e:
                        UtilMains._exit_unless_epipe (e)
406
407
class Unicode:
        """Conversions between text, U+XXXX notation and character names.

        Functions that return text return a UTF-8 encoded byte string on
        Python 2 and a native str on Python 3."""

        @staticmethod
        def decode (s):
                """Return the code points of s as comma-separated U+XXXX.

                s is either UTF-8 encoded bytes or already-decoded text."""
                if isinstance (s, bytes):
                        s = s.decode ('utf-8')
                out = u','.join ("U+%04X" % ord (u) for u in s)
                if sys.version_info[0] == 2: out = out.encode ('utf-8')
                return out

        @staticmethod
        def parse (s):
                """Return the list of integers for the hexadecimal code
                   points written in s.

                "0x" prefixes and the punctuation and letters listed in the
                pattern below (which cover U+, \\u, uni and &#x spellings)
                are treated as separators."""
                s = re.sub (r"0[xX]", " ", s)
                s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n    ]", " ", s)
                return [int (x, 16) for x in s.split ()]

        @staticmethod
        def encode (s):
                """Return the text made of the code points written in s
                   (see parse for the accepted notation)."""
                s = u''.join (unichr (x) for x in Unicode.parse (s))
                if sys.version_info[0] == 2: s = s.encode ('utf-8')
                return s

        # Abbreviations substituted for full character names.
        shorthands = {
                "ZERO WIDTH NON-JOINER": "ZWNJ",
                "ZERO WIDTH JOINER": "ZWJ",
                "NARROW NO-BREAK SPACE": "NNBSP",
                "COMBINING GRAPHEME JOINER": "CGJ",
                "LEFT-TO-RIGHT MARK": "LRM",
                "RIGHT-TO-LEFT MARK": "RLM",
                "LEFT-TO-RIGHT EMBEDDING": "LRE",
                "RIGHT-TO-LEFT EMBEDDING": "RLE",
                "POP DIRECTIONAL FORMATTING": "PDF",
                "LEFT-TO-RIGHT OVERRIDE": "LRO",
                "RIGHT-TO-LEFT OVERRIDE": "RLO",
        }

        @staticmethod
        def pretty_name (u):
                """Return a short display name for the single character u.

                The Unicode name is stripped of its script / category prefix,
                vowel signs become "<X>-MATRA", viramas become "HALANT" and
                names listed in shorthands are abbreviated.  Characters
                without a Unicode name give "XXX"."""
                try:
                        s = unicodedata.name (u)
                except ValueError:
                        return "XXX"
                s = re.sub (".* LETTER ", "", s)
                s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
                s = re.sub (".* SIGN ", "", s)
                s = re.sub (".* COMBINING ", "", s)
                # The substitutions above may already have reduced
                # "<SCRIPT> SIGN VIRAMA" to a bare "VIRAMA", so the leading
                # "<something> " part has to be optional here.
                if re.match ("(.* )?VIRAMA", s):
                        s = "HALANT"
                if s in Unicode.shorthands:
                        s = Unicode.shorthands[s]
                return s

        @staticmethod
        def pretty_names (s):
                """Return the pretty_name of every code point written in s,
                   joined with " + "."""
                s = re.sub (r"[<+>\\uU]", " ", s)
                s = re.sub (r"0[xX]", " ", s)
                chars = [unichr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
                out = u' + '.join (Unicode.pretty_name (x) for x in chars)
                if sys.version_info[0] == 2: out = out.encode ('utf-8')
                return out
462
463
class FileHelpers:
        """Small helpers for opening input files."""

        @staticmethod
        def open_file_or_stdin (f):
                """Return sys.stdin when f is '-', else f opened for reading.

                Raises IOError when the file cannot be opened."""
                if f == '-':
                        return sys.stdin
                # open () works on Python 2 and 3; the file () builtin is
                # Python 2 only.
                return open (f)
471
472
class Manifest:
        """Reading and generating the MANIFEST files that list the contents
           of a directory tree."""

        @staticmethod
        def read (s, strict = True):
                """Yield the normalised path of every file reachable from s.

                A plain file yields itself.  A directory is expanded through
                the entries of its MANIFEST file, one per line and relative
                to that directory, recursively.  Blank lines are skipped.

                strict: when true, a missing s or a directory whose MANIFEST
                        cannot be read is reported on standard error and the
                        process exits with status 1; otherwise nothing is
                        yielded for it.  Entries listed in a MANIFEST are
                        always read in strict mode."""

                if not os.path.exists (s):
                        if strict:
                                print ("%s: %s does not exist" % (sys.argv[0], s), file=sys.stderr)
                                sys.exit (1)
                        return

                s = os.path.normpath (s)

                if not os.path.isdir (s):
                        yield s
                        return

                manifest = os.path.join (s, "MANIFEST")
                try:
                        with open (manifest) as m:
                                # A blank entry would resolve to the directory
                                # itself and recurse without end.
                                items = [x.strip () for x in m.readlines () if x.strip ()]
                except IOError:
                        if strict:
                                print ("%s: %s does not exist" % (sys.argv[0], manifest), file=sys.stderr)
                                sys.exit (1)
                        return

                for f in items:
                        for p in Manifest.read (os.path.join (s, f)):
                                yield p

        @staticmethod
        def update_recursive (s):
                """(Re)write the MANIFEST of s and of every directory below it.

                Each MANIFEST lists the directory's files, then its
                sub-directories, each sorted by name.  Entries with one of
                the housekeeping names below are neither listed nor
                descended into.  Symbolic links to directories are followed.
                A "GEN" line is printed for every MANIFEST written."""

                skipped = ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]

                # os.walk already visits every sub-directory, so no explicit
                # recursion is needed.  Editing dirnames in place prunes and
                # orders the walk.
                for dirpath, dirnames, filenames in os.walk (s, followlinks=True):

                        for f in skipped:
                                if f in dirnames:
                                        dirnames.remove (f)
                                if f in filenames:
                                        filenames.remove (f)
                        dirnames.sort ()
                        filenames.sort ()
                        ms = os.path.join (dirpath, "MANIFEST")
                        print ("  GEN    %s" % ms)
                        with open (ms, "w") as m:
                                for f in filenames:
                                        print (f, file=m)
                                for f in dirnames:
                                        print (f, file=m)
521                         for f in dirnames:
522                                 Manifest.update_recursive (os.path.join (dirpath, f))
523
# Running this module directly does nothing; the guard body is a no-op.
if __name__ == '__main__':
        pass