a9368adbd93826a654f30cdb772a0d17a9882d3b
[platform/upstream/harfbuzz.git] / test / shaping / hb_test_tools.py
1 #!/usr/bin/env python3
2
3 import sys, os, re, difflib, unicodedata, errno, cgi, itertools
4 from itertools import *
5
# One marker character per input file of a zipped diff: ZipDiffer prefixes a
# line that came from the i-th file with diff_symbols[i].
diff_symbols = "-+=*&^%$#@!~/"
# Highlight colors, indexed the same way as diff_symbols; DiffColorizer uses
# colors[i] for the side it stores at index i.
diff_colors = ['red', 'green', 'blue']
8
def codepoints(s):
        """Return an iterator over the integer code point of each character in `s`."""
        return map (ord, s)
11
class ColorFormatter:
        """Namespace of interchangeable output formatters.

        Each nested class exposes the same four static methods:
        start_color (c) / end_color () bracket a highlighted run,
        escape (s) makes text safe for the target format, and
        newline () returns the line terminator for that format."""

        class Null:
                """Plain text: no highlighting, no escaping."""
                @staticmethod
                def start_color (c): return ''
                @staticmethod
                def end_color (): return ''
                @staticmethod
                def escape (s): return s
                @staticmethod
                def newline (): return '\n'

        class ANSI:
                """Terminal output using ANSI SGR escape sequences."""
                @staticmethod
                def start_color (c):
                        # Bold white text on a colored background.
                        # Raises KeyError for a color name not listed here.
                        return {
                                'red': '\033[41;37;1m',
                                'green': '\033[42;37;1m',
                                'blue': '\033[44;37;1m',
                        }[c]
                @staticmethod
                def end_color ():
                        return '\033[m'
                @staticmethod
                def escape (s): return s
                @staticmethod
                def newline (): return '\n'

        class HTML:
                """HTML output: colored <span> runs and <br/> line breaks."""
                @staticmethod
                def start_color (c):
                        return '<span style="background:%s">' % c
                @staticmethod
                def end_color ():
                        return '</span>'
                @staticmethod
                def escape (s):
                        # cgi.escape () was removed in Python 3.8; html.escape ()
                        # with quote=False escapes the same three characters
                        # (&, <, >) and leaves quotes alone, as cgi.escape () did.
                        import html
                        return html.escape (s, quote=False)
                @staticmethod
                def newline (): return '<br/>\n'

        @staticmethod
        def Auto (argv = None, out = sys.stdout):
                """Pick a formatter class from command-line flags.

                Recognized flags are removed from `argv` in place (first
                occurrence only) so the caller can process what remains.
                When several flags are present, the one checked last wins;
                with no flag the ANSI formatter is returned.
                `out` is accepted for interface compatibility and is unused."""
                if argv is None:
                        argv = []
                # Checked in this order; later matches override earlier ones.
                flags = (
                        ("--format", ColorFormatter.ANSI),
                        ("--format=ansi", ColorFormatter.ANSI),
                        ("--format=html", ColorFormatter.HTML),
                        ("--no-format", ColorFormatter.Null),
                )
                chosen = ColorFormatter.ANSI
                for flag, formatter in flags:
                        if flag in argv:
                                argv.remove (flag)
                                chosen = formatter
                return chosen
68
69
class DiffColorizer:
        """Highlights the differing tokens of a two-way, symbol-prefixed diff.

        `formatter` is any object with the ColorFormatter interface
        (start_color, end_color, escape, newline).  `symbols[0]` and
        `symbols[1]` are the line prefixes of the two sides being compared,
        and `colors[0]` / `colors[1]` the highlight used for each side."""

        # Splits a line into word tokens and single non-word characters.
        # (The character classes used to read "a-za-z", which treated every
        # upper-case letter as a separator and split identifiers apart.)
        diff_regex = re.compile ('([a-zA-Z0-9_]*)([^a-zA-Z0-9_]?)')

        def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
                self.formatter = formatter
                self.colors = colors
                self.symbols = symbols

        def colorize_lines (self, lines):
                """Return the two lines of a pair with their differences highlighted.

                `lines` is a two-element sequence (old, new); a missing side
                may be None or ''.  Returns a list of output lines, each
                prefixed with its side's symbol; a side that ends up empty
                is omitted."""
                lines = (l if l else '' for l in lines)
                # One token per line, so difflib can align the two token streams.
                ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
                oo = ["",""]
                st = [False, False] # whether a color run is currently open per side
                for l in difflib.Differ().compare (*ss):
                        tag = l[0]
                        if tag == '?':
                                # Intraline hint lines from Differ; not part of either side.
                                continue
                        if tag == ' ':
                                # Common token: close any open run, emit on both sides.
                                for i in range(2):
                                        if st[i]:
                                                oo[i] += self.formatter.end_color ()
                                                st[i] = False
                                oo = [o + self.formatter.escape (l[2:]) for o in oo]
                                continue
                        # Differ always tags first-sequence tokens '-' and
                        # second-sequence tokens '+', whatever self.symbols is.
                        i = '-+'.find (tag)
                        if i >= 0:
                                if not st[i]:
                                        oo[i] += self.formatter.start_color (self.colors[i])
                                        st[i] = True
                                oo[i] += self.formatter.escape (l[2:])
                                continue
                for i in range(2):
                        if st[i]:
                                oo[i] += self.formatter.end_color ()
                                st[i] = False
                # Drop the token separators inserted above.
                oo = [o.replace ('\n', '') for o in oo]
                return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]

        def colorize_diff (self, f):
                """Yield the lines of diff `f` with changed pairs highlighted.

                Lines starting with one of the first two symbols are buffered
                and colorized as an (old, new) pair; every other line is
                escaped and passed through.  Only two-way diffs are
                supported, so lines carrying any later symbol are passed
                through as well rather than indexed out of range."""
                markers = self.symbols[:2]
                lines = [None, None]
                for l in f:
                        if not l or l[0] not in markers:
                                yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
                                continue
                        i = markers.index (l[0])
                        if lines[i]:
                                # Same side seen twice in a row: flush the unpaired line.
                                for line in self.colorize_lines (lines):
                                        yield line
                                lines = [None, None]
                        lines[i] = l[1:]
                        if (all (lines)):
                                # Both sides present: flush the pair.
                                for line in self.colorize_lines (lines):
                                        yield line
                                lines = [None, None]
                if (any (lines)):
                        # Flush whatever is left at end of input.
                        for line in self.colorize_lines (lines):
                                yield line
130
131
class ZipDiffer:
        """Line-by-line ("zipped") comparison of several files."""

        @staticmethod
        def diff_files (files, symbols=diff_symbols):
                """Write a positional diff of `files` to standard output.

                The files are read in lockstep.  A row whose lines are all
                equal is written once, prefixed with a space; otherwise each
                non-empty line is written prefixed with the symbol of the
                file it came from.  A broken pipe is ignored; any other
                IOError terminates the program with a message."""
                files = tuple (files) # a generator must be materialized before unpacking
                try:
                        for row in itertools.zip_longest (*files):
                                first = row[0]
                                differs = any (first != other for other in row[1:])
                                if not differs:
                                        sys.stdout.writelines ([" ", first])
                                        continue

                                for index, text in enumerate (row):
                                        if not text:
                                                continue
                                        sys.stdout.writelines ([symbols[index], text])
                except IOError as err:
                        if err.errno == errno.EPIPE:
                                return
                        sys.exit ("%s: %s: %s" % (sys.argv[0], err.filename, err.strerror))
149
150
class DiffFilters:
        """Generators that filter the lines of a diff."""

        @staticmethod
        def filter_failures (f):
                """Yield only the lines belonging to test cases that did not pass."""
                for _key, group in DiffHelpers.separate_test_cases (f):
                        # The group is a one-shot iterator; keep a copy so it can
                        # be both inspected and re-emitted.
                        case = list (group)
                        if DiffHelpers.test_passed (case):
                                continue
                        yield from case
159
class Stat:
        """Running tally: number of tests seen and the sum of their `freq`."""

        def __init__ (self):
                self.count, self.freq = 0, 0

        def add (self, test):
                """Count `test` once and accumulate its `freq` attribute."""
                self.count = self.count + 1
                self.freq = self.freq + test.freq
169
class Stats:
        """Pass/fail tallies for a set of tests, with simple statistics on them."""

        def __init__ (self):
                self.passed, self.failed, self.total = Stat (), Stat (), Stat ()

        def add (self, test):
                """Record `test` in the total and in the passed or failed tally."""
                self.total.add (test)
                bucket = self.passed if test.passed else self.failed
                bucket.add (test)

        def mean (self):
                """Fraction of recorded tests that passed."""
                return float (self.passed.count) / self.total.count

        def variance (self):
                """Product of the pass fraction and the fail fraction."""
                pass_rate = float (self.passed.count) / self.total.count
                fail_rate = float (self.failed.count) / self.total.count
                return pass_rate * fail_rate

        def stddev (self):
                """Square root of variance ()."""
                return pow (self.variance (), .5)

        def zscore (self, population):
                """Calculate the standard score.
                   Population is the Stats for population.
                   Self is Stats for sample.
                   Returns larger absolute value if sample is highly unlikely to be random.
                   Anything outside of -3..+3 is very unlikely to be random.
                   See: https://en.wikipedia.org/wiki/Standard_score"""

                deviation = self.mean () - population.mean ()
                return deviation / population.stddev ()
203
204
205
206
class DiffSinks:
        """Consumers that reduce a diff to a summary."""

        @staticmethod
        def print_stat (f):
                """Print how many test cases in diff `f` passed and failed.

                An input with no test cases reports 0 failures (0%) instead of
                dividing by zero."""
                passed = 0
                failed = 0
                # XXX port to Stats, but that would really slow us down here
                for key, lines in DiffHelpers.separate_test_cases (f):
                        if DiffHelpers.test_passed (lines):
                                passed += 1
                        else:
                                failed += 1
                total = passed + failed
                # Guard the percentage: `total` is 0 for empty input.
                percent = 100. * failed / total if total else 0.
                print ("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, percent))
221
222
class Test:
        """One test case parsed from the diff lines that describe it.

        Each line is read as `<symbol>[<identifier>: ]<payload>` where the
        payload is wrapped in a bracket pair that selects the field:
        `(...)` sets `text`, `<...>` sets `unicodes` (parsed with
        Unicode.parse) and `[...]` sets `glyphs`.  `passed` becomes False
        as soon as any line's symbol is not a space."""

        def __init__ (self, lines):
                self.freq = 1
                self.passed = True
                self.identifier = None
                self.text = None
                self.unicodes = None
                self.glyphs = None
                for l in lines:
                        if not l:
                                # Nothing to parse, not even a symbol.
                                continue
                        symbol = l[0]
                        if symbol != ' ':
                                self.passed = False
                        i = 1
                        if ':' in l:
                                i = l.index (':')
                                if not self.identifier:
                                        self.identifier = l[1:i]
                                i = i + 2 # Skip colon and space
                        # j indexes the closing bracket: the last character,
                        # or the one before a trailing newline.
                        j = -1
                        if l[j] == '\n':
                                j -= 1
                        if i >= len (l) + j:
                                # Too short to hold an opening and a closing bracket.
                                continue
                        brackets = l[i] + l[j]
                        # Slice up to the closing bracket itself, so lines
                        # without a trailing newline keep their last character.
                        l = l[i+1:j]
                        if brackets == '()':
                                self.text = l
                        elif brackets == '<>':
                                self.unicodes = Unicode.parse (l)
                        elif brackets == '[]':
                                # XXX we don't handle failed tests here
                                self.glyphs = l
254
255
class DiffHelpers:
        """Helpers for grouping diff lines into test cases and judging them."""

        @staticmethod
        def separate_test_cases (f):
                '''Reads lines from f, and if the lines have identifiers, ie.
                   have a colon character, groups them by identifier,
                   yielding (identifier, lines) pairs for each run of
                   consecutive lines with the same identifier.  Lines without
                   an identifier are keyed by the whole line.'''

                def identifier (l):
                        # Character 0 is the diff symbol; look for the colon
                        # after it so a leading ':' is not taken as the
                        # end of an (empty) identifier.
                        colon = l.find (':', 1)
                        if colon >= 0:
                                return l[1:colon]
                        return l
                return itertools.groupby (f, key=identifier)

        @staticmethod
        def test_passed (lines):
                """Return True if the test case made of `lines` counts as passed.

                A case passes when every line is unchanged (starts with a
                space), or when any added ('+') line mentions one of the
                glyph names listed below."""
                lines = list (lines)
                # XXX This is a hack, but does the job for now.
                excused = ("space+0|space+0", "uni25CC", "dottedcircle",
                           "glyph0", "gid0", "notdef")
                added = [l for l in lines if l.startswith ('+')]
                if any (name in l for l in added for name in excused):
                        return True
                return all (l.startswith (' ') for l in lines)
281
282
class FilterHelpers:
        """Adapters that turn a line filter into a function that prints its output."""

        @staticmethod
        def filter_printer_function (filter_callback):
                """Wrap `filter_callback` so each item it produces is print ()ed."""
                def printer (f):
                        for item in filter_callback (f):
                                print (item)
                return printer

        @staticmethod
        def filter_printer_function_no_newline (filter_callback):
                """Wrap `filter_callback` so each item it produces is written to
                standard output as is, with no newline appended."""
                def printer (f):
                        for item in filter_callback (f):
                                sys.stdout.write (item)
                return printer
298
299
class Ngram:
        """Factory for sliding-window (n-gram) generators."""

        @staticmethod
        def generator (n):
                """Return a generator function that yields every run of `n`
                consecutive items of its argument as a tuple.  The returned
                function carries `n` as its `.n` attribute."""

                def gen (f):
                        window = []
                        for item in f:
                                window.append (item)
                                if len (window) != n:
                                        continue
                                yield tuple (window)
                                # Slide: drop the oldest item.
                                del window[0]

                gen.n = n
                return gen
315
316
class UtilMains:
        """Command-line drivers: feed `sys.argv` (or standard input) to a callback."""

        @staticmethod
        def _exit_unless_broken_pipe (err):
                """Ignore EPIPE; exit with a message for any other I/O error."""
                if err.errno != errno.EPIPE:
                        sys.exit ("%s: %s: %s" % (sys.argv[0], err.filename, err.strerror))

        @staticmethod
        def process_multiple_files (callback, mnemonic = "FILE"):
                """Call `callback` with an open file for each command-line
                argument; with no arguments, standard input ('-') is used."""

                if "--help" in sys.argv:
                        sys.exit ("Usage: %s %s..." % (sys.argv[0], mnemonic))

                try:
                        names = sys.argv[1:] or ['-']
                        for name in names:
                                callback (FileHelpers.open_file_or_stdin (name))
                except IOError as e:
                        UtilMains._exit_unless_broken_pipe (e)

        @staticmethod
        def process_multiple_args (callback, mnemonic):
                """Call `callback` with each command-line argument in turn;
                at least one argument is required."""

                wants_help = "--help" in sys.argv
                if len (sys.argv) == 1 or wants_help:
                        sys.exit ("Usage: %s %s..." % (sys.argv[0], mnemonic))

                try:
                        for arg in sys.argv[1:]:
                                callback (arg)
                except IOError as e:
                        UtilMains._exit_unless_broken_pipe (e)

        @staticmethod
        def filter_multiple_strings_or_stdin (callback, mnemonic, \
                                              separator = " ", \
                                              concat_separator = False):
                """Print `callback` applied to the command-line arguments, or to
                each line of standard input when there are no arguments.

                Results for arguments are joined with `separator` on one
                line.  If `concat_separator` is given, the arguments are first
                joined with it into a single string.  Lines read from
                standard input have their trailing newline removed and are
                printed one result per line."""

                if "--help" in sys.argv:
                        sys.exit ("""Usage:
  %s %s...
or:
  %s
When called with no arguments, input is read from standard input.
""" % (sys.argv[0], mnemonic, sys.argv[0]))

                try:
                        if len (sys.argv) != 1:
                                args = sys.argv[1:]
                                if concat_separator != False:
                                        args = [concat_separator.join (args)]
                                print (separator.join (callback (x) for x in (args)))
                        else:
                                # readline () returns '' only at end of input.
                                for line in iter (sys.stdin.readline, ''):
                                        if line.endswith ('\n'):
                                                line = line[:-1]
                                        print (callback (line))
                except IOError as e:
                        UtilMains._exit_unless_broken_pipe (e)
376
377
class Unicode:
        """Conversions between text, "U+XXXX" notation and readable character names."""

        @staticmethod
        def decode (s):
                """Return the characters of `s` as comma-separated "U+XXXX" values."""
                return ','.join ("U+%04X" % cp for cp in codepoints (s))

        @staticmethod
        def parse (s):
                """Return the list of code points (ints) written in `s` as hex numbers.

                "0x" prefixes and the decoration characters listed in the
                second pattern (such as "U+", "<", ">", "," and "\\u") are
                treated as separators.  Raises ValueError for a token that
                is not hexadecimal."""
                s = re.sub (r"0[xX]", " ", s)
                s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n\t]", " ", s)
                return [int (x, 16) for x in s.split ()]

        @staticmethod
        def encode (s):
                """Return the string made of the code points written in `s` (see parse)."""
                return ''.join (chr (x) for x in Unicode.parse (s))

        # Abbreviations used in place of the full Unicode character name.
        shorthands = {
                "ZERO WIDTH NON-JOINER": "ZWNJ",
                "ZERO WIDTH JOINER": "ZWJ",
                "NARROW NO-BREAK SPACE": "NNBSP",
                "COMBINING GRAPHEME JOINER": "CGJ",
                "LEFT-TO-RIGHT MARK": "LRM",
                "RIGHT-TO-LEFT MARK": "RLM",
                "LEFT-TO-RIGHT EMBEDDING": "LRE",
                "RIGHT-TO-LEFT EMBEDDING": "RLE",
                "POP DIRECTIONAL FORMATTING": "PDF",
                "LEFT-TO-RIGHT OVERRIDE": "LRO",
                "RIGHT-TO-LEFT OVERRIDE": "RLO",
        }

        @staticmethod
        def pretty_name (u):
                """Return a short readable name for the single character `u`.

                The Unicode name is shortened by dropping the script prefix
                of letters, signs and combining marks; vowel signs become
                "<NAME>-MATRA", viramas become "HALANT", and names listed in
                `shorthands` are abbreviated.  Characters without a Unicode
                name give "XXX"."""
                try:
                        s = unicodedata.name (u)
                except ValueError:
                        return "XXX"
                s = re.sub (".* LETTER ", "", s)
                s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
                s = re.sub (".* SIGN ", "", s)
                s = re.sub (".* COMBINING ", "", s)
                # "<SCRIPT> SIGN VIRAMA" has already been cut down to "VIRAMA"
                # by the SIGN rule above, so the prefix must be optional here;
                # names like "BRAHMI VIRAMA" still match through the prefix.
                if re.match ("(.* )?VIRAMA", s):
                        s = "HALANT"
                if s in Unicode.shorthands:
                        s = Unicode.shorthands[s]
                return s

        @staticmethod
        def pretty_names (s):
                """Return the pretty names of the code points written in `s`
                (hex, optionally decorated with "U+", "0x", "<>" or "\\u"),
                joined with " + "."""
                s = re.sub (r"[<+>\\uU]", " ", s)
                s = re.sub (r"0[xX]", " ", s)
                s = [chr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
                return ' + '.join (Unicode.pretty_name (x) for x in s)
430
431
class FileHelpers:
        """Small helpers for opening input files."""

        @staticmethod
        def open_file_or_stdin (f):
                """Return standard input when `f` is '-', otherwise the file
                named `f` opened for reading."""
                return sys.stdin if f == '-' else open (f)
439
440
class Manifest:
        """Reading and generating per-directory MANIFEST files."""

        @staticmethod
        def read (s, strict = True):
                """Yield the normalized paths of all files reachable from `s`.

                A file path yields itself.  A directory is expanded through
                its MANIFEST file, one entry per line, recursively.  When
                `strict` is true a missing path or unreadable MANIFEST
                terminates the program; otherwise it is silently skipped.
                `strict` applies to every level of the recursion."""

                if not os.path.exists (s):
                        if strict:
                                sys.exit ("%s: %s does not exist" % (sys.argv[0], s))
                        return

                s = os.path.normpath (s)

                if not os.path.isdir (s):
                        yield s
                        return

                manifest = os.path.join (s, "MANIFEST")
                try:
                        # Read everything up front so the file is closed before
                        # recursing and before control returns to the consumer.
                        with open (manifest) as m:
                                items = [x.strip () for x in m]
                except IOError:
                        if strict:
                                sys.exit ("%s: %s does not exist" % (sys.argv[0], manifest))
                        return

                for f in items:
                        child = os.path.join (s, f)
                        # A blank line or "." names this directory again and
                        # would recurse without end.
                        if os.path.normpath (child) == s:
                                continue
                        for p in Manifest.read (child, strict):
                                yield p

        @staticmethod
        def update_recursive (s):
                """(Re)write a MANIFEST file in `s` and in every directory below it.

                Each MANIFEST lists the directory's files, then its
                subdirectories, both sorted.  Entries named like the
                bookkeeping files below are left out, and directories with
                such names are not descended into."""

                # os.walk () already visits every subdirectory, so no explicit
                # recursion is needed; editing `dirnames` in place prunes
                # and orders the traversal.
                for dirpath, dirnames, filenames in os.walk (s, followlinks=True):

                        for f in ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]:
                                if f in dirnames:
                                        dirnames.remove (f)
                                if f in filenames:
                                        filenames.remove (f)
                        dirnames.sort ()
                        filenames.sort ()
                        ms = os.path.join (dirpath, "MANIFEST")
                        print ("  GEN    %s" % ms)
                        with open (ms, "w") as m:
                                for f in filenames:
                                        print (f, file=m)
                                for f in dirnames:
                                        print (f, file=m)
487                         for f in dirnames:
488                                 Manifest.update_recursive (os.path.join (dirpath, f))
489
# Running this file directly does nothing: the guarded body is an empty
# placeholder.
if __name__ == '__main__':
        pass