7473982998bdcbf6a334299ce354c1325f4e868d
[platform/upstream/harfbuzz.git] / test / shaping / hb_test_tools.py
1 #!/usr/bin/env python
2
3 from __future__ import print_function
4 import sys, os, re, difflib, unicodedata, errno, cgi
5 from itertools import *
6
# One-character prefixes tagging which input a diff line came from: the line
# from file number i is written with diff_symbols[i] (see ZipDiffer.diff_files).
diff_symbols = "-+=*&^%$#@!~/"
# Highlight colors used by DiffColorizer, indexed the same way as diff_symbols.
diff_colors = ['red', 'green', 'blue']
9
# Provide a module-level unichr() that accepts the full 0..0x10FFFF range on
# Python 2 (wide and narrow builds) as well as on Python 3.
try:
        # On Python 2 this binds the builtin as a module global.  On Python 3 the
        # lookup raises NameError and the handler at the bottom substitutes chr.
        unichr = unichr

        if sys.maxunicode < 0x10FFFF:
                # workarounds for Python 2 "narrow" builds with UCS2-only support.

                _narrow_unichr = unichr

                def unichr(i):
                        """
                        Return the unicode character whose Unicode code is the integer 'i'.
                        The valid range is 0 to 0x10FFFF inclusive.

                        >>> _narrow_unichr(0xFFFF + 1)
                        Traceback (most recent call last):
                          File "<stdin>", line 1, in ?
                        ValueError: unichr() arg not in range(0x10000) (narrow Python build)
                        >>> unichr(0xFFFF + 1) == u'\U00010000'
                        True
                        >>> unichr(1114111) == u'\U0010FFFF'
                        True
                        >>> unichr(0x10FFFF + 1)
                        Traceback (most recent call last):
                          File "<stdin>", line 1, in ?
                        ValueError: unichr() arg not in range(0x110000)
                        """
                        try:
                                return _narrow_unichr(i)
                        except ValueError:
                                try:
                                        # "%08x" never emits the trailing "L" that hex() adds
                                        # for Python 2 longs, which would corrupt the escape.
                                        escape_str = "\\U%08x" % i
                                        return escape_str.decode("unicode-escape")
                                except UnicodeDecodeError:
                                        raise ValueError('unichr() arg not in range(0x110000)')

except NameError:
        unichr = chr
48
class ColorFormatter:
        """Namespace of output formatters used to highlight diff output.

        Each nested class exposes the same four static methods:
        start_color (c) and end_color () return the markup that opens and
        closes a highlighted span, escape (s) makes text safe for the target
        format, and newline () returns the line terminator to emit.
        """

        class Null:
                """Plain text: no highlighting markup and no escaping."""
                @staticmethod
                def start_color (c): return ''
                @staticmethod
                def end_color (): return ''
                @staticmethod
                def escape (s): return s
                @staticmethod
                def newline (): return '\n'

        class ANSI:
                """Terminal output using ANSI SGR escape sequences."""
                @staticmethod
                def start_color (c):
                        # Colored background with bold white foreground; raises
                        # KeyError for a color name not listed here.
                        return {
                                'red': '\033[41;37;1m',
                                'green': '\033[42;37;1m',
                                'blue': '\033[44;37;1m',
                        }[c]
                @staticmethod
                def end_color ():
                        return '\033[m'
                @staticmethod
                def escape (s): return s
                @staticmethod
                def newline (): return '\n'

        class HTML:
                """HTML output using <span> background styles and <br/> line breaks."""
                @staticmethod
                def start_color (c):
                        return '<span style="background:%s">' % c
                @staticmethod
                def end_color ():
                        return '</span>'
                @staticmethod
                def escape (s):
                        # cgi.escape was removed in Python 3.8; html.escape is its
                        # replacement.  quote=False keeps cgi.escape's default of
                        # leaving quote characters untouched.
                        try:
                                from html import escape as escape_markup
                        except ImportError: # Python 2
                                from cgi import escape as escape_markup
                        return escape_markup (s, quote=False)
                @staticmethod
                def newline (): return '<br/>\n'

        @staticmethod
        def Auto (argv = None, out = sys.stdout):
                """Pick a formatter class from command-line flags.

                Recognized flags are removed from argv in place (first occurrence
                only), so the caller can process the remaining arguments.
                Without any flag the ANSI formatter is returned.  When several
                flags are present, precedence is --no-format, then --format=html,
                then --format=ansi / --format.

                argv: mutable list of arguments; None means "no arguments".
                out:  accepted for interface compatibility; not consulted.
                """
                if argv is None:
                        argv = []
                # Evaluated in order, so a later matching entry overrides an earlier one.
                flags = (
                        ("--format", ColorFormatter.ANSI),
                        ("--format=ansi", ColorFormatter.ANSI),
                        ("--format=html", ColorFormatter.HTML),
                        ("--no-format", ColorFormatter.Null),
                )
                chosen = ColorFormatter.ANSI
                for flag, formatter in flags:
                        if flag in argv:
                                argv.remove (flag)
                                chosen = formatter
                return chosen
105
106
class DiffColorizer:
        """Highlights the differing parts of paired lines in a two-way diff.

        Input lines are expected to start with one of the characters in
        `symbols`; only the first two symbols can be paired and colorized.
        """

        # Splits a line into alternating "word" and single "separator" tokens.
        # The character class previously read [a-za-z0-9_], which left out
        # uppercase letters and so split every capital into its own token.
        diff_regex = re.compile ('([a-zA-Z0-9_]*)([^a-zA-Z0-9_]?)')

        def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
                """formatter: a ColorFormatter-style class (start_color, end_color,
                escape, newline); colors and symbols are indexed in parallel."""
                self.formatter = formatter
                self.colors = colors
                self.symbols = symbols

        def _end_open_colors (self, outputs, open_spans):
                """Close, in place, every highlighted span that is still open."""
                for i, is_open in enumerate (open_spans):
                        if is_open:
                                outputs[i] += self.formatter.end_color ()
                                open_spans[i] = False

        def colorize_lines (self, lines):
                """Return the two lines of a pair with their differences highlighted.

                lines: two-item sequence (old, new); a falsy item is treated as ''.
                Returns a list of output lines, each prefixed with its symbol and
                terminated by the formatter's newline; empty sides are omitted.
                """
                lines = (l if l else '' for l in lines)
                # One token per line, so difflib compares token sequences.
                tokens = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
                outputs = ["", ""]
                open_spans = [False, False]
                for entry in difflib.Differ ().compare (*tokens):
                        tag = entry[0]
                        if tag == '?':
                                # difflib hint line; carries no text of its own
                                continue
                        if tag == ' ':
                                # Token common to both sides: end any highlight, copy to both.
                                self._end_open_colors (outputs, open_spans)
                                text = self.formatter.escape (entry[2:])
                                outputs = [o + text for o in outputs]
                                continue
                        if tag in self.symbols:
                                i = self.symbols.index (tag)
                                if not open_spans[i]:
                                        outputs[i] += self.formatter.start_color (self.colors[i])
                                        open_spans[i] = True
                                outputs[i] += self.formatter.escape (entry[2:])
                                continue
                self._end_open_colors (outputs, open_spans)
                # Drop the newlines that were only inserted to tokenize.
                outputs = [o.replace ('\n', '') for o in outputs]
                return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, outputs) if s2]

        def colorize_diff (self, f):
                """Yield the lines of diff stream f with changed pairs highlighted.

                Lines not starting with a diff symbol are passed through (escaped,
                with the formatter's newline).  A line for side 0 and a line for
                side 1 are buffered and colorized together; a second line for the
                same side flushes whatever is buffered first.
                """
                pending = [None, None]
                for l in f:
                        # An empty string has no tag character; treat it as context.
                        if not l or l[0] not in self.symbols:
                                yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
                                continue
                        # NOTE(review): a symbol beyond the first two indexes past
                        # `pending` and raises IndexError; only two-way diffs are handled.
                        i = self.symbols.index (l[0])
                        if pending[i]:
                                # Flush
                                for line in self.colorize_lines (pending):
                                        yield line
                                pending = [None, None]
                        pending[i] = l[1:]
                        if all (pending):
                                # Flush
                                for line in self.colorize_lines (pending):
                                        yield line
                                pending = [None, None]
                if any (pending):
                        # Flush
                        for line in self.colorize_lines (pending):
                                yield line
167
168
class ZipDiffer:
        """Line-by-line ("zipped") comparison of several files."""

        @staticmethod
        def diff_files (files, symbols=diff_symbols):
                """Write a positional diff of `files` to standard output.

                The files are read in lockstep.  When the current line is the same
                in every file it is written once, prefixed with a space; otherwise
                each file's line is written prefixed with that file's symbol
                (symbols[i] for file number i).  Files that ran out of lines
                contribute nothing.

                files:   iterable of iterables of lines (e.g. open file objects).
                symbols: sequence of one-character prefixes, one per file.
                A broken pipe is ignored; any other IOError is reported on stderr
                and terminates the process with status 1.
                """
                # Python 2 spells it izip_longest, Python 3 zip_longest.
                try:
                        from itertools import izip_longest as zip_longest
                except ImportError:
                        from itertools import zip_longest

                files = tuple (files) # in case it's a generator, copy it
                try:
                        for lines in zip_longest (*files):
                                first = lines[0]
                                if all (first == line for line in lines[1:]):
                                        sys.stdout.writelines ([" ", first])
                                        continue

                                for i, l in enumerate (lines):
                                        if l:
                                                sys.stdout.writelines ([symbols[i], l])
                except IOError as e:
                        if e.errno != errno.EPIPE:
                                print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
                                sys.exit (1)
187
188
class DiffFilters:
        """Generators that narrow a diff stream down to the lines of interest."""

        @staticmethod
        def filter_failures (f):
                """Yield only the lines of f that belong to failing test cases."""
                for _, group in DiffHelpers.separate_test_cases (f):
                        # The group is a one-shot iterator; keep a copy so it can be
                        # both inspected and replayed.
                        case = list (group)
                        if DiffHelpers.test_passed (case):
                                continue
                        for line in case:
                                yield line
197
class Stat:
        """Running tally of tests: how many were added and their summed frequency."""

        def __init__ (self):
                # Number of tests added / sum of their `freq` attributes.
                self.count = self.freq = 0

        def add (self, test):
                """Account for one more test, weighting it by test.freq."""
                self.count = self.count + 1
                self.freq = self.freq + test.freq
207
class Stats:
        """Pass/fail tallies for a set of tests, with simple pass-rate statistics."""

        def __init__ (self):
                self.passed, self.failed, self.total = Stat (), Stat (), Stat ()

        def add (self, test):
                """Record test in the total and in the passed or failed tally."""
                self.total.add (test)
                bucket = self.passed if test.passed else self.failed
                bucket.add (test)

        def mean (self):
                """Return the fraction of tests that passed."""
                passed = float (self.passed.count)
                return passed / self.total.count

        def variance (self):
                """Return p * q, with p the passed and q the failed fraction."""
                pass_rate = float (self.passed.count) / self.total.count
                fail_rate = float (self.failed.count) / self.total.count
                return pass_rate * fail_rate

        def stddev (self):
                """Return the square root of the variance."""
                return self.variance () ** .5

        def zscore (self, population):
                """Calculate the standard score.
                   Population is the Stats for population.
                   Self is Stats for sample.
                   Returns larger absolute value if sample is highly unlikely to be random.
                   Anything outside of -3..+3 is very unlikely to be random.
                   See: http://en.wikipedia.org/wiki/Standard_score"""

                deviation = self.mean () - population.mean ()
                return deviation / population.stddev ()
241
242
243
244
class DiffSinks:
        """Consumers that read a whole diff stream and print a summary of it."""

        @staticmethod
        def print_stat (f):
                """Print how many test cases in diff stream f passed and failed."""
                passed = 0
                failed = 0
                # XXX port to Stats, but that would really slow us down here
                for key, lines in DiffHelpers.separate_test_cases (f):
                        if DiffHelpers.test_passed (lines):
                                passed += 1
                        else:
                                failed += 1
                total = passed + failed
                # An empty stream has no tests; report 0% rather than divide by zero.
                percent = 100. * failed / total if total else 0.
                print ("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, percent))

        @staticmethod
        def print_ngrams (f, ns=(1,2,3)):
                """Print a z-score for every character n-gram that fails often.

                f:  diff stream, grouped into test cases by identifier.
                ns: n-gram lengths to collect from each test's code points.
                Only n-grams seen in at least 30 failing tests are reported.
                """
                gens = tuple (Ngram.generator (n) for n in ns)
                allstats = Stats ()
                allgrams = {}
                for key, lines in DiffHelpers.separate_test_cases (f):
                        test = Test (lines)
                        allstats.add (test)

                        for gen in gens:
                                for ngram in gen (test.unicodes):
                                        if ngram not in allgrams:
                                                allgrams[ngram] = Stats ()
                                        allgrams[ngram].add (test)

                # dict.items () exists on both Python 2 and 3 (iteritems is 2-only).
                for ngram, stats in allgrams.items ():
                        if stats.failed.count < 30: # for statistical reasons
                                continue
                        print ("zscore: %9f failed: %6d passed: %6d ngram: <%s>" % (stats.zscore (allstats), stats.failed.count, stats.passed.count, ','.join ("U+%04X" % u for u in ngram)))
284
285
286
class Test:
        """One test case parsed from the diff lines that share an identifier.

        Attributes:
          freq       -- weight of the test; always 1 here.
          passed     -- False as soon as any line is not prefixed with a space.
          identifier -- text between the prefix character and the first colon.
          text       -- payload of the last "(...)" line, if any.
          unicodes   -- code points parsed from the last "<...>" line, if any.
          glyphs     -- payload of the last "[...]" line, if any.
        """

        def __init__ (self, lines):
                """lines: iterable of non-empty diff lines, each shaped like
                PREFIX [IDENTIFIER ": "] OPEN payload CLOSE ["\\n"]."""
                self.freq = 1
                self.passed = True
                self.identifier = None
                self.text = None
                self.unicodes = None
                self.glyphs = None
                for line in lines:
                        if line[0] != ' ':
                                self.passed = False
                        start = 1
                        if ':' in line:
                                colon = line.index (':')
                                if not self.identifier:
                                        self.identifier = line[1:colon]
                                start = colon + 2 # Skip colon and space
                        # Index of the closing bracket: last character, or the one
                        # before the trailing newline when there is one.
                        end = -2 if line[-1] == '\n' else -1
                        brackets = line[start] + line[end]
                        # Slice up to `end` rather than a fixed -2, so that a final
                        # line without a newline does not lose a payload character.
                        payload = line[start+1:end]
                        if brackets == '()':
                                self.text = payload
                        elif brackets == '<>':
                                self.unicodes = Unicode.parse (payload)
                        elif brackets == '[]':
                                # XXX we don't handle failed tests here
                                self.glyphs = payload
318
319
class DiffHelpers:
        """Helpers for grouping diff lines into test cases and judging them."""

        @staticmethod
        def separate_test_cases (f):
                '''Reads lines from f, and if the lines have identifiers, ie.
                   have a colon character, groups them by identifier,
                   yielding lists of all lines with the same identifier.

                   Returns an itertools.groupby iterator of (key, lines) pairs.
                   Only consecutive lines are grouped; a line without an
                   identifier is its own key.'''

                def identifier (l):
                        # Search after the prefix character, so that a line that
                        # itself starts with ':' does not produce an empty key.
                        colon = l.find (':', 1)
                        if colon >= 0:
                                return l[1:colon]
                        return l
                return groupby (f, key=identifier)

        @staticmethod
        def test_passed (lines):
                """Return True if the test case made of `lines` counts as passed.

                A case passes when every line is unchanged (prefixed with a
                space), or when any '+' line contains one of the markers below.
                """
                lines = list (lines)
                # XXX This is a hack, but does the job for now.
                tolerated = ("space+0|space+0", "uni25CC", "dottedcircle",
                             "glyph0", "gid0", "notdef")
                added = [l for l in lines if l.startswith ('+')]
                if any (marker in l for l in added for marker in tolerated):
                        return True
                return all (l.startswith (' ') for l in lines)
345
346
class FilterHelpers:
        """Adapters that turn a line filter into a function printing its output."""

        @staticmethod
        def filter_printer_function (filter_callback):
                """Return a function that prints each item filter_callback yields,
                one per line (a newline is appended by print)."""
                def printer (f):
                        for item in filter_callback (f):
                                print (item)
                return printer

        @staticmethod
        def filter_printer_function_no_newline (filter_callback):
                """Return a function that writes each item filter_callback yields
                to standard output verbatim, adding no newline."""
                def printer (f):
                        for item in filter_callback (f):
                                sys.stdout.write (item)
                return printer
362
363
class Ngram:
        """Factory for sliding-window n-gram generators."""

        @staticmethod
        def generator (n):
                """Return a generator function yielding every run of n consecutive
                items of its argument as a tuple.  The window size is also
                exposed as the function's `n` attribute."""

                def gen (f):
                        window = []
                        for item in f:
                                window.append (item)
                                if len (window) != n:
                                        continue
                                yield tuple (window)
                                # slide the window forward by one item
                                window.pop (0)

                gen.n = n
                return gen
379
380
class UtilMains:
        """Ready-made main () bodies for small command-line utilities.

        Every entry point reads sys.argv itself, prints a usage message and
        exits with status 1 when --help is given, ignores a broken pipe, and
        reports any other IOError on stderr before exiting with status 1.
        """

        @staticmethod
        def _report_io_error (e):
                """Report IOError e and exit with status 1, unless it is a broken pipe."""
                if e.errno == errno.EPIPE:
                        return
                print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
                sys.exit (1)

        @staticmethod
        def process_multiple_files (callback, mnemonic = "FILE"):
                """Call callback with an open file for each command-line argument
                ('-' meaning standard input); with no arguments, standard input
                alone is processed."""

                if "--help" in sys.argv:
                        print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
                        sys.exit (1)

                try:
                        names = sys.argv[1:] if len (sys.argv) > 1 else ['-']
                        for name in names:
                                callback (FileHelpers.open_file_or_stdin (name))
                except IOError as e:
                        UtilMains._report_io_error (e)

        @staticmethod
        def process_multiple_args (callback, mnemonic):
                """Call callback once for each command-line argument; at least
                one argument is required, otherwise usage is printed."""

                if len (sys.argv) == 1 or "--help" in sys.argv:
                        print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
                        sys.exit (1)

                try:
                        for arg in sys.argv[1:]:
                                callback (arg)
                except IOError as e:
                        UtilMains._report_io_error (e)

        @staticmethod
        def filter_multiple_strings_or_stdin (callback, mnemonic, \
                                              separator = " ", \
                                              concat_separator = False):
                """Print callback's result for each argument, or for each stdin line.

                With arguments, the results are joined by `separator` and printed
                on one line; if concat_separator is given, the arguments are first
                joined with it into a single string.  Without arguments, every
                line of standard input (minus its newline) is converted and
                printed separately.
                """

                if "--help" in sys.argv:
                        print ("Usage:\n  %s %s...\nor:\n  %s\n\nWhen called with no arguments, input is read from standard input." \
                              % (sys.argv[0], mnemonic, sys.argv[0]))
                        sys.exit (1)

                try:
                        if len (sys.argv) == 1:
                                # readline () returns '' only at end of input
                                for line in iter (sys.stdin.readline, ''):
                                        if line.endswith ('\n'):
                                                line = line[:-1]
                                        print (callback (line))
                                return

                        args = sys.argv[1:]
                        # Compared against False (not tested for truth) so that an
                        # empty string still counts as a separator.
                        if concat_separator != False:
                                args = [concat_separator.join (args)]
                        print (separator.join (callback (x) for x in args))
                except IOError as e:
                        UtilMains._report_io_error (e)
442
443
class Unicode:
        """Conversions between text, "U+XXXX" code point lists and character names.

        Functions that return text return the native str type of the running
        interpreter: UTF-8 encoded bytes on Python 2, str on Python 3.
        """

        @staticmethod
        def _native (text):
                """Return unicode `text` as the interpreter's native str type."""
                if sys.version_info[0] == 2:
                        return text.encode ('utf-8')
                return text

        @staticmethod
        def decode (s):
                """Return the code points of s as a comma-separated "U+XXXX" list.

                s may be UTF-8 encoded bytes or already-decoded text.
                """
                # The Python-2-only unicode () builtin is avoided; bytes is an
                # alias of str there, so this decodes on both versions.
                if isinstance (s, bytes):
                        s = s.decode ('utf-8')
                return Unicode._native (u','.join ("U+%04X" % ord (u) for u in s))

        @staticmethod
        def parse (s):
                """Return the list of integers for the hexadecimal code points in s.

                Accepts forms such as "U+0041", "0x41", "uni0041" and "\\u0041",
                separated by whitespace or punctuation.  Raises ValueError if
                what remains after stripping those decorations is not hexadecimal.
                """
                # Drop "0x" prefixes first; removing the x alone would leave a stray 0.
                s = re.sub (r"0[xX]", " ", s)
                # Whitespace is included only for completeness; split () below
                # treats every kind of whitespace as a separator anyway.
                s = re.sub (r"[<+>{},;&#\\xXuUnNiI\s]", " ", s)
                return [int (x, 16) for x in s.split ()]

        @staticmethod
        def encode (s):
                """Return the text spelled by the code points listed in s (see parse)."""
                s = u''.join (unichr (x) for x in Unicode.parse (s))
                return Unicode._native (s)

        # Well-known abbreviations used in place of the full character name.
        shorthands = {
                "ZERO WIDTH NON-JOINER": "ZWNJ",
                "ZERO WIDTH JOINER": "ZWJ",
                "NARROW NO-BREAK SPACE": "NNBSP",
                "COMBINING GRAPHEME JOINER": "CGJ",
                "LEFT-TO-RIGHT MARK": "LRM",
                "RIGHT-TO-LEFT MARK": "RLM",
                "LEFT-TO-RIGHT EMBEDDING": "LRE",
                "RIGHT-TO-LEFT EMBEDDING": "RLE",
                "POP DIRECTIONAL FORMATTING": "PDF",
                "LEFT-TO-RIGHT OVERRIDE": "LRO",
                "RIGHT-TO-LEFT OVERRIDE": "RLO",
        }

        @staticmethod
        def pretty_name (u):
                """Return a short human-readable name for the single character u.

                Script prefixes are dropped ("DEVANAGARI LETTER KA" -> "KA"), vowel
                signs become "<X>-MATRA", viramas become "HALANT", and the names in
                `shorthands` are abbreviated.  Characters without a Unicode name
                yield "XXX".
                """
                try:
                        s = unicodedata.name (u)
                except ValueError:
                        return "XXX"
                s = re.sub (".* LETTER ", "", s)
                s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
                s = re.sub (".* SIGN ", "", s)
                s = re.sub (".* COMBINING ", "", s)
                # "<SCRIPT> SIGN VIRAMA" has already been reduced to a bare "VIRAMA"
                # by the SIGN rule above, so the leading "<SCRIPT> " must be optional.
                if re.match ("(.* )?VIRAMA", s):
                        s = "HALANT"
                if s in Unicode.shorthands:
                        s = Unicode.shorthands[s]
                return s

        @staticmethod
        def pretty_names (s):
                """Return the pretty names of the code points listed in s, joined by " + "."""
                s = re.sub (r"[<+>\\uU]", " ", s)
                s = re.sub (r"0[xX]", " ", s)
                chars = [unichr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
                return Unicode._native (u' + '.join (Unicode.pretty_name (x) for x in chars))
498
499
class FileHelpers:
        """Small helpers for opening input files."""

        @staticmethod
        def open_file_or_stdin (f):
                """Return sys.stdin if f is '-', otherwise file f opened for reading.

                Raises IOError if the file cannot be opened.  The caller owns the
                returned file object.
                """
                if f == '-':
                        return sys.stdin
                # open () works on Python 2 and 3; the file () builtin is Python 2 only.
                return open (f)
507
508
class Manifest:
        """Reading and generating the MANIFEST files that list a directory's entries."""

        # Entries that are never listed in (nor descended into for) a MANIFEST.
        _excluded = ("MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog")

        @staticmethod
        def read (s, strict = True):
                """Yield the normalized path of every file reachable from s.

                If s is a file it is yielded itself.  If s is a directory, the
                entries named in its MANIFEST file are read recursively.

                strict: when true, a missing path or an unreadable MANIFEST is
                reported on stderr and the process exits with status 1; when
                false, nothing is yielded for it.  Entries listed inside a
                MANIFEST are always read strictly.
                """

                if not os.path.exists (s):
                        if strict:
                                print ("%s: %s does not exist" % (sys.argv[0], s), file=sys.stderr)
                                sys.exit (1)
                        return

                s = os.path.normpath (s)

                if not os.path.isdir (s):
                        yield s
                        return

                manifest = os.path.join (s, "MANIFEST")
                try:
                        # `with` closes the file; open () also exists on Python 3.
                        with open (manifest) as m:
                                items = [x.strip () for x in m]
                except IOError:
                        if strict:
                                print ("%s: %s does not exist" % (sys.argv[0], manifest), file=sys.stderr)
                                sys.exit (1)
                        return

                for f in items:
                        if not f:
                                # A blank line would join back to the directory itself
                                # and recurse without end.
                                continue
                        for p in Manifest.read (os.path.join (s, f)):
                                yield p

        @staticmethod
        def update_recursive (s):
                """(Re)write the MANIFEST file of s and of every directory below it.

                Each MANIFEST lists the directory's files, then its
                subdirectories, both sorted, one name per line, leaving out the
                names in Manifest._excluded.  Symbolic links to directories are
                followed.  A "GEN" progress line is printed per file written.
                """

                # os.walk already visits every subdirectory, so no explicit
                # recursion is needed (it would regenerate nested manifests
                # once per ancestor).
                for dirpath, dirnames, filenames in os.walk (s, followlinks=True):

                        # Assigned in place: os.walk descends into what is left in
                        # dirnames, in this order.
                        dirnames[:] = sorted (d for d in dirnames if d not in Manifest._excluded)
                        filenames = sorted (f for f in filenames if f not in Manifest._excluded)
                        ms = os.path.join (dirpath, "MANIFEST")
                        print ("  GEN    %s" % ms)
                        with open (ms, "w") as m:
                                for f in filenames + dirnames:
                                        print (f, file=m)
559
# Running this file directly is a deliberate no-op: the guarded body is only `pass`.
if __name__ == '__main__':
        pass