Imported Upstream version 1.4.6
[platform/upstream/harfbuzz.git] / test / shaping / hb_test_tools.py
1 #!/usr/bin/env python
2
3 from __future__ import print_function
4 import sys, os, re, difflib, unicodedata, errno, cgi
5 from itertools import *
6
# One marker character per input file of a diff; ZipDiffer prefixes the line
# from file number i with diff_symbols[i].
diff_symbols = '-+=*&^%$#@!~/'
# Highlight colours, indexed in parallel with diff_symbols by DiffColorizer.
diff_colors = ['red', 'green', 'blue']
9
try:
        # Python 2: rebind the builtin as a module global so it can be wrapped
        # below.  On Python 3 this lookup raises NameError and chr is used.
        unichr = unichr

        if sys.maxunicode < 0x10FFFF:
                # Python 2 "narrow" (UCS2-only) build: the builtin rejects code
                # points above 0xFFFF, so fall back to decoding an escape.

                _narrow_unichr = unichr

                def unichr(i):
                        """
                        Return the unicode character whose Unicode code is the integer 'i'.
                        The valid range is 0 to 0x10FFFF inclusive.

                        >>> _narrow_unichr(0xFFFF + 1)
                        Traceback (most recent call last):
                          File "<stdin>", line 1, in ?
                        ValueError: unichr() arg not in range(0x10000) (narrow Python build)
                        >>> unichr(0xFFFF + 1) == u'\U00010000'
                        True
                        >>> unichr(1114111) == u'\U0010FFFF'
                        True
                        >>> unichr(0x10FFFF + 1)
                        Traceback (most recent call last):
                          File "<stdin>", line 1, in ?
                        ValueError: unichr() arg not in range(0x110000)
                        """
                        try:
                                return _narrow_unichr(i)
                        except ValueError:
                                pass
                        # Spell the code point as a \UXXXXXXXX escape and let the
                        # unicode-escape codec build the character.
                        escaped = "\\U" + hex(i)[2:].zfill(8)
                        try:
                                return escaped.decode("unicode-escape")
                        except UnicodeDecodeError:
                                raise ValueError('unichr() arg not in range(0x110000)')

except NameError:
        unichr = chr
48
try:
        # Python 2: expose the builtin text type as a module global.
        unicode = unicode
except NameError:
        # Python 3 has no 'unicode' builtin; str is used instead.
        unicode = str

def tounicode(s, encoding='ascii', errors='strict'):
        """Return s as text.

        If s is already an instance of unicode it is returned unchanged;
        otherwise the result of s.decode (encoding, errors) is returned.
        """
        if isinstance(s, unicode):
                return s
        return s.decode(encoding, errors)
59
class ColorFormatter:
        """Namespace of output formatters used when highlighting diffs.

        Every nested formatter offers the same four static methods:
        start_color (c), end_color (), escape (s) and newline ().
        """

        class Null:
                """Formatter that adds no markup."""
                @staticmethod
                def start_color (c): return ''
                @staticmethod
                def end_color (): return ''
                @staticmethod
                def escape (s): return s
                @staticmethod
                def newline (): return '\n'

        class ANSI:
                """Formatter that emits ANSI terminal escape sequences."""
                @staticmethod
                def start_color (c):
                        # Raises KeyError for any colour other than the three below.
                        return {
                                'red': '\033[41;37;1m',
                                'green': '\033[42;37;1m',
                                'blue': '\033[44;37;1m',
                        }[c]
                @staticmethod
                def end_color ():
                        return '\033[m'
                @staticmethod
                def escape (s): return s
                @staticmethod
                def newline (): return '\n'

        class HTML:
                """Formatter that emits HTML spans and HTML-escaped text."""
                @staticmethod
                def start_color (c):
                        return '<span style="background:%s">' % c
                @staticmethod
                def end_color ():
                        return '</span>'
                @staticmethod
                def escape (s):
                        # cgi.escape was removed in Python 3.8.  html.escape with
                        # quote=False escapes the same characters (&, <, >).
                        try:
                                from html import escape as html_escape
                        except ImportError:
                                # Python 2 has no html module.
                                return cgi.escape (s)
                        return html_escape (s, quote=False)
                @staticmethod
                def newline (): return '<br/>\n'

        @staticmethod
        def Auto (argv = None, out = sys.stdout):
                """Pick a formatter class from command-line style flags.

                Recognised flags are removed from argv in place (one occurrence
                each).  When several are present the one listed last below wins.
                With no flag at all, ColorFormatter.ANSI is returned.  The out
                parameter is accepted but not used.
                """
                if argv is None:
                        argv = []
                flags = (
                        ("--format", ColorFormatter.ANSI),
                        ("--format=ansi", ColorFormatter.ANSI),
                        ("--format=html", ColorFormatter.HTML),
                        ("--no-format", ColorFormatter.Null),
                )
                chosen = ColorFormatter.ANSI
                for flag, formatter in flags:
                        if flag in argv:
                                argv.remove (flag)
                                chosen = formatter
                return chosen
116
117
class DiffColorizer:
        """Highlights the parts that differ between paired lines of a diff.

        formatter must provide start_color (c), end_color (), escape (s) and
        newline (), as the ColorFormatter members do.
        """

        # An identifier-like run followed by at most one separator character.
        # colorize_lines uses it to break a line into tokens for difflib.
        # (The ranges previously read 'a-za-z', which left upper-case letters
        # out of the identifier class.)
        diff_regex = re.compile ('([a-zA-Z0-9_]*)([^a-zA-Z0-9_]?)')

        def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
                self.formatter = formatter
                self.colors = colors
                self.symbols = symbols

        def colorize_lines (self, lines):
                """Render a pair of lines with their differing tokens coloured.

                lines holds the two sides; a falsy entry is treated as ''.
                Returns a list of strings, each starting with that side's symbol
                and ending with formatter.newline ().  A side whose rendered
                text is empty is left out.
                """
                lines = (l if l else '' for l in lines)
                # One token per line, so difflib compares token sequences.
                ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
                oo = ["",""]           # rendered output for each side
                st = [False, False]    # whether a colour span is open on each side
                for l in difflib.Differ().compare (*ss):
                        if l[0] == '?':
                                # Differ's intraline hint lines carry no content.
                                continue
                        if l[0] == ' ':
                                # Common token: close any open spans, copy to both sides.
                                for i in range(2):
                                        if st[i]:
                                                oo[i] += self.formatter.end_color ()
                                                st[i] = False
                                oo = [o + self.formatter.escape (l[2:]) for o in oo]
                                continue
                        if l[0] in self.symbols:
                                # Token present on one side only: colour it there.
                                i = self.symbols.index (l[0])
                                if not st[i]:
                                        oo[i] += self.formatter.start_color (self.colors[i])
                                        st[i] = True
                                oo[i] += self.formatter.escape (l[2:])
                                continue
                for i in range(2):
                        if st[i]:
                                oo[i] += self.formatter.end_color ()
                                st[i] = False
                # Drop the newlines that were inserted to separate tokens.
                oo = [o.replace ('\n', '') for o in oo]
                return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]

        def colorize_diff (self, f):
                """Yield the lines of diff f with paired changed lines colourised.

                Lines whose first character is not one of self.symbols are
                escaped and passed through.  Symbol-prefixed lines are collected
                into a pair and handed to colorize_lines once both sides are
                present, or when a side would otherwise be overwritten.
                """
                lines = [None, None]
                for l in f:
                        if l[0] not in self.symbols:
                                yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
                                continue
                        i = self.symbols.index (l[0])
                        if lines[i]:
                                # Flush: this side is already occupied.
                                for line in self.colorize_lines (lines):
                                        yield line
                                lines = [None, None]
                        lines[i] = l[1:]
                        if (all (lines)):
                                # Flush: both sides collected.
                                for line in self.colorize_lines (lines):
                                        yield line
                                lines = [None, None]
                if (any (lines)):
                        # Flush whatever is left at end of input.
                        for line in self.colorize_lines (lines):
                                yield line
178
179
class ZipDiffer:
        """Line-by-line ("zipped") comparison of several files."""

        @staticmethod
        def diff_files (files, symbols=diff_symbols):
                """Write a positional diff of files to standard output.

                The inputs are read in lockstep.  When all of them agree on a
                line it is written once with a leading space; otherwise each
                non-empty variant is written prefixed with symbols[i], where i
                is the position of its file.  An EPIPE error is ignored; any
                other IOError is reported on stderr and exits with status 1.
                """
                # itertools.izip_longest was renamed zip_longest in Python 3.
                try:
                        from itertools import izip_longest as zip_longest
                except ImportError:
                        from itertools import zip_longest

                files = tuple (files) # in case it's a generator, copy it
                try:
                        for lines in zip_longest (*files):
                                if all (lines[0] == line for line in lines[1:]):
                                        sys.stdout.writelines ([" ", lines[0]])
                                        continue

                                for i, l in enumerate (lines):
                                        # Exhausted files contribute None and are skipped.
                                        if l:
                                                sys.stdout.writelines ([symbols[i], l])
                except IOError as e:
                        if e.errno != errno.EPIPE:
                                print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
                                sys.exit (1)
198
199
class DiffFilters:
        """Filters that pass through a subset of a diff stream."""

        @staticmethod
        def filter_failures (f):
                """Yield the lines of every test case in f that did not pass."""
                for _key, group in DiffHelpers.separate_test_cases (f):
                        # The group is a one-shot iterator; keep its lines so they
                        # can be both checked and re-emitted.
                        case = list (group)
                        if DiffHelpers.test_passed (case):
                                continue
                        for line in case:
                                yield line
208
class Stat:
        """Running tally: number of tests added and the sum of their freq."""

        def __init__ (self):
                self.count = self.freq = 0

        def add (self, test):
                """Count one more test and add its freq attribute to the total."""
                self.count += 1
                self.freq += test.freq
218
class Stats:
        """Pass/fail tallies for a set of tests, with simple statistics on them."""

        def __init__ (self):
                self.passed = Stat ()
                self.failed = Stat ()
                self.total  = Stat ()

        def add (self, test):
                """Tally test under total and under passed or failed per test.passed."""
                self.total.add (test)
                if test.passed:
                        self.passed.add (test)
                else:
                        self.failed.add (test)

        def mean (self):
                """Return the fraction of tallied tests that passed.

                Raises ZeroDivisionError when nothing has been tallied.
                """
                return float (self.passed.count) / self.total.count

        def variance (self):
                """Return (fraction passed) * (fraction failed)."""
                return (float (self.passed.count) / self.total.count) * \
                       (float (self.failed.count) / self.total.count)

        def stddev (self):
                """Return the square root of variance ()."""
                return self.variance () ** .5

        def zscore (self, population):
                """Calculate the standard score.
                   Population is the Stats for population.
                   Self is Stats for sample.
                   Returns larger absolute value if sample is highly unlikely to be random.
                   Anything outside of -3..+3 is very unlikely to be random.
                   See: http://en.wikipedia.org/wiki/Standard_score

                   When the population has zero standard deviation (every test
                   passed, or every test failed) the result is 0.0 if the sample
                   mean equals the population mean, and a signed infinity
                   otherwise, instead of raising ZeroDivisionError."""

                deviation = self.mean () - population.mean ()
                spread = population.stddev ()
                if spread == 0:
                        if deviation == 0:
                                return 0.0
                        return float ('inf') if deviation > 0 else float ('-inf')
                return deviation / spread
252
253
254
255
class DiffSinks:
        """Consumers that read a whole diff stream and print a summary."""

        @staticmethod
        def print_stat (f):
                """Print how many of the test cases in f passed and how many failed."""
                passed = 0
                failed = 0
                # XXX port to Stats, but that would really slow us down here
                for key, lines in DiffHelpers.separate_test_cases (f):
                        if DiffHelpers.test_passed (lines):
                                passed += 1
                        else:
                                failed += 1
                total = passed + failed
                # Empty input contains no tests; report 0% instead of dividing by zero.
                percent = 100. * failed / total if total else 0.
                print ("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, percent))

        @staticmethod
        def print_ngrams (f, ns=(1,2,3)):
                """Print a z-score line for each frequently failing code point n-gram.

                Each test case in f is parsed with Test and tallied once overall
                and once per n-gram (for each n in ns) of its unicodes.  Only
                n-grams seen in at least 30 failed tests are printed, each scored
                against the overall statistics.
                """
                gens = tuple (Ngram.generator (n) for n in ns)
                allstats = Stats ()
                allgrams = {}
                for key, lines in DiffHelpers.separate_test_cases (f):
                        test = Test (lines)
                        allstats.add (test)

                        for gen in gens:
                                for ngram in gen (test.unicodes):
                                        if ngram not in allgrams:
                                                allgrams[ngram] = Stats ()
                                        allgrams[ngram].add (test)

                # dict.items () works on both Python 2 and 3 (iteritems is 2-only).
                for ngram, stats in allgrams.items ():
                        if stats.failed.count < 30: # for statistical reasons
                                continue
                        print ("zscore: %9f failed: %6d passed: %6d ngram: <%s>" % (stats.zscore (allstats), stats.failed.count, stats.passed.count, ','.join ("U+%04X" % u for u in ngram)))
295
296
297
class Test:
        """One test case parsed from the diff lines that share an identifier.

        Each line is: a one-character symbol, optionally "identifier: ", then
        a bracketed payload.  "(...)" is stored as text, "<...>" is parsed
        with Unicode.parse into unicodes, and "[...]" is stored as glyphs.
        passed becomes False as soon as any line's symbol is not a space.
        """

        def __init__ (self, lines):
                self.freq = 1
                self.passed = True
                self.identifier = None
                self.text = None
                self.unicodes = None
                self.glyphs = None
                for l in lines:
                        symbol = l[0]
                        if symbol != ' ':
                                self.passed = False
                        # i: index of the opening bracket.
                        i = 1
                        if ':' in l:
                                i = l.index (':')
                                if not self.identifier:
                                        self.identifier = l[1:i]
                                i = i + 2 # Skip colon and space
                        # j: index of the closing bracket, just before an optional newline.
                        j = -1
                        if l[j] == '\n':
                                j -= 1
                        brackets = l[i] + l[j]
                        # Slice up to j rather than a fixed -2, so a line without
                        # a trailing newline keeps its last payload character.
                        l = l[i+1:j]
                        if brackets == '()':
                                self.text = l
                        elif brackets == '<>':
                                self.unicodes = Unicode.parse (l)
                        elif brackets == '[]':
                                # XXX we don't handle failed tests here
                                self.glyphs = l
329
330
class DiffHelpers:
        """Helpers for grouping and judging the lines of a test diff."""

        @staticmethod
        def separate_test_cases (f):
                '''Reads lines from f, and if the lines have identifiers, ie.
                   have a colon character, groups them by identifier,
                   yielding lists of all lines with the same identifier.

                   The result is an itertools.groupby object, so only
                   consecutive lines are grouped.  A line without a colon
                   after its first character is its own key.'''

                def identifier (l):
                        # Column 0 is the diff symbol.  Search from column 1 so
                        # the position agrees with the membership test.
                        if ':' in l[1:]:
                                return l[1:l.index (':', 1)]
                        return l
                return groupby (f, key=identifier)

        @staticmethod
        def test_passed (lines):
                """Return True if the test case made of lines counts as passed.

                A case passes when every line starts with a space.  It also
                passes when any '+' line contains one of the markers below.
                """
                lines = list (lines)
                # XXX This is a hack, but does the job for now.
                markers = ("space+0|space+0", "uni25CC", "dottedcircle", "glyph0", "gid0", "notdef")
                for marker in markers:
                        if any (marker in l for l in lines if l[0] == '+'):
                                return True
                return all (l[0] == ' ' for l in lines)
356
357
class FilterHelpers:
        """Adapters that turn a line filter into a function that prints its output."""

        @staticmethod
        def filter_printer_function (filter_callback):
                """Return printer (f), which prints each item of filter_callback (f)
                with print (), i.e. followed by a newline."""
                def printer (f):
                        for item in filter_callback (f):
                                print (item)
                return printer

        @staticmethod
        def filter_printer_function_no_newline (filter_callback):
                """Return printer (f), which writes each item of filter_callback (f)
                to sys.stdout as is, without adding a newline."""
                def printer (f):
                        for item in filter_callback (f):
                                sys.stdout.writelines ([item])
                return printer
373
374
class Ngram:
        """Factory for sliding-window n-gram generators."""

        @staticmethod
        def generator (n):
                """Return a generator function gen (f) that yields, as tuples,
                every run of n consecutive items of the iterable f.  The
                returned function carries n as its attribute gen.n."""

                def gen (f):
                        window = []
                        for item in f:
                                window.append (item)
                                if len (window) != n:
                                        continue
                                yield tuple (window)
                                # Slide the window forward by one item.
                                del window[0]

                gen.n = n
                return gen
390
391
class UtilMains:
        """Ready-made main () bodies for small command-line utilities.

        All of them read sys.argv, print a usage message and exit with
        status 1 on --help, and treat IOError alike: EPIPE is ignored, any
        other error is reported on stderr and exits with status 1.
        """

        @staticmethod
        def process_multiple_files (callback, mnemonic = "FILE"):
                """Call callback on an open file for each command-line argument.

                With no arguments the single name '-' is used.  Names are opened
                with FileHelpers.open_file_or_stdin.
                """

                if "--help" in sys.argv:
                        print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
                        sys.exit (1)

                try:
                        names = sys.argv[1:] or ['-']
                        for name in names:
                                callback (FileHelpers.open_file_or_stdin (name))
                except IOError as e:
                        if e.errno == errno.EPIPE:
                                return
                        print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
                        sys.exit (1)

        @staticmethod
        def process_multiple_args (callback, mnemonic):
                """Call callback on each command-line argument, in order.

                Running with no arguments prints the usage message and exits,
                just like --help.
                """

                if "--help" in sys.argv or len (sys.argv) == 1:
                        print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
                        sys.exit (1)

                try:
                        for arg in sys.argv[1:]:
                                callback (arg)
                except IOError as e:
                        if e.errno == errno.EPIPE:
                                return
                        print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
                        sys.exit (1)

        @staticmethod
        def filter_multiple_strings_or_stdin (callback, mnemonic, \
                                              separator = " ", \
                                              concat_separator = False):
                """Print callback applied to the arguments, or to each stdin line.

                With arguments: if concat_separator is not False the arguments
                are first joined with it into one string; the callback results
                are then joined with separator and printed on one line.
                Without arguments: every line of standard input has one
                trailing newline removed and its callback result is printed.
                """

                if "--help" in sys.argv:
                        print ("Usage:\n  %s %s...\nor:\n  %s\n\nWhen called with no arguments, input is read from standard input." \
                              % (sys.argv[0], mnemonic, sys.argv[0]))
                        sys.exit (1)

                try:
                        if len (sys.argv) != 1:
                                args = sys.argv[1:]
                                if concat_separator != False:
                                        args = [concat_separator.join (args)]
                                print (separator.join (callback (x) for x in (args)))
                                return

                        while True:
                                line = sys.stdin.readline ()
                                if not len (line):
                                        # End of input.
                                        break
                                if line[-1] == '\n':
                                        line = line[:-1]
                                print (callback (line))
                except IOError as e:
                        if e.errno == errno.EPIPE:
                                return
                        print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
                        sys.exit (1)
453
454
class Unicode:
        """Conversions between text, code point lists and readable names."""

        @staticmethod
        def decode (s):
                """Return the code points of s as "U+XXXX,U+XXXX,...".

                s is passed through tounicode (s, 'utf-8') first.
                """
                return u','.join ("U+%04X" % ord (u) for u in tounicode (s, 'utf-8'))

        @staticmethod
        def parse (s):
                """Return the list of integers written in hexadecimal in s.

                "0x"/"0X" prefixes and the characters in the class below
                (brackets, separators and the letters x, u, n, i in either
                case) are treated as whitespace before splitting.
                """
                s = re.sub (r"0[xX]", " ", s)
                s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n    ]", " ", s)
                return [int (x, 16) for x in s.split ()]

        @staticmethod
        def encode (s):
                """Return the text made of the code points parsed from s.

                On Python 2 the result is encoded to UTF-8 bytes.
                """
                s = u''.join (unichr (x) for x in Unicode.parse (s))
                if sys.version_info[0] == 2: s = s.encode ('utf-8')
                return s

        # Abbreviations substituted for full character names by pretty_name.
        shorthands = {
                "ZERO WIDTH NON-JOINER": "ZWNJ",
                "ZERO WIDTH JOINER": "ZWJ",
                "NARROW NO-BREAK SPACE": "NNBSP",
                "COMBINING GRAPHEME JOINER": "CGJ",
                "LEFT-TO-RIGHT MARK": "LRM",
                "RIGHT-TO-LEFT MARK": "RLM",
                "LEFT-TO-RIGHT EMBEDDING": "LRE",
                "RIGHT-TO-LEFT EMBEDDING": "RLE",
                "POP DIRECTIONAL FORMATTING": "PDF",
                "LEFT-TO-RIGHT OVERRIDE": "LRO",
                "RIGHT-TO-LEFT OVERRIDE": "RLO",
        }

        @staticmethod
        def pretty_name (u):
                """Return a shortened form of the Unicode name of character u.

                Returns "XXX" when unicodedata has no name for u.
                """
                try:
                        s = unicodedata.name (u)
                except ValueError:
                        return "XXX"
                # Strip the leading words up to LETTER / SIGN / COMBINING, and
                # turn "... VOWEL SIGN X" into "X-MATRA".
                s = re.sub (".* LETTER ", "", s)
                s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
                s = re.sub (".* SIGN ", "", s)
                s = re.sub (".* COMBINING ", "", s)
                if re.match (".* VIRAMA", s):
                        s = "HALANT"
                if s in Unicode.shorthands:
                        s = Unicode.shorthands[s]
                return s

        @staticmethod
        def pretty_names (s):
                """Return the pretty names of the code points in s, joined by " + ".

                On Python 2 the result is encoded to UTF-8 bytes, matching
                encode (); on Python 3 it is returned as str.
                """
                s = re.sub (r"[<+>\\uU]", " ", s)
                s = re.sub (r"0[xX]", " ", s)
                s = [unichr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
                s = u' + '.join (Unicode.pretty_name (x) for x in s)
                # Encode only on Python 2; on Python 3 this would return bytes.
                if sys.version_info[0] == 2: s = s.encode ('utf-8')
                return s
509
510
class FileHelpers:
        """Helpers for opening input files."""

        @staticmethod
        def open_file_or_stdin (f):
                """Return sys.stdin if f is '-', otherwise f opened for reading.

                The caller is responsible for closing a returned file.
                """
                if f == '-':
                        return sys.stdin
                # open () replaces the Python 2-only file () builtin.
                return open (f)
518
519
class Manifest:
        """Reads and regenerates the MANIFEST files that list a directory's entries."""

        @staticmethod
        def read (s, strict = True):
                """Yield the normalised paths of the files reachable from s.

                If s is a file, its path is yielded.  If s is a directory, its
                MANIFEST file is read and each listed entry is expanded
                recursively.  When s, or a directory's MANIFEST, is missing:
                with strict a message is printed to stderr and the process
                exits with status 1; otherwise nothing is yielded for it.
                """

                if not os.path.exists (s):
                        if strict:
                                print ("%s: %s does not exist" % (sys.argv[0], s), file=sys.stderr)
                                sys.exit (1)
                        return

                s = os.path.normpath (s)

                if not os.path.isdir (s):
                        yield s
                        return

                manifest = os.path.join (s, "MANIFEST")
                try:
                        # open () replaces the Python 2-only file () builtin; the
                        # with block closes the handle once the list is read.
                        with open (manifest) as m:
                                items = [x.strip () for x in m]
                except IOError:
                        if strict:
                                print ("%s: %s does not exist" % (sys.argv[0], manifest), file=sys.stderr)
                                sys.exit (1)
                        return

                for f in items:
                        if not f:
                                # A blank line would resolve to s itself and recurse forever.
                                continue
                        # NOTE(review): strict is not forwarded, so nested entries
                        # are always read strictly -- confirm this is intended.
                        for p in Manifest.read (os.path.join (s, f)):
                                yield p

        @staticmethod
        def update_recursive (s):
                """Rewrite the MANIFEST file in s and in every directory below it.

                Each MANIFEST lists the directory's file names, then its
                sub-directory names, both sorted, leaving out the bookkeeping
                names below.  A "GEN" line is printed for every file written.
                """

                for dirpath, dirnames, filenames in os.walk (s, followlinks=True):

                        for f in ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]:
                                if f in dirnames:
                                        dirnames.remove (f)
                                if f in filenames:
                                        filenames.remove (f)
                        # Sorting dirnames in place also fixes the order of the walk.
                        dirnames.sort ()
                        filenames.sort ()
                        ms = os.path.join (dirpath, "MANIFEST")
                        print ("  GEN    %s" % ms)
                        with open (ms, "w") as m:
                                for f in filenames:
                                        print (f, file=m)
                                for f in dirnames:
                                        print (f, file=m)
                        # No explicit recursion: os.walk already descends into
                        # every remaining entry of dirnames.
570
# This module only provides helpers for other scripts; running it directly
# does nothing.
if __name__ == '__main__':
        pass