8348dc2699a3182fcb0f9bd29c73cd6bbe2c735d
[platform/upstream/harfbuzz.git] / test / shaping / hb_test_tools.py
1 #!/usr/bin/env python
2
3 from __future__ import print_function, division, absolute_import
4
5 import sys, os, re, difflib, unicodedata, errno, cgi
6 from itertools import *
7
# One-character line prefixes, one per input: ZipDiffer tags a line from input
# number i with diff_symbols[i], and DiffColorizer maps a prefix back to its
# index with diff_symbols.index().
diff_symbols = "-+=*&^%$#@!~/"
# Highlight colors, indexed the same way as diff_symbols.
diff_colors = ['red', 'green', 'blue']
10
def codepoints(s):
    """Return a generator over the integer code point of each item of *s*."""
    return (ord(char) for char in s)
13
# Python 2/3 compatibility for unichr() and codepoints().
# On Python 3 the name `unichr` does not exist, so the first statement raises
# NameError and the handler at the bottom aliases it to chr().
try:
        unichr = unichr

        if sys.maxunicode < 0x10FFFF:
                # workarounds for Python 2 "narrow" builds with UCS2-only support.

                # Keep a reference to the builtin before it is shadowed below.
                _narrow_unichr = unichr

                def unichr(i):
                        """
                        Return the unicode character whose Unicode code is the integer 'i'.
                        The valid range is 0 to 0x10FFFF inclusive.

                        >>> _narrow_unichr(0xFFFF + 1)
                        Traceback (most recent call last):
                          File "<stdin>", line 1, in ?
                        ValueError: unichr() arg not in range(0x10000) (narrow Python build)
                        >>> unichr(0xFFFF + 1) == u'\U00010000'
                        True
                        >>> unichr(1114111) == u'\U0010FFFF'
                        True
                        >>> unichr(0x10FFFF + 1)
                        Traceback (most recent call last):
                          File "<stdin>", line 1, in ?
                        ValueError: unichr() arg not in range(0x110000)
                        """
                        try:
                                return _narrow_unichr(i)
                        except ValueError:
                                # The builtin rejected the value: build a
                                # "\UXXXXXXXX" escape and decode that instead.
                                try:
                                        padded_hex_str = hex(i)[2:].zfill(8)
                                        escape_str = "\\U" + padded_hex_str
                                        return escape_str.decode("unicode-escape")
                                except UnicodeDecodeError:
                                        raise ValueError('unichr() arg not in range(0x110000)')

                def codepoints(s):
                        # Generator replacing the module-level codepoints():
                        # a high surrogate (0xD800-0xDBFF) followed by a low
                        # surrogate (0xDC00-0xDFFF) is combined into a single
                        # code point; any unpaired surrogate yields 0xFFFC.
                        high_surrogate = None
                        for u in s:
                                cp = ord (u)
                                if 0xDC00 <= cp <= 0xDFFF:
                                        if high_surrogate:
                                                # Complete pair: combine both halves.
                                                yield 0x10000 + (high_surrogate - 0xD800) * 0x400 + (cp - 0xDC00)
                                                high_surrogate = None
                                        else:
                                                # Low surrogate with no preceding high surrogate.
                                                yield 0xFFFC
                                else:
                                        if high_surrogate:
                                                # Pending high surrogate was not followed by a low one.
                                                yield 0xFFFC
                                                high_surrogate = None
                                        if 0xD800 <= cp <= 0xDBFF:
                                                # Remember it; the next item decides what is yielded.
                                                high_surrogate = cp
                                        else:
                                                yield cp
                                                high_surrogate = None
                        # Input ended while a high surrogate was still pending.
                        if high_surrogate:
                                yield 0xFFFC

except NameError:
        unichr = chr
74
# Text type alias: the builtin `unicode` where it exists, otherwise `str`.
try:
    unicode = unicode
except NameError:
    unicode = str


def tounicode(s, encoding='ascii', errors='strict'):
    """Return *s* as text.

    Values that are already instances of `unicode` are returned untouched;
    anything else is decoded with the given *encoding* and *errors* policy.
    """
    if isinstance(s, unicode):
        return s
    return s.decode(encoding, errors)
85
class ColorFormatter:
    """Namespace of output formatters used to highlight diff output.

    Each nested formatter exposes the same static interface:
    ``start_color(c)`` / ``end_color()`` return the markup that opens and
    closes a highlighted span, ``escape(s)`` makes text safe for the target
    format and ``newline()`` returns the line terminator.
    """

    class Null:
        """Plain output: no markup and no escaping."""

        @staticmethod
        def start_color(c):
            return ''

        @staticmethod
        def end_color():
            return ''

        @staticmethod
        def escape(s):
            return s

        @staticmethod
        def newline():
            return '\n'

    class ANSI:
        """Terminal output using ANSI escape sequences."""

        @staticmethod
        def start_color(c):
            # Raises KeyError for any color other than the three listed here.
            return {
                'red': '\033[41;37;1m',
                'green': '\033[42;37;1m',
                'blue': '\033[44;37;1m',
            }[c]

        @staticmethod
        def end_color():
            return '\033[m'

        @staticmethod
        def escape(s):
            return s

        @staticmethod
        def newline():
            return '\n'

    class HTML:
        """HTML output: colored ``<span>`` elements and ``<br/>`` line breaks."""

        @staticmethod
        def start_color(c):
            return '<span style="background:%s">' % c

        @staticmethod
        def end_color():
            return '</span>'

        @staticmethod
        def escape(s):
            """Escape ``&``, ``<`` and ``>`` in *s* (quotes are left alone)."""
            # cgi.escape() was removed in Python 3.8; html.escape() is its
            # replacement.  quote=False keeps the output identical to what
            # cgi.escape(s) produced.
            try:
                from html import escape as html_escape
            except ImportError:
                # Interpreters without the `html` module (Python 2).
                from cgi import escape as cgi_escape
                return cgi_escape(s)
            return html_escape(s, quote=False)

        @staticmethod
        def newline():
            return '<br/>\n'

    @staticmethod
    def Auto(argv=None, out=sys.stdout):
        """Select a formatter class from command-line style options.

        Recognized options are removed from *argv* in place. They are checked
        in a fixed order, so when several are present the last one checked
        wins: ``--format``, ``--format=ansi``, ``--format=html``,
        ``--no-format``. Without any of them the ANSI formatter is returned.
        *out* is accepted but not used.
        """
        if argv is None:
            argv = []
        options = (
            ("--format", ColorFormatter.ANSI),
            ("--format=ansi", ColorFormatter.ANSI),
            ("--format=html", ColorFormatter.HTML),
            ("--no-format", ColorFormatter.Null),
        )
        chosen = ColorFormatter.ANSI
        for flag, formatter in options:
            if flag in argv:
                argv.remove(flag)
                chosen = formatter
        return chosen
142
143
class DiffColorizer:
    """Highlight the token-level differences between paired diff lines.

    *formatter* supplies the markup (``start_color``, ``end_color``,
    ``escape`` and ``newline``). *symbols* are the one-character prefixes
    that tag which side a line belongs to, and *colors* the color used for
    each side; both are indexed the same way.
    """

    # Tokenizer: an optional run of word characters followed by at most one
    # non-word character.  Upper- and lower-case ASCII letters both count as
    # word characters, so a mixed-case name is compared as a single token.
    diff_regex = re.compile('([a-zA-Z0-9_]*)([^a-zA-Z0-9_]?)')

    def __init__(self, formatter, colors=diff_colors, symbols=diff_symbols):
        self.formatter = formatter
        self.colors = colors
        self.symbols = symbols

    def _close_spans(self, outputs, is_open):
        """Append the end-of-color markup to every side with an open span."""
        for side in range(2):
            if is_open[side]:
                outputs[side] += self.formatter.end_color()
                is_open[side] = False

    def colorize_lines(self, lines):
        """Return the two *lines* with their differing tokens highlighted.

        *lines* holds exactly two entries (either may be None or empty).
        Each is split into tokens, the token lists are compared with
        difflib.Differ, and tokens unique to one side are wrapped in that
        side's color. Returns a list with one formatted line per non-empty
        side, each prefixed with its symbol.
        """
        lines = (l if l else '' for l in lines)
        # One token per line, so Differ can compare token sequences.
        tokens = [self.diff_regex.sub(r'\1\n\2\n', l).splitlines(True) for l in lines]
        outputs = ["", ""]
        is_open = [False, False]
        for entry in difflib.Differ().compare(*tokens):
            tag, text = entry[0], entry[2:]
            if tag == '?':
                # Differ's intraline hint lines carry no content.
                continue
            if tag == ' ':
                # Token common to both sides: close any highlight first.
                self._close_spans(outputs, is_open)
                outputs = [o + self.formatter.escape(text) for o in outputs]
                continue
            if tag in self.symbols:
                side = self.symbols.index(tag)
                if not is_open[side]:
                    outputs[side] += self.formatter.start_color(self.colors[side])
                    is_open[side] = True
                outputs[side] += self.formatter.escape(text)
        self._close_spans(outputs, is_open)
        # Drop the newlines introduced by tokenization (and the original one).
        outputs = [o.replace('\n', '') for o in outputs]
        return [sym + out + self.formatter.newline()
                for (sym, out) in zip(self.symbols, outputs) if out]

    def colorize_diff(self, f):
        """Yield the lines of diff *f* with changed line pairs highlighted.

        Lines whose first character is not one of the symbols are escaped
        and passed through. Symbol-prefixed lines are buffered per side and
        flushed through colorize_lines() once both sides are present, when
        a side would be overwritten, or at the end of input.
        """
        pending = [None, None]
        for l in f:
            if l[0] not in self.symbols:
                yield self.formatter.escape(l).replace('\n', self.formatter.newline())
                continue
            side = self.symbols.index(l[0])
            if pending[side]:
                # This side is already buffered: flush before replacing it.
                for line in self.colorize_lines(pending):
                    yield line
                pending = [None, None]
            pending[side] = l[1:]
            if all(pending):
                # Both sides present: flush the pair.
                for line in self.colorize_lines(pending):
                    yield line
                pending = [None, None]
        if any(pending):
            # Flush whatever is left at end of input.
            for line in self.colorize_lines(pending):
                yield line
204
205
class ZipDiffer:
    """Line-by-line ("zipped") comparison of several inputs."""

    @staticmethod
    def diff_files(files, symbols=diff_symbols):
        """Write a positional diff of *files* to standard output.

        The inputs are read in lockstep. When the current line is identical
        in every input it is written once with a leading space; otherwise
        each non-empty line is written prefixed with the symbol of its
        input (``symbols[i]`` for input number i). Inputs that run out early
        contribute nothing for the remaining rows.

        An IOError other than a broken pipe is reported on standard error
        and terminates the process with status 1; a broken pipe is ignored.
        """
        # itertools.izip_longest only exists on Python 2; Python 3 calls it
        # zip_longest.
        try:
            from itertools import zip_longest
        except ImportError:
            from itertools import izip_longest as zip_longest

        files = tuple(files)  # in case it's a generator, copy it
        try:
            for lines in zip_longest(*files):
                if all(lines[0] == line for line in lines[1:]):
                    sys.stdout.writelines([" ", lines[0]])
                    continue

                for i, l in enumerate(lines):
                    if l:
                        sys.stdout.writelines([symbols[i], l])
        except IOError as e:
            if e.errno != errno.EPIPE:
                print("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
                sys.exit(1)
224
225
class DiffFilters:
    """Filters over a diff stream."""

    @staticmethod
    def filter_failures(f):
        """Yield only the lines of *f* that belong to failing test cases.

        The stream is grouped with DiffHelpers.separate_test_cases() and
        every group for which DiffHelpers.test_passed() is false is emitted
        line by line.
        """
        for _key, group in DiffHelpers.separate_test_cases(f):
            case_lines = list(group)
            if DiffHelpers.test_passed(case_lines):
                continue
            for line in case_lines:
                yield line
234
class Stat:
    """Running tally: number of tests added and the sum of their ``freq``."""

    def __init__(self):
        self.count, self.freq = 0, 0

    def add(self, test):
        """Count one more test and accumulate its ``freq`` attribute."""
        self.count += 1
        self.freq += test.freq
244
class Stats:
    """Pass/fail tallies for a set of tests, with simple statistics."""

    def __init__(self):
        self.passed = Stat()
        self.failed = Stat()
        self.total = Stat()

    def add(self, test):
        """Record *test* in the total and in the passed or failed tally."""
        self.total.add(test)
        bucket = self.passed if test.passed else self.failed
        bucket.add(test)

    def mean(self):
        """Return the fraction of recorded tests that passed."""
        return float(self.passed.count) / self.total.count

    def variance(self):
        """Return the pass fraction multiplied by the fail fraction."""
        pass_ratio = float(self.passed.count) / self.total.count
        fail_ratio = float(self.failed.count) / self.total.count
        return pass_ratio * fail_ratio

    def stddev(self):
        """Return the square root of variance()."""
        return self.variance() ** .5

    def zscore(self, population):
        """Calculate the standard score.
           Population is the Stats for population.
           Self is Stats for sample.
           Returns larger absolute value if sample is highly unlikely to be random.
           Anything outside of -3..+3 is very unlikely to be random.
           See: http://en.wikipedia.org/wiki/Standard_score"""
        delta = self.mean() - population.mean()
        return delta / population.stddev()
278
279
280
281
class DiffSinks:
    """Consumers that summarize a diff stream."""

    @staticmethod
    def print_stat(f):
        """Print how many test cases in diff stream *f* passed and failed.

        Test cases are delimited with DiffHelpers.separate_test_cases() and
        judged with DiffHelpers.test_passed(). An empty stream is reported
        as zero tests with a 0% failure rate.
        """
        passed = 0
        failed = 0
        # XXX port to Stats, but that would really slow us down here
        for key, lines in DiffHelpers.separate_test_cases(f):
            if DiffHelpers.test_passed(lines):
                passed += 1
            else:
                failed += 1
        total = passed + failed
        # Guard the percentage: with no test cases at all the division would
        # raise ZeroDivisionError.
        failed_percent = 100. * failed / total if total else 0.
        print("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, failed_percent))
296
297
class Test:
    """One test case parsed from the diff lines that describe it.

    Attributes set by the constructor:
      freq        always 1
      passed      False as soon as any line starts with something other
                  than a space
      identifier  text between the first character and the first colon of
                  the first line that contains a colon
      text        payload of a ``(...)`` line
      unicodes    payload of a ``<...>`` line, parsed with Unicode.parse()
      glyphs      payload of a ``[...]`` line
    """

    def __init__(self, lines):
        self.freq = 1
        self.passed = True
        self.identifier = None
        self.text = None
        self.unicodes = None
        self.glyphs = None
        for l in lines:
            symbol = l[0]
            if symbol != ' ':
                self.passed = False
            # i: index of the opening bracket (right after the symbol, or
            # after "identifier: " when the line has one).
            i = 1
            if ':' in l:
                i = l.index(':')
                if not self.identifier:
                    self.identifier = l[1:i]
                i = i + 2  # Skip colon and space
            # j: index of the closing bracket, stepping over a trailing
            # newline when there is one.
            j = -1
            if l[j] == '\n':
                j -= 1
            brackets = l[i] + l[j]
            # Slice up to the closing bracket itself so that a line without
            # a trailing newline keeps its last payload character.
            l = l[i+1:j]
            if brackets == '()':
                self.text = l
            elif brackets == '<>':
                self.unicodes = Unicode.parse(l)
            elif brackets == '[]':
                # XXX we don't handle failed tests here
                self.glyphs = l
329
330
class DiffHelpers:
    """Helpers for grouping and judging the lines of a diff stream."""

    @staticmethod
    def separate_test_cases(f):
        '''Group consecutive lines of f by identifier.

           A line that has a colon after its first character is keyed by
           the text between its first character and its first colon; any
           other line is keyed by the whole line.  Returns the
           itertools.groupby iterator of (key, lines) pairs.'''

        def identifier(line):
            if ':' not in line[1:]:
                return line
            return line[1:line.index(':')]

        return groupby(f, key=identifier)

    @staticmethod
    def test_passed(lines):
        '''Return True when the lines of one test case count as a pass.

           A case passes when any of its '+' lines contains one of the
           marker strings below, or when every line starts with a space.'''
        lines = list(lines)
        # XXX This is a hack, but does the job for now.
        markers = (
            "space+0|space+0",
            "uni25CC",
            "dottedcircle",
            "glyph0",
            "gid0",
            "notdef",
        )
        for marker in markers:
            if any(marker in l for l in lines if l[0] == '+'):
                return True
        return all(l[0] == ' ' for l in lines)
356
357
class FilterHelpers:
    """Adapters that turn a line filter into a function printing its output."""

    @staticmethod
    def filter_printer_function(filter_callback):
        """Return a function that prints each item produced by
        ``filter_callback(f)``, one per line."""
        def printer(f):
            for item in filter_callback(f):
                print(item)
        return printer

    @staticmethod
    def filter_printer_function_no_newline(filter_callback):
        """Return a function that writes each item produced by
        ``filter_callback(f)`` to standard output without adding a newline."""
        def printer(f):
            for item in filter_callback(f):
                sys.stdout.write(item)
        return printer
373
374
class Ngram:
    """Factory for sliding-window n-gram generators."""

    @staticmethod
    def generator(n):
        """Return a generator function yielding every run of *n* consecutive
        items of its input as a tuple.  The returned function carries *n* as
        its ``n`` attribute."""

        def gen(f):
            window = []
            for item in f:
                window.append(item)
                if len(window) != n:
                    continue
                yield tuple(window)
                # Slide the window forward by one item.
                del window[0]

        gen.n = n
        return gen
390
391
class UtilMains:
    """Command-line drivers that feed sys.argv (or stdin) to a callback."""

    @staticmethod
    def _handle_io_error(e):
        """Report IOError *e* on stderr and exit with status 1, unless it is
        a broken pipe, which is silently ignored."""
        if e.errno != errno.EPIPE:
            print("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
            sys.exit(1)

    @staticmethod
    def process_multiple_files(callback, mnemonic="FILE"):
        """Call *callback* with an open file for each command-line argument.

        Without arguments the single input ``-`` (standard input) is used.
        Every file opened here is closed once its callback returns;
        standard input is left open. ``--help`` prints a usage line built
        from *mnemonic* and exits with status 1.
        """
        if "--help" in sys.argv:
            print("Usage: %s %s..." % (sys.argv[0], mnemonic))
            sys.exit(1)

        try:
            files = sys.argv[1:] if len(sys.argv) > 1 else ['-']
            for s in files:
                f = FileHelpers.open_file_or_stdin(s)
                try:
                    callback(f)
                finally:
                    # Do not leak one handle per argument.
                    if f is not sys.stdin:
                        f.close()
        except IOError as e:
            UtilMains._handle_io_error(e)

    @staticmethod
    def process_multiple_args(callback, mnemonic):
        """Call *callback* once with each command-line argument.

        With no arguments, or with ``--help``, prints a usage line built
        from *mnemonic* and exits with status 1.
        """
        if len(sys.argv) == 1 or "--help" in sys.argv:
            print("Usage: %s %s..." % (sys.argv[0], mnemonic))
            sys.exit(1)

        try:
            for s in sys.argv[1:]:
                callback(s)
        except IOError as e:
            UtilMains._handle_io_error(e)

    @staticmethod
    def filter_multiple_strings_or_stdin(callback, mnemonic,
                                         separator=" ",
                                         concat_separator=False):
        """Print *callback* applied to the arguments or to each stdin line.

        With command-line arguments, the callback results are joined with
        *separator* and printed on one line; if *concat_separator* is not
        False the arguments are first joined with it into a single string.
        Without arguments, each line of standard input (minus its trailing
        newline) is passed to the callback and the result printed.
        ``--help`` prints usage built from *mnemonic* and exits with
        status 1.
        """
        if "--help" in sys.argv:
            print("Usage:\n  %s %s...\nor:\n  %s\n\nWhen called with no arguments, input is read from standard input."
                  % (sys.argv[0], mnemonic, sys.argv[0]))
            sys.exit(1)

        try:
            if len(sys.argv) == 1:
                while True:
                    line = sys.stdin.readline()
                    if not len(line):
                        break
                    if line[-1] == '\n':
                        line = line[:-1]
                    print(callback(line))
            else:
                args = sys.argv[1:]
                if concat_separator != False:
                    args = [concat_separator.join(args)]
                print(separator.join(callback(x) for x in args))
        except IOError as e:
            UtilMains._handle_io_error(e)
453
454
class Unicode:
    """Conversions between text, code point lists and readable names."""

    @staticmethod
    def decode(s):
        """Return the code points of *s* as a ``U+XXXX,U+XXXX`` string.

        *s* is converted to text with tounicode() using UTF-8 first."""
        return u','.join("U+%04X" % cp for cp in codepoints(tounicode(s, 'utf-8')))

    @staticmethod
    def parse(s):
        """Return the list of integers named by the hexadecimal numbers in *s*.

        ``0x`` prefixes and the punctuation/letters commonly wrapped around
        code points (``U+``, ``<>``, ``{}``, commas, ...) are treated as
        separators."""
        s = re.sub(r"0[xX]", " ", s)
        s = re.sub(r"[<+>{},;&#\\xXuUnNiI\n\t]", " ", s)
        return [int(x, 16) for x in s.split()]

    @staticmethod
    def encode(s):
        """Return the text spelled by the code points listed in *s*.

        On Python 2 the result is a UTF-8 encoded byte string."""
        s = u''.join(unichr(x) for x in Unicode.parse(s))
        if sys.version_info[0] == 2:
            s = s.encode('utf-8')
        return s

    # Abbreviations substituted for the full character name by pretty_name().
    shorthands = {
        "ZERO WIDTH NON-JOINER": "ZWNJ",
        "ZERO WIDTH JOINER": "ZWJ",
        "NARROW NO-BREAK SPACE": "NNBSP",
        "COMBINING GRAPHEME JOINER": "CGJ",
        "LEFT-TO-RIGHT MARK": "LRM",
        "RIGHT-TO-LEFT MARK": "RLM",
        "LEFT-TO-RIGHT EMBEDDING": "LRE",
        "RIGHT-TO-LEFT EMBEDDING": "RLE",
        "POP DIRECTIONAL FORMATTING": "PDF",
        "LEFT-TO-RIGHT OVERRIDE": "LRO",
        "RIGHT-TO-LEFT OVERRIDE": "RLO",
    }

    @staticmethod
    def pretty_name(u):
        """Return a shortened form of the Unicode name of character *u*.

        Everything up to and including `` LETTER ``, `` SIGN `` or
        `` COMBINING `` is dropped, vowel signs become ``<name>-MATRA`` and
        names listed in ``shorthands`` are abbreviated. Characters without
        a name give ``"XXX"``."""
        try:
            s = unicodedata.name(u)
        except ValueError:
            return "XXX"
        s = re.sub(".* LETTER ", "", s)
        s = re.sub(".* VOWEL SIGN (.*)", r"\1-MATRA", s)
        s = re.sub(".* SIGN ", "", s)
        s = re.sub(".* COMBINING ", "", s)
        # NOTE(review): this pattern needs a space before VIRAMA, so a name
        # already reduced to just "VIRAMA" by the SIGN rule above does not
        # match and is returned as "VIRAMA" -- confirm that is intended.
        if re.match(".* VIRAMA", s):
            s = "HALANT"
        if s in Unicode.shorthands:
            s = Unicode.shorthands[s]
        return s

    @staticmethod
    def pretty_names(s):
        """Return the pretty names of the code points listed in *s*, joined
        with `` + ``.

        As with encode(), the result is UTF-8 encoded on Python 2 only, so
        that on Python 3 callers get text they can join and print."""
        s = re.sub(r"[<+>\\uU]", " ", s)
        s = re.sub(r"0[xX]", " ", s)
        chars = [unichr(int(x, 16)) for x in re.split('[, \n]', s) if len(x)]
        names = u' + '.join(Unicode.pretty_name(x) for x in chars)
        if sys.version_info[0] == 2:
            names = names.encode('utf-8')
        return names
509
510
class FileHelpers:
    """File-opening helpers."""

    @staticmethod
    def open_file_or_stdin(f):
        """Return standard input when *f* is ``'-'``; otherwise open the
        file named *f* with the default mode and return it."""
        return sys.stdin if f == '-' else open(f)
518
519
class Manifest:
    """Reading and regenerating per-directory MANIFEST files."""

    @staticmethod
    def read(s, strict=True):
        """Yield the files reachable from path *s*.

        A plain file yields its own normalized path. A directory is
        expanded through its ``MANIFEST`` file: every non-blank line names
        an entry relative to the directory, which is read recursively.

        When *s* or a directory's MANIFEST is missing, *strict* decides
        between printing an error and exiting with status 1 (True) and
        yielding nothing (False).
        """
        if not os.path.exists(s):
            if strict:
                print("%s: %s does not exist" % (sys.argv[0], s), file=sys.stderr)
                sys.exit(1)
            return

        s = os.path.normpath(s)

        if not os.path.isdir(s):
            yield s
            return

        manifest = os.path.join(s, "MANIFEST")
        try:
            # Read the whole list up front so the file is closed before
            # recursing.
            with open(manifest) as m:
                items = [x.strip() for x in m]
        except IOError:
            if strict:
                print("%s: %s does not exist" % (sys.argv[0], manifest), file=sys.stderr)
                sys.exit(1)
            return

        for f in items:
            if not f:
                # A blank line would resolve to the directory itself and
                # recurse without end.
                continue
            # NOTE(review): nested entries are always read with the default
            # strict=True, whatever this call was given -- confirm intent.
            for p in Manifest.read(os.path.join(s, f)):
                yield p

    @staticmethod
    def update_recursive(s):
        """(Re)write a MANIFEST file in *s* and in every directory below it.

        Each MANIFEST lists the directory's files and then its
        subdirectories, each group sorted, one name per line. The names in
        ``skipped`` are neither listed nor descended into. A ``GEN`` line is
        printed for every file written.
        """
        skipped = ("MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog")

        # os.walk already visits every subdirectory (and honours the in-place
        # pruning of dirnames below), so no explicit recursion is needed.
        for dirpath, dirnames, filenames in os.walk(s, followlinks=True):
            dirnames[:] = sorted(d for d in dirnames if d not in skipped)
            filenames[:] = sorted(f for f in filenames if f not in skipped)
            ms = os.path.join(dirpath, "MANIFEST")
            print("  GEN    %s" % ms)
            with open(ms, "w") as m:
                for f in filenames:
                    print(f, file=m)
                for f in dirnames:
                    print(f, file=m)
570
# Nothing happens when this module is executed directly.
if __name__ == '__main__':
        pass