Imported Upstream version 1.7.6
[platform/upstream/harfbuzz.git] / test / shaping / hb_test_tools.py
1 #!/usr/bin/env python
2
3 from __future__ import print_function
4 import sys, os, re, difflib, unicodedata, errno, cgi
5 from itertools import *
6
# One marker character per compared input: the position of a character in
# this string is the index of the input whose lines it tags.
diff_symbols = "-+=*&^%$#@!~/"
# Highlight colour names, looked up with the same index as diff_symbols.
diff_colors = ["red", "green", "blue"]
9
def codepoints(s):
        """Return a lazy iterator yielding ord() of every item of *s*, in order."""
        return (ord (ch) for ch in s)
12
try:
        # Python 2: the builtin exists, so bind it as a module-level name.  On
        # Python 3 this lookup raises NameError and the handler at the bottom
        # installs chr instead.
        unichr = unichr

        if sys.maxunicode < 0x10FFFF:
                # Python 2 "narrow" build (UCS2 only): the builtin rejects codes above
                # 0xFFFF and strings carry surrogate pairs, so both helpers are
                # replaced with surrogate-aware versions.

                _narrow_unichr = unichr

                def unichr(i):
                        """
                        Narrow-build replacement for the builtin unichr().

                        Accepts every code from 0 to 0x10FFFF inclusive and returns the
                        corresponding unicode string.

                        >>> _narrow_unichr(0xFFFF + 1)
                        Traceback (most recent call last):
                          File "<stdin>", line 1, in ?
                        ValueError: unichr() arg not in range(0x10000) (narrow Python build)
                        >>> unichr(0xFFFF + 1) == u'\U00010000'
                        True
                        >>> unichr(1114111) == u'\U0010FFFF'
                        True
                        >>> unichr(0x10FFFF + 1)
                        Traceback (most recent call last):
                          File "<stdin>", line 1, in ?
                        ValueError: unichr() arg not in range(0x110000)
                        """
                        try:
                                return _narrow_unichr(i)
                        except ValueError:
                                # The builtin refused the code: spell it as a \UXXXXXXXX
                                # escape and let the unicode-escape codec decode it.
                                escape_str = "\\U" + hex(i)[2:].zfill(8)
                                try:
                                        return escape_str.decode("unicode-escape")
                                except UnicodeDecodeError:
                                        raise ValueError('unichr() arg not in range(0x110000)')

                def codepoints(s):
                        """Yield the code points of *s*, joining surrogate pairs.

                        A high surrogate followed by a low surrogate is yielded as the
                        single code point they encode; any surrogate without its partner
                        is yielded as 0xFFFC.
                        """
                        pending = None  # high surrogate still waiting for its low half
                        for ch in s:
                                cp = ord (ch)
                                if 0xDC00 <= cp <= 0xDFFF:
                                        # Low surrogate: completes a pair or is stray.
                                        if pending:
                                                yield 0x10000 + (pending - 0xD800) * 0x400 + (cp - 0xDC00)
                                                pending = None
                                        else:
                                                yield 0xFFFC
                                        continue
                                if pending:
                                        # The previous high surrogate never got its low half.
                                        yield 0xFFFC
                                        pending = None
                                if 0xD800 <= cp <= 0xDBFF:
                                        pending = cp
                                else:
                                        yield cp
                        if pending:
                                # Input ended in the middle of a pair.
                                yield 0xFFFC

except NameError:
        unichr = chr
73
try:
        # Python 2 has a separate unicode text type; keep using it.
        unicode = unicode
except NameError:
        # Python 3 has no such builtin: str is the text type.
        unicode = str

def tounicode(s, encoding='ascii', errors='strict'):
        """Return *s* as text.

        Values that are already instances of the text type are returned
        unchanged; anything else is decoded via s.decode (encoding, errors).
        """
        if isinstance(s, unicode):
                return s
        return s.decode(encoding, errors)
84
class ColorFormatter:
        """Namespace of output formatters used when colourising diffs.

        Each nested formatter offers the same four static methods:
        start_color (c), end_color (), escape (s) and newline ().
        Auto () picks one of them from command-line style flags.
        """

        class Null:
                """Formatter that adds no markup at all."""
                @staticmethod
                def start_color (c): return ''
                @staticmethod
                def end_color (): return ''
                @staticmethod
                def escape (s): return s
                @staticmethod
                def newline (): return '\n'

        class ANSI:
                """Formatter emitting ANSI terminal escape sequences."""
                @staticmethod
                def start_color (c):
                        # Raises KeyError for any colour other than the three listed.
                        return {
                                'red': '\033[41;37;1m',
                                'green': '\033[42;37;1m',
                                'blue': '\033[44;37;1m',
                        }[c]
                @staticmethod
                def end_color ():
                        return '\033[m'
                @staticmethod
                def escape (s): return s
                @staticmethod
                def newline (): return '\n'

        class HTML:
                """Formatter emitting HTML <span> markup."""
                @staticmethod
                def start_color (c):
                        return '<span style="background:%s">' % c
                @staticmethod
                def end_color ():
                        return '</span>'
                @staticmethod
                def escape (s):
                        # cgi.escape was removed in Python 3.8.  html.escape is its
                        # replacement; quote=False keeps the old default of escaping
                        # only '&', '<' and '>'.
                        try:
                                from html import escape as _escape
                        except ImportError:
                                # Python 2 has no html module.
                                from cgi import escape as _escape
                        return _escape (s, quote=False)
                @staticmethod
                def newline (): return '<br/>\n'

        @staticmethod
        def Auto (argv = [], out = sys.stdout):
                """Choose a formatter class from the flags present in *argv*.

                Recognised flags are removed from *argv* in place.  The default is
                ANSI; when several flags are present the one listed last in the
                table below wins.  *out* is accepted but not used.
                """
                # The shared default list is never mutated: remove () is only
                # reached for a flag that is actually present in argv.
                choices = (
                        ("--format", ColorFormatter.ANSI),
                        ("--format=ansi", ColorFormatter.ANSI),
                        ("--format=html", ColorFormatter.HTML),
                        ("--no-format", ColorFormatter.Null),
                )
                format = ColorFormatter.ANSI
                for flag, formatter in choices:
                        if flag in argv:
                                argv.remove (flag)
                                format = formatter
                return format
141
142
class DiffColorizer:
        """Highlight the differing words of paired lines in a marker-prefixed diff.

        Per-side state is kept in two-element lists, so only lines tagged with
        the first two characters of *symbols* can be processed.
        """

        # A run of word characters followed by at most one non-word character.
        # The class previously read 'a-za-z', which left uppercase letters out
        # of the word class and split identifiers at every capital.
        diff_regex = re.compile ('([a-zA-Z0-9_]*)([^a-zA-Z0-9_]?)')

        def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
                """Store the formatter and the per-side colours and marker symbols.

                *formatter* must provide start_color (c), end_color (), escape (s)
                and newline ().
                """
                self.formatter = formatter
                self.colors = colors
                self.symbols = symbols

        def colorize_lines (self, lines):
                """Return the two *lines* with their differing tokens colourised.

                *lines* holds the two sides without their marker character; a
                missing side may be None.  Each non-empty side is returned as
                marker + coloured text + formatter newline.
                """
                lines = (l if l else '' for l in lines)
                # Put every token on its own line so difflib compares tokens.
                tokens = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
                out = ["", ""]
                colored = [False, False]  # is a colour span currently open on each side?
                for entry in difflib.Differ().compare (*tokens):
                        tag = entry[0]
                        text = entry[2:]
                        if tag == '?':
                                # Intraline hint from difflib; not part of the text.
                                continue
                        if tag == ' ':
                                # Common token: close any open span, then append to both sides.
                                for i in range(2):
                                        if colored[i]:
                                                out[i] += self.formatter.end_color ()
                                                colored[i] = False
                                out = [o + self.formatter.escape (text) for o in out]
                                continue
                        if tag in self.symbols:
                                i = self.symbols.index (tag)
                                if not colored[i]:
                                        out[i] += self.formatter.start_color (self.colors[i])
                                        colored[i] = True
                                out[i] += self.formatter.escape (text)
                                continue
                for i in range(2):
                        if colored[i]:
                                out[i] += self.formatter.end_color ()
                                colored[i] = False
                # Drop the newlines inserted for tokenising (and the original one).
                out = [o.replace ('\n', '') for o in out]
                return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, out) if s2]

        def colorize_diff (self, f):
                """Yield the lines of diff *f* with paired sides colourised.

                Lines whose first character is not in self.symbols are escaped
                and passed through.  Marker lines are buffered per side and
                flushed through colorize_lines () when both sides are present,
                when a side would be overwritten, and at the end of input.
                """
                lines = [None, None]
                for l in f:
                        if l[0] not in self.symbols:
                                yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
                                continue
                        i = self.symbols.index (l[0])
                        if lines[i]:
                                # This side is already buffered: flush before replacing it.
                                for line in self.colorize_lines (lines):
                                        yield line
                                lines = [None, None]
                        lines[i] = l[1:]
                        if (all (lines)):
                                # Both sides collected: flush the pair.
                                for line in self.colorize_lines (lines):
                                        yield line
                                lines = [None, None]
                if (any (lines)):
                        # Flush whatever is left at the end of input.
                        for line in self.colorize_lines (lines):
                                yield line
203
204
class ZipDiffer:
        """Line-by-line comparison of several files read in lockstep."""

        @staticmethod
        def diff_files (files, symbols=diff_symbols):
                """Write a marker-prefixed diff of *files* to standard output.

                *files* is an iterable of line iterables.  When a row of lines is
                identical across all inputs it is written once with a leading
                space; otherwise each non-empty line is written with the symbol
                of its input.  An IOError other than EPIPE is reported on
                standard error and exits with status 1; EPIPE ends the diff
                silently.
                """
                # The name is izip_longest on Python 2 and zip_longest on Python 3.
                try:
                        from itertools import zip_longest
                except ImportError:
                        from itertools import izip_longest as zip_longest

                files = tuple (files) # in case it's a generator, copy it
                try:
                        for lines in zip_longest (*files):
                                if all (lines[0] == line for line in lines[1:]):
                                        sys.stdout.writelines ([" ", lines[0]])
                                        continue

                                for i, l in enumerate (lines):
                                        # Exhausted inputs are padded with None.
                                        if l:
                                                sys.stdout.writelines ([symbols[i], l])
                except IOError as e:
                        if e.errno != errno.EPIPE:
                                print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
                                sys.exit (1)
223
224
class DiffFilters:
        """Generators that filter a marker-prefixed diff stream."""

        @staticmethod
        def filter_failures (f):
                """Yield only the lines of the test cases in *f* that did not pass."""
                for _key, group in DiffHelpers.separate_test_cases (f):
                        # The group is a one-shot iterator; keep a copy to re-emit.
                        case = list (group)
                        if DiffHelpers.test_passed (case):
                                continue
                        for line in case:
                                yield line
233
class Stat:
        """Running totals: number of tests added and the sum of their freq."""

        def __init__ (self):
                self.count, self.freq = 0, 0

        def add (self, test):
                """Count *test* once and accumulate its freq attribute."""
                self.count = self.count + 1
                self.freq = self.freq + test.freq
243
class Stats:
        """Pass/fail tallies for a set of tests, with simple statistics."""

        def __init__ (self):
                self.passed, self.failed, self.total = Stat (), Stat (), Stat ()

        def add (self, test):
                """Record *test* in the total and in the passed or failed tally."""
                self.total.add (test)
                bucket = self.passed if test.passed else self.failed
                bucket.add (test)

        def mean (self):
                """Return the fraction of tests that passed."""
                return float (self.passed.count) / self.total.count

        def variance (self):
                """Return pass fraction times fail fraction."""
                pass_fraction = float (self.passed.count) / self.total.count
                fail_fraction = float (self.failed.count) / self.total.count
                return pass_fraction * fail_fraction

        def stddev (self):
                """Return the square root of variance ()."""
                return self.variance () ** .5

        def zscore (self, population):
                """Calculate the standard score.
                   Population is the Stats for population.
                   Self is Stats for sample.
                   Returns larger absolute value if sample is highly unlikely to be random.
                   Anything outside of -3..+3 is very unlikely to be random.
                   See: http://en.wikipedia.org/wiki/Standard_score"""

                deviation = self.mean () - population.mean ()
                return deviation / population.stddev ()
277
278
279
280
class DiffSinks:
        """Consumers that summarise a marker-prefixed diff stream."""

        @staticmethod
        def print_stat (f):
                """Print how many of the test cases in *f* passed and failed.

                With no test cases at all the failure percentage is reported as 0
                instead of dividing by zero.
                """
                passed = 0
                failed = 0
                # XXX port to Stats, but that would really slow us down here
                for key, lines in DiffHelpers.separate_test_cases (f):
                        if DiffHelpers.test_passed (lines):
                                passed += 1
                        else:
                                failed += 1
                total = passed + failed
                # Empty input leaves total at 0; avoid the ZeroDivisionError.
                percent_failed = 100. * failed / total if total else 0.
                print ("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, percent_failed))
295
296
class Test:
        """One test case parsed from the lines of a marker-prefixed diff.

        Every line has the shape "<symbol>[<identifier>: ]<open>payload<close>",
        optionally ending in a newline.  The bracket pair selects the field:
        "()" sets text, "<>" sets unicodes (parsed by Unicode.parse) and "[]"
        sets glyphs.
        """

        def __init__ (self, lines):
                """Parse *lines*; passed becomes False if any symbol is not a space."""
                self.freq = 1
                self.passed = True
                self.identifier = None
                self.text = None
                self.unicodes = None
                self.glyphs = None
                for l in lines:
                        symbol = l[0]
                        if symbol != ' ':
                                self.passed = False
                        start = 1
                        if ':' in l:
                                colon = l.index (':')
                                if not self.identifier:
                                        self.identifier = l[1:colon]
                                start = colon + 2 # Skip colon and space
                        # Index of the closing bracket: last character, or the one
                        # before a trailing newline.
                        end = -1
                        if l[end] == '\n':
                                end -= 1
                        brackets = l[start] + l[end]
                        # Slice up to the closing bracket itself.  A fixed -2 here
                        # dropped the last payload character of lines that do not
                        # end in a newline.
                        payload = l[start+1:end]
                        if brackets == '()':
                                self.text = payload
                        elif brackets == '<>':
                                self.unicodes = Unicode.parse (payload)
                        elif brackets == '[]':
                                # XXX we don't handle failed tests here
                                self.glyphs = payload
328
329
class DiffHelpers:
        """Helpers for grouping and judging lines of a marker-prefixed diff."""

        @staticmethod
        def separate_test_cases (f):
                '''Group consecutive lines of f that share an identifier.

                   The identifier of a line is the text between its first
                   character and its first colon; a line with no colon after
                   its first character is its own identifier.  Returns the
                   itertools.groupby iterator of (identifier, lines) pairs.'''

                def identifier (line):
                        if ':' not in line[1:]:
                                return line
                        return line[1:line.index (':')]
                return groupby (f, key=identifier)

        @staticmethod
        def test_passed (lines):
                '''Return True if the test case made of lines counts as passed.

                   A case passes when every line starts with a space, or when
                   any line starting with "+" contains one of the markers below.'''
                lines = list (lines)
                # XXX This is a hack, but does the job for now.
                markers = (
                        "space+0|space+0",
                        "uni25CC",
                        "dottedcircle",
                        "glyph0",
                        "gid0",
                        "notdef",
                )
                # Markers are tried one after another, each against every "+" line.
                if any (marker in l for marker in markers for l in lines if l[0] == '+'):
                        return True
                return all (l[0] == ' ' for l in lines)
355
356
class FilterHelpers:
        """Adapters turning a filter generator into a function that prints."""

        @staticmethod
        def filter_printer_function (filter_callback):
                """Return a function that prints each item of filter_callback (f)."""
                def printer (f):
                        for item in filter_callback (f):
                                print (item)
                return printer

        @staticmethod
        def filter_printer_function_no_newline (filter_callback):
                """Like filter_printer_function, but writes each item to standard
                output as is, without appending a newline."""
                def printer (f):
                        for item in filter_callback (f):
                                sys.stdout.writelines ([item])
                return printer
372
373
class Ngram:
        """Factory for sliding-window n-gram generators."""

        @staticmethod
        def generator (n):
                """Return a generator function yielding every run of *n*
                consecutive items of its argument as a tuple.  The returned
                function carries *n* as its .n attribute."""

                def gen (f):
                        window = []
                        for item in f:
                                window.append (item)
                                if len (window) != n:
                                        continue
                                yield tuple (window)
                                # Slide the window by dropping its oldest item.
                                del window[:1]

                gen.n = n
                return gen
389
390
class UtilMains:
        """Command-line entry points that feed sys.argv or stdin to a callback.

        All three print a usage message and exit with status 1 on "--help".
        An IOError other than EPIPE is reported on standard error and exits
        with status 1; EPIPE is ignored silently.
        """

        @staticmethod
        def process_multiple_files (callback, mnemonic = "FILE"):
                """Call *callback* with an open file for every command-line
                argument, or with standard input when there are none."""

                if "--help" in sys.argv:
                        print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
                        sys.exit (1)

                try:
                        # No arguments means "read standard input".
                        names = sys.argv[1:] or ['-']
                        for name in names:
                                callback (FileHelpers.open_file_or_stdin (name))
                except IOError as e:
                        if e.errno != errno.EPIPE:
                                print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
                                sys.exit (1)

        @staticmethod
        def process_multiple_args (callback, mnemonic):
                """Call *callback* with every command-line argument in turn;
                at least one argument is required."""

                if len (sys.argv) == 1 or "--help" in sys.argv:
                        print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
                        sys.exit (1)

                try:
                        for arg in sys.argv[1:]:
                                callback (arg)
                except IOError as e:
                        if e.errno != errno.EPIPE:
                                print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
                                sys.exit (1)

        @staticmethod
        def filter_multiple_strings_or_stdin (callback, mnemonic, \
                                              separator = " ", \
                                              concat_separator = False):
                """Print *callback* applied to the arguments or to stdin lines.

                Without arguments, each line of standard input (minus its
                trailing newline) is transformed and printed on its own line.
                With arguments, the transformed arguments are printed joined by
                *separator*; if *concat_separator* is given, the arguments are
                first joined with it into a single string.
                """

                if "--help" in sys.argv:
                        print ("Usage:\n  %s %s...\nor:\n  %s\n\nWhen called with no arguments, input is read from standard input." \
                              % (sys.argv[0], mnemonic, sys.argv[0]))
                        sys.exit (1)

                try:
                        if len (sys.argv) != 1:
                                args = sys.argv[1:]
                                if concat_separator != False:
                                        args = [concat_separator.join (args)]
                                print (separator.join (callback (x) for x in (args)))
                        else:
                                # readline () returns '' only at end of input.
                                for line in iter (sys.stdin.readline, ''):
                                        if line[-1] == '\n':
                                                line = line[:-1]
                                        print (callback (line))
                except IOError as e:
                        if e.errno != errno.EPIPE:
                                print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
                                sys.exit (1)
452
453
class Unicode:
        """Conversions between text, "U+XXXX" notation and character names."""

        @staticmethod
        def decode (s):
                """Return the code points of *s* as comma-separated "U+XXXX" items.

                *s* is converted to text with tounicode (s, 'utf-8') first.
                """
                return u','.join ("U+%04X" % cp for cp in codepoints (tounicode (s, 'utf-8')))

        @staticmethod
        def parse (s):
                """Return the list of integers named by the hex numbers in *s*.

                "0x"/"0X" prefixes and the listed punctuation and prefix letters
                are treated as separators; every remaining word is read as
                hexadecimal (ValueError if it is not).
                """
                s = re.sub (r"0[xX]", " ", s)
                s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n\t]", " ", s)
                return [int (x, 16) for x in s.split ()]

        @staticmethod
        def encode (s):
                """Return the characters named by the hex code points in *s*.

                The result is UTF-8 encoded on Python 2 and text on Python 3.
                """
                s = u''.join (unichr (x) for x in Unicode.parse (s))
                if sys.version_info[0] == 2: s = s.encode ('utf-8')
                return s

        # Abbreviations substituted for full character names by pretty_name ().
        shorthands = {
                "ZERO WIDTH NON-JOINER": "ZWNJ",
                "ZERO WIDTH JOINER": "ZWJ",
                "NARROW NO-BREAK SPACE": "NNBSP",
                "COMBINING GRAPHEME JOINER": "CGJ",
                "LEFT-TO-RIGHT MARK": "LRM",
                "RIGHT-TO-LEFT MARK": "RLM",
                "LEFT-TO-RIGHT EMBEDDING": "LRE",
                "RIGHT-TO-LEFT EMBEDDING": "RLE",
                "POP DIRECTIONAL FORMATTING": "PDF",
                "LEFT-TO-RIGHT OVERRIDE": "LRO",
                "RIGHT-TO-LEFT OVERRIDE": "RLO",
        }

        @staticmethod
        def pretty_name (u):
                """Return a shortened Unicode name for the character *u*.

                Characters without a name yield "XXX".
                """
                try:
                        s = unicodedata.name (u)
                except ValueError:
                        return "XXX"
                s = re.sub (".* LETTER ", "", s)
                s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
                s = re.sub (".* SIGN ", "", s)
                s = re.sub (".* COMBINING ", "", s)
                # NOTE(review): the ".* SIGN " strip above runs first, so a name
                # such as "... SIGN VIRAMA" is already plain "VIRAMA" here and
                # does not match this pattern - confirm whether that is intended.
                if re.match (".* VIRAMA", s):
                        s = "HALANT"
                if s in Unicode.shorthands:
                        s = Unicode.shorthands[s]
                return s

        @staticmethod
        def pretty_names (s):
                """Return the pretty names of the code points in *s*, joined by " + ".

                The result is UTF-8 encoded on Python 2 and text on Python 3,
                the same convention as encode ().
                """
                s = re.sub (r"[<+>\\uU]", " ", s)
                s = re.sub (r"0[xX]", " ", s)
                s = [unichr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
                names = u' + '.join (Unicode.pretty_name (x) for x in s)
                # Encoding unconditionally returned bytes on Python 3.
                if sys.version_info[0] == 2: names = names.encode ('utf-8')
                return names
508
509
class FileHelpers:
        """Small helpers for opening input files."""

        @staticmethod
        def open_file_or_stdin (f):
                """Return sys.stdin if *f* is '-', otherwise *f* opened for reading."""
                if f == '-':
                        return sys.stdin
                # open () works on Python 2 and 3; the file () builtin used
                # previously exists on Python 2 only.
                return open (f)
517
518
class Manifest:
        """Reading and regenerating per-directory MANIFEST listings."""

        @staticmethod
        def read (s, strict = True):
                """Yield the normalised paths of all files reachable from *s*.

                A non-directory path is yielded as is.  For a directory, each
                entry listed in its MANIFEST file is read recursively.  When
                *strict* is true a missing path or MANIFEST is reported on
                standard error and the program exits with status 1; otherwise
                it is skipped silently.  *strict* also applies to nested
                entries.
                """

                if not os.path.exists (s):
                        if strict:
                                print ("%s: %s does not exist" % (sys.argv[0], s), file=sys.stderr)
                                sys.exit (1)
                        return

                s = os.path.normpath (s)

                if not os.path.isdir (s):
                        yield s
                        return

                manifest = os.path.join (s, "MANIFEST")
                try:
                        # open () instead of the Python 2-only file (); the
                        # with block closes the handle.
                        with open (manifest) as m:
                                items = [x.strip () for x in m]
                except IOError:
                        if strict:
                                print ("%s: %s does not exist" % (sys.argv[0], manifest), file=sys.stderr)
                                sys.exit (1)
                        return

                for f in items:
                        if not f:
                                # A blank line would join back to this directory
                                # and recurse into the same MANIFEST forever.
                                continue
                        for p in Manifest.read (os.path.join (s, f), strict):
                                yield p

        @staticmethod
        def update_recursive (s):
                """Rewrite the MANIFEST of *s* and of every directory below it.

                Each MANIFEST lists the sorted file names followed by the sorted
                directory names of its directory, leaving out the bookkeeping
                names below; directories with those names are not descended into.
                """

                for dirpath, dirnames, filenames in os.walk (s, followlinks=True):

                        for f in ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]:
                                # Removing from dirnames in place also stops
                                # os.walk from visiting that directory.
                                if f in dirnames:
                                        dirnames.remove (f)
                                if f in filenames:
                                        filenames.remove (f)
                        dirnames.sort ()
                        filenames.sort ()
                        ms = os.path.join (dirpath, "MANIFEST")
                        print ("  GEN    %s" % ms)
                        with open (ms, "w") as m:
                                for f in filenames:
                                        print (f, file=m)
                                for f in dirnames:
                                        print (f, file=m)
                        # os.walk already descends into dirnames; an explicit
                        # recursive call here regenerated nested directories
                        # repeatedly.
569
if __name__ == '__main__':
        # This module only provides helpers; running it directly does nothing.
        pass