Imported Upstream version 2.3.1
[platform/upstream/harfbuzz.git] / test / shaping / hb_test_tools.py
1 #!/usr/bin/env python
2
3 from __future__ import print_function, division, absolute_import
4
5 import sys, os, re, difflib, unicodedata, errno, cgi
6 from itertools import *
7 try:
8         import unicodedata2 as unicodedata
9 except Exception:
10         pass
11
# One marker character per compared input: the i-th character prefixes the
# lines that came from the i-th file of a zipped diff.
diff_symbols = "-+=*&^%$#@!~/"
# Highlight colors, looked up with the same index as diff_symbols.
diff_colors = ["red", "green", "blue"]
14
def codepoints(s):
    """Return a lazy iterable yielding ``ord()`` of every item of *s*, in order."""
    return (ord(ch) for ch in s)
17
try:
    # Python 2 has a builtin ``unichr``.  On Python 3 this lookup raises
    # NameError and the handler at the bottom aliases ``chr`` instead.
    unichr = unichr

    if sys.maxunicode < 0x10FFFF:
        # Workarounds for Python 2 "narrow" builds with UCS2-only support:
        # both unichr() and codepoints() are replaced by surrogate-aware
        # versions.

        _narrow_unichr = unichr

        def unichr(i):
            """
            Return the unicode character whose Unicode code is the integer 'i'.
            The valid range is 0 to 0x10FFFF inclusive.

            >>> _narrow_unichr(0xFFFF + 1)
            Traceback (most recent call last):
              File "<stdin>", line 1, in ?
            ValueError: unichr() arg not in range(0x10000) (narrow Python build)
            >>> unichr(0xFFFF + 1) == u'\U00010000'
            True
            >>> unichr(1114111) == u'\U0010FFFF'
            True
            >>> unichr(0x10FFFF + 1)
            Traceback (most recent call last):
              File "<stdin>", line 1, in ?
            ValueError: unichr() arg not in range(0x110000)
            """
            try:
                return _narrow_unichr(i)
            except ValueError:
                # Outside the builtin's range: spell the character as a
                # "\UXXXXXXXX" escape and let the codec decode it.
                escaped = "\\U" + hex(i)[2:].zfill(8)
                try:
                    return escaped.decode("unicode-escape")
                except UnicodeDecodeError:
                    raise ValueError('unichr() arg not in range(0x110000)')

        def codepoints(s):
            """Yield the code points of *s*, combining surrogate pairs.

            An unpaired surrogate (high or low) is reported as U+FFFD.
            """
            pending = None  # high surrogate still waiting for its low half
            for ch in s:
                value = ord(ch)
                if 0xDC00 <= value <= 0xDFFF:
                    # Low surrogate: completes a pair or stands alone.
                    if pending:
                        yield 0x10000 + (pending - 0xD800) * 0x400 + (value - 0xDC00)
                        pending = None
                    else:
                        yield 0xFFFD
                    continue
                if pending:
                    # The previous high surrogate was never completed.
                    yield 0xFFFD
                    pending = None
                if 0xD800 <= value <= 0xDBFF:
                    pending = value
                else:
                    yield value
            if pending:
                yield 0xFFFD

except NameError:
    unichr = chr
78
# ``unicode`` names the text type: the builtin on Python 2, ``str`` on Python 3.
try:
    unicode = unicode
except NameError:
    unicode = str


def tounicode(s, encoding='ascii', errors='strict'):
    """Return *s* as text.

    Text input is returned unchanged; anything else is decoded with
    ``s.decode(encoding, errors)``.
    """
    if isinstance(s, unicode):
        return s
    return s.decode(encoding, errors)
89
class ColorFormatter:
    """Namespace of output formatters used to highlight diff text.

    Every nested formatter class exposes the same four static methods:
    ``start_color(c)``, ``end_color()``, ``escape(s)`` and ``newline()``,
    each returning a string to be inserted into the output.
    """

    class Null:
        """Formatter that emits no color markup."""

        @staticmethod
        def start_color(c):
            return ''

        @staticmethod
        def end_color():
            return ''

        @staticmethod
        def escape(s):
            return s

        @staticmethod
        def newline():
            return '\n'

    class ANSI:
        """Formatter that emits ANSI terminal escape sequences."""

        @staticmethod
        def start_color(c):
            # Raises KeyError for any color other than the three listed.
            return {
                'red': '\033[41;37;1m',
                'green': '\033[42;37;1m',
                'blue': '\033[44;37;1m',
            }[c]

        @staticmethod
        def end_color():
            return '\033[m'

        @staticmethod
        def escape(s):
            return s

        @staticmethod
        def newline():
            return '\n'

    class HTML:
        """Formatter that emits HTML ``<span>`` markup."""

        @staticmethod
        def start_color(c):
            return '<span style="background:%s">' % c

        @staticmethod
        def end_color():
            return '</span>'

        @staticmethod
        def escape(s):
            """Escape ``&``, ``<`` and ``>`` in *s*; quotes are left alone."""
            # cgi.escape() was removed in Python 3.8; html.escape() with
            # quote=False escapes the same three characters.
            try:
                from html import escape as html_escape
            except ImportError:
                # No ``html`` module available (Python 2): use the legacy call.
                return cgi.escape(s)
            return html_escape(s, quote=False)

        @staticmethod
        def newline():
            return '<br/>\n'

    @staticmethod
    def Auto(argv = [], out = sys.stdout):
        """Pick a formatter class from command-line style flags.

        Recognized flags are removed from *argv* in place (one occurrence
        each).  They are examined in the order ``--format``,
        ``--format=ansi``, ``--format=html``, ``--no-format``; when several
        are present the last one examined wins.  Without any flag the ANSI
        formatter is returned.  *out* is accepted but not used.
        """
        # The shared default list is only mutated when a flag is found in it,
        # which cannot happen while it is empty.
        chosen = ColorFormatter.ANSI
        for flag, formatter in (
            ("--format", ColorFormatter.ANSI),
            ("--format=ansi", ColorFormatter.ANSI),
            ("--format=html", ColorFormatter.HTML),
            ("--no-format", ColorFormatter.Null),
        ):
            if flag in argv:
                argv.remove(flag)
                chosen = formatter
        return chosen
146
147
class DiffColorizer:
    """Highlights the differing parts of paired diff lines.

    *formatter* supplies ``start_color``/``end_color``/``escape``/``newline``.
    *colors* and *symbols* are indexed in parallel: a line whose first
    character is ``symbols[i]`` is highlighted with ``colors[i]``.  Only two
    sides are tracked, so a line starting with a symbol at index 2 or above
    raises IndexError in ``colorize_diff``.
    """

    # A run of word characters followed by at most one non-word character.
    # The class must include A-Z: without it every uppercase letter is
    # treated as a separator and becomes its own single-character token.
    diff_regex = re.compile(r'([a-zA-Z0-9_]*)([^a-zA-Z0-9_]?)')

    def __init__(self, formatter, colors=diff_colors, symbols=diff_symbols):
        self.formatter = formatter
        self.colors = colors
        self.symbols = symbols

    def colorize_lines(self, lines):
        """Return the two *lines* with their differing tokens colorized.

        *lines* holds exactly two entries (either may be None or empty).
        Each is split into tokens, the token lists are compared with
        ``difflib.Differ`` and differing runs are wrapped in color markup.
        The result lists one output line per non-empty side, prefixed with
        that side's symbol and terminated by the formatter's newline.
        """
        fmt = self.formatter
        # Put every token on its own line so that Differ compares tokens.
        tokenized = [
            self.diff_regex.sub(r'\1\n\2\n', text or '').splitlines(True)
            for text in lines
        ]
        out = ["", ""]
        in_color = [False, False]
        for entry in difflib.Differ().compare(*tokenized):
            tag, payload = entry[0], entry[2:]
            if tag == '?':
                # Intraline hint lines carry no content.
                continue
            if tag == ' ':
                # Common token: close any open highlight, then copy to both.
                for side in range(2):
                    if in_color[side]:
                        out[side] += fmt.end_color()
                        in_color[side] = False
                out = [acc + fmt.escape(payload) for acc in out]
                continue
            if tag in self.symbols:
                side = self.symbols.index(tag)
                if not in_color[side]:
                    out[side] += fmt.start_color(self.colors[side])
                    in_color[side] = True
                out[side] += fmt.escape(payload)
                continue
        for side in range(2):
            if in_color[side]:
                out[side] += fmt.end_color()
                in_color[side] = False
        # Drop the newlines that were inserted to separate tokens.
        out = [acc.replace('\n', '') for acc in out]
        return [
            symbol + text + fmt.newline()
            for (symbol, text) in zip(self.symbols, out)
            if text
        ]

    def colorize_diff(self, f):
        """Yield colorized output for the diff lines read from *f*.

        Lines whose first character is not one of the symbols are escaped
        and passed through.  Symbol lines are buffered per side and flushed
        through ``colorize_lines`` once both sides are filled, when a side
        would be overwritten, or at end of input.
        """
        pending = [None, None]
        for raw in f:
            if raw[0] not in self.symbols:
                yield self.formatter.escape(raw).replace('\n', self.formatter.newline())
                continue
            side = self.symbols.index(raw[0])
            if pending[side]:
                # This side is already occupied: flush what is buffered.
                for line in self.colorize_lines(pending):
                    yield line
                pending = [None, None]
            pending[side] = raw[1:]
            if all(pending):
                # Both sides are present: flush the pair.
                for line in self.colorize_lines(pending):
                    yield line
                pending = [None, None]
        if any(pending):
            # Flush whatever is left at end of input.
            for line in self.colorize_lines(pending):
                yield line
208
209
class ZipDiffer:
    """Line-by-line ("zipped") comparison of several files."""

    @staticmethod
    def diff_files(files, symbols=diff_symbols):
        """Write a zipped diff of *files* to standard output.

        The files are read in lockstep.  When the current line is identical
        in all of them it is written once, prefixed with a space.  Otherwise
        each non-empty line is written prefixed with ``symbols[i]``, where
        ``i`` is the position of its file.  Shorter files contribute nothing
        once exhausted.

        An IOError other than EPIPE is reported on standard error and ends
        the process with exit status 1; EPIPE is silently ignored.
        """
        # izip_longest exists only on Python 2; Python 3 names it zip_longest.
        try:
            from itertools import izip_longest as zip_longest
        except ImportError:
            from itertools import zip_longest

        files = tuple(files)  # in case it's a generator, copy it
        try:
            for lines in zip_longest(*files):
                if all(lines[0] == line for line in lines[1:]):
                    sys.stdout.writelines([" ", lines[0]])
                    continue

                for i, l in enumerate(lines):
                    if l:
                        sys.stdout.writelines([symbols[i], l])
        except IOError as e:
            if e.errno != errno.EPIPE:
                print("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
                sys.exit(1)
228
229
class DiffFilters:
    """Generators that filter the lines of a diff."""

    @staticmethod
    def filter_failures(f):
        """Yield only the lines of *f* that belong to test cases which did not pass.

        Lines are grouped with ``DiffHelpers.separate_test_cases`` and each
        group is judged by ``DiffHelpers.test_passed``.
        """
        for _key, group in DiffHelpers.separate_test_cases(f):
            case = list(group)
            if DiffHelpers.test_passed(case):
                continue
            for line in case:
                yield line
238
class Stat:
    """Running tally: the number of tests added and the sum of their ``freq``."""

    def __init__(self):
        self.count, self.freq = 0, 0

    def add(self, test):
        """Count *test* once and accumulate its ``freq`` attribute."""
        self.count = self.count + 1
        self.freq = self.freq + test.freq
248
class Stats:
    """Three ``Stat`` tallies: passed tests, failed tests and all tests."""

    def __init__(self):
        self.passed, self.failed, self.total = Stat(), Stat(), Stat()

    def add(self, test):
        """Record *test* in ``total`` and in ``passed`` or ``failed`` per ``test.passed``."""
        self.total.add(test)
        bucket = self.passed if test.passed else self.failed
        bucket.add(test)

    def mean(self):
        """Return the fraction of tests that passed."""
        return float(self.passed.count) / self.total.count

    def variance(self):
        """Return the pass fraction multiplied by the fail fraction."""
        total = self.total.count
        pass_ratio = float(self.passed.count) / total
        fail_ratio = float(self.failed.count) / total
        return pass_ratio * fail_ratio

    def stddev(self):
        """Return the square root of ``variance()``."""
        return self.variance() ** .5

    def zscore(self, population):
        """Calculate the standard score.
           Population is the Stats for population.
           Self is Stats for sample.
           Returns larger absolute value if sample is highly unlikely to be random.
           Anything outside of -3..+3 is very unlikely to be random.
           See: http://en.wikipedia.org/wiki/Standard_score"""

        deviation = self.mean() - population.mean()
        return deviation / population.stddev()
282
283
284
285
class DiffSinks:
    """Consumers that summarize a diff instead of transforming it."""

    @staticmethod
    def print_stat(f):
        """Print how many test cases in the diff *f* passed and failed.

        Test cases are the groups produced by
        ``DiffHelpers.separate_test_cases``; each is judged with
        ``DiffHelpers.test_passed``.  With no test cases at all the failure
        percentage is printed as 0 instead of dividing by zero.
        """
        passed = 0
        failed = 0
        # XXX port to Stats, but that would really slow us down here
        for key, lines in DiffHelpers.separate_test_cases(f):
            if DiffHelpers.test_passed(lines):
                passed += 1
            else:
                failed += 1
        total = passed + failed
        # Empty input has no failure rate to compute.
        percent = 100. * failed / total if total else 0.
        print("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, percent))
300
301
class Test:
    """One test case parsed from the diff lines that share its identifier.

    Each line starts with a one-character diff symbol, optionally followed
    by ``identifier: ``, then a bracketed payload:

    * ``(...)`` is stored as ``text``,
    * ``<...>`` is parsed with ``Unicode.parse`` into ``unicodes``,
    * ``[...]`` is stored as ``glyphs``.

    ``passed`` becomes False as soon as any line's symbol is not a space.
    ``freq`` is always initialised to 1.
    """

    def __init__(self, lines):
        self.freq = 1
        self.passed = True
        self.identifier = None
        self.text = None
        self.unicodes = None
        self.glyphs = None
        for l in lines:
            if l[0] != ' ':
                self.passed = False
            start = 1
            if ':' in l:
                colon = l.index(':')
                if not self.identifier:
                    self.identifier = l[1:colon]
                start = colon + 2  # Skip colon and space
            # Index of the closing bracket: the last character, or the one
            # before a trailing newline.
            end = -2 if l[-1] == '\n' else -1
            brackets = l[start] + l[end]
            # Slice up to the closing bracket itself.  A fixed -2 would drop
            # the final payload character of a line without a newline.
            payload = l[start + 1:end]
            if brackets == '()':
                self.text = payload
            elif brackets == '<>':
                self.unicodes = Unicode.parse(payload)
            elif brackets == '[]':
                # XXX we don't handle failed tests here
                self.glyphs = payload
333
334
class DiffHelpers:
    """Helpers for grouping and judging the lines of a diff."""

    @staticmethod
    def separate_test_cases(f):
        '''Reads lines from f, and if the lines have identifiers, ie.
           have a colon character, groups them by identifier,
           yielding lists of all lines with the same identifier.

           Only consecutive lines are grouped (itertools.groupby).  A line
           without a colon after its first character is its own key.'''

        def identifier(l):
            if ':' in l[1:]:
                # Search from index 1 to match the test above; the first
                # character is the diff symbol, never part of the identifier.
                return l[1:l.index(':', 1)]
            return l
        return groupby(f, key=identifier)

    @staticmethod
    def test_passed(lines):
        """Return True when the test case made of *lines* counts as passed.

        A case passes when every line starts with a space.  It is also
        accepted when any '+' line contains one of a fixed set of marker
        strings.
        """
        lines = list(lines)
        # XXX This is a hack, but does the job for now.
        markers = (
            "space+0|space+0",
            "uni25CC",
            "dottedcircle",
            "glyph0",
            "gid0",
            "notdef",
        )
        for marker in markers:
            if any(marker in l for l in lines if l[0] == '+'):
                return True
        return all(l[0] == ' ' for l in lines)
360
361
class FilterHelpers:
    """Adapters turning a line filter into a function that prints its output."""

    @staticmethod
    def filter_printer_function(filter_callback):
        """Return a function that prints each item ``filter_callback(f)`` produces.

        Every item is passed to ``print``, so a newline follows each one.
        """
        def printer(f):
            for item in filter_callback(f):
                print(item)
        return printer

    @staticmethod
    def filter_printer_function_no_newline(filter_callback):
        """Return a function that writes each produced item to standard output as is.

        No newline is appended.
        """
        def printer(f):
            for item in filter_callback(f):
                sys.stdout.write(item)
        return printer
377
378
class Ngram:
    """Factory for sliding-window (n-gram) generators."""

    @staticmethod
    def generator(n):
        """Return a generator function yielding every run of *n* consecutive items.

        The returned function takes an iterable and yields tuples of length
        *n*, advancing one item at a time.  It carries *n* as its ``n``
        attribute.
        """

        def gen(f):
            window = []
            for item in f:
                window.append(item)
                if len(window) != n:
                    continue
                yield tuple(window)
                # Slide the window forward by one item.
                del window[0]

        gen.n = n
        return gen
394
395
class UtilMains:
    """Command-line drivers that feed ``sys.argv`` (or stdin) to a callback.

    Every driver prints a usage line and exits with status 1 when
    ``--help`` is among the arguments.  An IOError other than EPIPE is
    reported on standard error and ends the process with status 1; EPIPE
    is ignored.
    """

    @staticmethod
    def _report_io_error(e):
        # Shared IOError policy: a broken pipe is ignored, anything else is fatal.
        if e.errno != errno.EPIPE:
            print("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
            sys.exit(1)

    @staticmethod
    def process_multiple_files(callback, mnemonic = "FILE"):
        """Call *callback* with an open file for every argument.

        Files are opened with ``FileHelpers.open_file_or_stdin``; without
        arguments the single name ``-`` is used.
        """
        if "--help" in sys.argv:
            print("Usage: %s %s..." % (sys.argv[0], mnemonic))
            sys.exit(1)

        try:
            for name in (sys.argv[1:] or ['-']):
                callback(FileHelpers.open_file_or_stdin(name))
        except IOError as e:
            UtilMains._report_io_error(e)

    @staticmethod
    def process_multiple_args(callback, mnemonic):
        """Call *callback* with every command-line argument; at least one is required."""
        if len(sys.argv) == 1 or "--help" in sys.argv:
            print("Usage: %s %s..." % (sys.argv[0], mnemonic))
            sys.exit(1)

        try:
            for arg in sys.argv[1:]:
                callback(arg)
        except IOError as e:
            UtilMains._report_io_error(e)

    @staticmethod
    def filter_multiple_strings_or_stdin(callback, mnemonic,
                                         separator = " ",
                                         concat_separator = False):
        """Print ``callback(x)`` for each argument, or for each line of stdin.

        Without arguments, standard input is read line by line; one trailing
        newline is stripped and each result is printed on its own line.
        With arguments, the results are joined with *separator* and printed
        once.  When *concat_separator* is not False, the arguments are first
        joined with it into a single string.
        """
        if "--help" in sys.argv:
            print("Usage:\n  %s %s...\nor:\n  %s\n\nWhen called with no arguments, input is read from standard input." \
                  % (sys.argv[0], mnemonic, sys.argv[0]))
            sys.exit(1)

        try:
            if len(sys.argv) == 1:
                # readline() returns an empty string only at end of input.
                for line in iter(sys.stdin.readline, ''):
                    if line.endswith('\n'):
                        line = line[:-1]
                    print(callback(line))
                return
            args = sys.argv[1:]
            if concat_separator != False:
                args = [concat_separator.join(args)]
            print(separator.join(callback(x) for x in args))
        except IOError as e:
            UtilMains._report_io_error(e)
457
458
class Unicode:
    """Conversions between text, "U+XXXX" notation and readable character names."""

    @staticmethod
    def decode(s):
        """Return the code points of *s* as comma-separated ``U+XXXX`` text.

        Input that is not already text is decoded as UTF-8 first.
        """
        return u','.join("U+%04X" % cp for cp in codepoints(tounicode(s, 'utf-8')))

    @staticmethod
    def parse(s):
        """Return the list of integers named by hexadecimal numbers in *s*.

        ``0x``/``0X`` prefixes and the characters ``<+>{},;&#\\xXuUnNiI``
        plus newline and tab are treated as separators.  What remains is
        split on whitespace and every piece is read as base 16 (ValueError
        if a piece is not hexadecimal).
        """
        s = re.sub(r"0[xX]", " ", s)
        s = re.sub(r"[<+>{},;&#\\xXuUnNiI\n\t]", " ", s)
        return [int(x, 16) for x in s.split()]

    @staticmethod
    def encode(s):
        """Return the characters named by *s* (see ``parse``).

        The result is UTF-8 encoded bytes on Python 2 and text on Python 3.
        """
        s = u''.join(unichr(x) for x in Unicode.parse(s))
        if sys.version_info[0] == 2: s = s.encode('utf-8')
        return s

    shorthands = {
        "ZERO WIDTH NON-JOINER": "ZWNJ",
        "ZERO WIDTH JOINER": "ZWJ",
        "NARROW NO-BREAK SPACE": "NNBSP",
        "COMBINING GRAPHEME JOINER": "CGJ",
        "LEFT-TO-RIGHT MARK": "LRM",
        "RIGHT-TO-LEFT MARK": "RLM",
        "LEFT-TO-RIGHT EMBEDDING": "LRE",
        "RIGHT-TO-LEFT EMBEDDING": "RLE",
        "POP DIRECTIONAL FORMATTING": "PDF",
        "LEFT-TO-RIGHT OVERRIDE": "LRO",
        "RIGHT-TO-LEFT OVERRIDE": "RLO",
    }

    @staticmethod
    def pretty_name(u):
        """Return a shortened form of the Unicode character name of *u*.

        Prefixes ending in " LETTER ", " SIGN " or " COMBINING " are
        removed, "... VOWEL SIGN X" becomes "X-MATRA", viramas become
        "HALANT" and the names listed in ``shorthands`` are abbreviated.
        Characters without a name yield "XXX".
        """
        try:
            s = unicodedata.name(u)
        except ValueError:
            return "XXX"
        s = re.sub(".* LETTER ", "", s)
        s = re.sub(".* VOWEL SIGN (.*)", r"\1-MATRA", s)
        s = re.sub(".* SIGN ", "", s)
        s = re.sub(".* COMBINING ", "", s)
        # "<SCRIPT> SIGN VIRAMA" has already been cut down to a bare
        # "VIRAMA" by the SIGN rule, so the prefix must be optional here.
        if re.match("(.* )?VIRAMA", s):
            s = "HALANT"
        if s in Unicode.shorthands:
            s = Unicode.shorthands[s]
        return s

    @staticmethod
    def pretty_names(s):
        """Return the pretty names of the code points in *s* joined by " + ".

        *s* holds hexadecimal code points separated by commas, spaces or
        newlines, optionally decorated with ``<``, ``>``, ``+``, ``\\``,
        ``u``/``U`` or ``0x``.  The result is UTF-8 encoded bytes on
        Python 2 and text on Python 3, matching ``encode``.
        """
        s = re.sub(r"[<+>\\uU]", " ", s)
        s = re.sub(r"0[xX]", " ", s)
        chars = [unichr(int(x, 16)) for x in re.split('[, \n]', s) if len(x)]
        joined = u' + '.join(Unicode.pretty_name(x) for x in chars)
        # Encode only where text and native strings differ (Python 2);
        # on Python 3 encoding would hand callers a bytes object.
        if sys.version_info[0] == 2:
            joined = joined.encode('utf-8')
        return joined
513
514
class FileHelpers:
    """Small helpers for opening input files."""

    @staticmethod
    def open_file_or_stdin(f):
        """Return ``sys.stdin`` when *f* is ``'-'``, otherwise ``open(f)``."""
        return sys.stdin if f == '-' else open(f)
522
523
class Manifest:
    """Reads and regenerates the per-directory ``MANIFEST`` listing files."""

    @staticmethod
    def read(s, strict = True):
        """Yield the normalized paths of all files reachable from *s*.

        A non-directory path yields itself.  A directory is expanded through
        its ``MANIFEST`` file, one entry per line, each resolved relative to
        the directory and expanded recursively.  Blank lines are ignored.

        When *s* or a directory's MANIFEST is missing, an error is printed
        and the process exits with status 1 if *strict* is true; otherwise
        nothing is yielded for it.  Nested entries are always read with the
        default strictness.
        """
        if not os.path.exists(s):
            if strict:
                print("%s: %s does not exist" % (sys.argv[0], s), file=sys.stderr)
                sys.exit(1)
            return

        s = os.path.normpath(s)

        if not os.path.isdir(s):
            yield s
            return

        manifest = os.path.join(s, "MANIFEST")
        try:
            # Read the whole listing up front so that the file is closed
            # before any nested directory is opened.
            with open(manifest) as m:
                items = [x.strip() for x in m]
        except IOError:
            if strict:
                print("%s: %s does not exist" % (sys.argv[0], manifest), file=sys.stderr)
                sys.exit(1)
            return

        for f in items:
            if not f:
                # A blank entry would resolve to the directory itself and
                # recurse without end.
                continue
            for p in Manifest.read(os.path.join(s, f)):
                yield p

    @staticmethod
    def update_recursive(s):
        """Rewrite the ``MANIFEST`` file of *s* and of every directory below it.

        Each MANIFEST lists the directory's files and then its
        subdirectories, both sorted, one name per line.  Entries named
        MANIFEST, README, LICENSE, COPYING, AUTHORS, SOURCES or ChangeLog
        are left out, and directories with those names are not descended
        into.  A "GEN" progress line is printed for every file written.
        """
        skipped = ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]

        # os.walk already descends into every directory left in dirnames,
        # so no explicit recursion is needed; recursing as well would
        # regenerate each subdirectory once per ancestor level.
        for dirpath, dirnames, filenames in os.walk(s, followlinks=True):

            for f in skipped:
                if f in dirnames:
                    dirnames.remove(f)
                if f in filenames:
                    filenames.remove(f)
            # In-place edits of dirnames prune and order the walk itself.
            dirnames.sort()
            filenames.sort()
            ms = os.path.join(dirpath, "MANIFEST")
            print("  GEN    %s" % ms)
            with open(ms, "w") as m:
                for f in filenames:
                    print(f, file=m)
                for f in dirnames:
                    print(f, file=m)
574
# Running this module directly does nothing.
if __name__ == '__main__':
    pass