3 from __future__ import print_function
4 import sys, os, re, difflib, unicodedata, errno, cgi
5 from itertools import *
# Marker characters for diff lines.  diff_files prefixes a line taken from
# the i-th input file with symbols[i], and DiffColorizer maps a line's first
# character back to that index with symbols.index().
diff_symbols = "-+=*&^%$#@!~/"
# Highlight color names.  DiffColorizer looks up colors[i] with the same
# index i it derived from the symbol, so entry i pairs with symbol i.
# NOTE(review): there are fewer colors than symbols; a symbol index past the
# end of this list would presumably raise IndexError -- confirm.
diff_colors = ['red', 'green', 'blue']
11 return (ord (u) for u in s)
16 if sys.maxunicode < 0x10FFFF:
17 # workarounds for Python 2 "narrow" builds with UCS2-only support.
19 _narrow_unichr = unichr
23 Return the unicode character whose Unicode code is the integer 'i'.
24 The valid range is 0 to 0x10FFFF inclusive.
26 >>> _narrow_unichr(0xFFFF + 1)
27 Traceback (most recent call last):
28 File "<stdin>", line 1, in ?
29 ValueError: unichr() arg not in range(0x10000) (narrow Python build)
30 >>> unichr(0xFFFF + 1) == u'\U00010000'
32 >>> unichr(1114111) == u'\U0010FFFF'
34 >>> unichr(0x10FFFF + 1)
35 Traceback (most recent call last):
36 File "<stdin>", line 1, in ?
37 ValueError: unichr() arg not in range(0x110000)
40 return _narrow_unichr(i)
43 padded_hex_str = hex(i)[2:].zfill(8)
44 escape_str = "\\U" + padded_hex_str
45 return escape_str.decode("unicode-escape")
46 except UnicodeDecodeError:
47 raise ValueError('unichr() arg not in range(0x110000)')
53 if 0xDC00 <= cp <= 0xDFFF:
55 yield 0x10000 + (high_surrogate - 0xD800) * 0x400 + (cp - 0xDC00)
63 if 0xD800 <= cp <= 0xDBFF:
79 def tounicode(s, encoding='ascii', errors='strict'):
80 if not isinstance(s, unicode):
81 return s.decode(encoding, errors)
def start_color (c):
    """Return the markup that opens a run highlighted in color `c`.

    This variant emits no markup at all: `c` is ignored and the result is
    always the empty string.
    """
    return ''
def end_color ():
    """Return the markup that closes a highlighted run.

    This variant emits no markup, so the result is always the empty string.
    """
    return ''
def escape (s):
    """Return `s` prepared for output.

    No characters need protecting in this variant, so `s` is handed back
    unchanged (the very same object).
    """
    return s
def newline ():
    """Return the line terminator for this output format: a bare '\\n'."""
    return '\n'
101 'red': '\033[41;37;1m',
102 'green': '\033[42;37;1m',
103 'blue': '\033[44;37;1m',
def escape (s):
    """Return `s` prepared for output.

    Nothing is escaped in this variant: `s` is handed back unchanged (the
    very same object).
    """
    return s
def newline ():
    """Return the line terminator for this output format: a bare '\\n'."""
    return '\n'
116 return '<span style="background:%s">' % c
def escape (s):
    """Return `s` with the HTML-special characters &, < and > escaped.

    Quote characters are left untouched, exactly as the historical
    cgi.escape (s) call did.

    cgi.escape was deprecated in Python 3.2 and removed in Python 3.8, so
    html.escape is preferred whenever it exists; cgi.escape is only used as
    a fallback on Python 2, where the html module is not available.
    """
    try:
        from html import escape as html_escape
    except ImportError:
        # Python 2: no html module, cgi.escape is still there.
        from cgi import escape as html_escape
    # quote=False keeps '"' and "'" as-is, matching cgi.escape's default
    # (html.escape would otherwise escape them too).
    return html_escape (s, quote=False)
def newline ():
    """Return the line terminator for HTML output.

    An explicit <br/> element followed by '\\n', so that lines break both
    when rendered and in the generated markup itself.
    """
    return '<br/>\n'
def Auto (argv = [], out = sys.stdout):
    # Choose a formatter class from the flags found in `argv`.
    #
    # Starts from ColorFormatter.ANSI and then tests each recognized flag
    # with its own independent `if`, in the order written, so when several
    # flags are present the one tested last decides.  A flag that is found
    # is removed from `argv` in place (list.remove drops only its first
    # occurrence), so the caller's list is modified.
    #
    # The shared default `argv = []` is never actually mutated: every
    # remove() is guarded by a membership test, which an empty list fails.
    # `out` is not referenced by any of the lines below.
    #
    # NOTE(review): nothing after the last branch is visible in this
    # excerpt; presumably the selected class is returned -- confirm.
    format = ColorFormatter.ANSI
    # A bare "--format" selects ANSI, i.e. the same class as the default.
    if "--format" in argv:
        argv.remove ("--format")
        format = ColorFormatter.ANSI
    if "--format=ansi" in argv:
        argv.remove ("--format=ansi")
        format = ColorFormatter.ANSI
    if "--format=html" in argv:
        argv.remove ("--format=html")
        format = ColorFormatter.HTML
    # Tested last, so "--no-format" overrides any "--format..." flag.
    if "--no-format" in argv:
        argv.remove ("--no-format")
        format = ColorFormatter.Null
# Tokenizer applied to each line before diffing: group 1 is a (possibly
# empty) run of word characters, group 2 the single non-word character that
# follows it, if any.  colorize_lines rewrites every match as
# '\1\n\2\n' and diffs the resulting pieces.
# The class previously read 'a-za-z', repeating the lowercase range, so
# uppercase letters were not word characters and each one became a token
# of its own; 'a-zA-Z' keeps mixed-case words together.
diff_regex = re.compile ('([a-zA-Z0-9_]*)([^a-zA-Z0-9_]?)')
147 def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
148 self.formatter = formatter
150 self.symbols = symbols
152 def colorize_lines (self, lines):
153 lines = (l if l else '' for l in lines)
154 ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
157 for l in difflib.Differ().compare (*ss):
163 oo[i] += self.formatter.end_color ()
165 oo = [o + self.formatter.escape (l[2:]) for o in oo]
167 if l[0] in self.symbols:
168 i = self.symbols.index (l[0])
170 oo[i] += self.formatter.start_color (self.colors[i])
172 oo[i] += self.formatter.escape (l[2:])
176 oo[i] += self.formatter.end_color ()
178 oo = [o.replace ('\n', '') for o in oo]
179 return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]
181 def colorize_diff (self, f):
184 if l[0] not in self.symbols:
185 yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
187 i = self.symbols.index (l[0])
190 for line in self.colorize_lines (lines):
196 for line in self.colorize_lines (lines):
201 for line in self.colorize_lines (lines):
208 def diff_files (files, symbols=diff_symbols):
209 files = tuple (files) # in case it's a generator, copy it
211 for lines in izip_longest (*files):
212 if all (lines[0] == line for line in lines[1:]):
213 sys.stdout.writelines ([" ", lines[0]])
216 for i, l in enumerate (lines):
218 sys.stdout.writelines ([symbols[i], l])
220 if e.errno != errno.EPIPE:
221 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
228 def filter_failures (f):
229 for key, lines in DiffHelpers.separate_test_cases (f):
231 if not DiffHelpers.test_passed (lines):
232 for l in lines: yield l
240 def add (self, test):
242 self.freq += test.freq
247 self.passed = Stat ()
248 self.failed = Stat ()
251 def add (self, test):
252 self.total.add (test)
254 self.passed.add (test)
256 self.failed.add (test)
259 return float (self.passed.count) / self.total.count
262 return (float (self.passed.count) / self.total.count) * \
263 (float (self.failed.count) / self.total.count)
266 return self.variance () ** .5
def zscore (self, population):
    """Return the standard score of this sample relative to `population`.

    `self` holds the statistics of the sample and `population` those of
    the whole population.  The result is the difference between the two
    means, expressed in units of the population's standard deviation.

    The larger the absolute value, the less likely it is that the sample
    is random; anything outside of -3..+3 is very unlikely to be random.
    See: http://en.wikipedia.org/wiki/Standard_score
    """
    deviation = self.mean () - population.mean ()
    return deviation / population.stddev ()
287 # XXX port to Stats, but that would really slow us down here
288 for key, lines in DiffHelpers.separate_test_cases (f):
289 if DiffHelpers.test_passed (lines):
293 total = passed + failed
294 print ("%d out of %d tests passed. %d failed (%g%%)" % (passed, total, failed, 100. * failed / total))
299 def __init__ (self, lines):
302 self.identifier = None
313 if not self.identifier:
314 self.identifier = l[1:i]
315 i = i + 2 # Skip colon and space
319 brackets = l[i] + l[j]
323 elif brackets == '<>':
324 self.unicodes = Unicode.parse (l)
325 elif brackets == '[]':
326 # XXX we don't handle failed tests here
333 def separate_test_cases (f):
334 '''Reads lines from f, and if the lines have identifiers, ie.
335 have a colon character, groups them by identifier,
336 yielding lists of all lines with the same identifier.'''
340 return l[1:l.index (':')]
342 return groupby (f, key=identifier)
345 def test_passed (lines):
347 # XXX This is a hack, but does the job for now.
348 if any (l.find("space+0|space+0") >= 0 for l in lines if l[0] == '+'): return True
349 if any (l.find("uni25CC") >= 0 for l in lines if l[0] == '+'): return True
350 if any (l.find("dottedcircle") >= 0 for l in lines if l[0] == '+'): return True
351 if any (l.find("glyph0") >= 0 for l in lines if l[0] == '+'): return True
352 if any (l.find("gid0") >= 0 for l in lines if l[0] == '+'): return True
353 if any (l.find("notdef") >= 0 for l in lines if l[0] == '+'): return True
354 return all (l[0] == ' ' for l in lines)
360 def filter_printer_function (filter_callback):
362 for line in filter_callback (f):
367 def filter_printer_function_no_newline (filter_callback):
369 for line in filter_callback (f):
370 sys.stdout.writelines ([line])
394 def process_multiple_files (callback, mnemonic = "FILE"):
396 if "--help" in sys.argv:
397 print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
401 files = sys.argv[1:] if len (sys.argv) > 1 else ['-']
403 callback (FileHelpers.open_file_or_stdin (s))
405 if e.errno != errno.EPIPE:
406 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
410 def process_multiple_args (callback, mnemonic):
412 if len (sys.argv) == 1 or "--help" in sys.argv:
413 print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
417 for s in sys.argv[1:]:
420 if e.errno != errno.EPIPE:
421 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
425 def filter_multiple_strings_or_stdin (callback, mnemonic, \
427 concat_separator = False):
429 if "--help" in sys.argv:
430 print ("Usage:\n %s %s...\nor:\n %s\n\nWhen called with no arguments, input is read from standard input." \
431 % (sys.argv[0], mnemonic, sys.argv[0]))
435 if len (sys.argv) == 1:
437 line = sys.stdin.readline ()
442 print (callback (line))
445 if concat_separator != False:
446 args = [concat_separator.join (args)]
447 print (separator.join (callback (x) for x in (args)))
449 if e.errno != errno.EPIPE:
450 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
458 return u','.join ("U+%04X" % cp for cp in codepoints (tounicode (s, 'utf-8')))
462 s = re.sub (r"0[xX]", " ", s)
463 s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n\t]", " ", s)
464 return [int (x, 16) for x in s.split ()]
468 s = u''.join (unichr (x) for x in Unicode.parse (s))
469 if sys.version_info[0] == 2: s = s.encode ('utf-8')
473 "ZERO WIDTH NON-JOINER": "ZWNJ",
474 "ZERO WIDTH JOINER": "ZWJ",
475 "NARROW NO-BREAK SPACE": "NNBSP",
476 "COMBINING GRAPHEME JOINER": "CGJ",
477 "LEFT-TO-RIGHT MARK": "LRM",
478 "RIGHT-TO-LEFT MARK": "RLM",
479 "LEFT-TO-RIGHT EMBEDDING": "LRE",
480 "RIGHT-TO-LEFT EMBEDDING": "RLE",
481 "POP DIRECTIONAL FORMATTING": "PDF",
482 "LEFT-TO-RIGHT OVERRIDE": "LRO",
483 "RIGHT-TO-LEFT OVERRIDE": "RLO",
489 s = unicodedata.name (u)
492 s = re.sub (".* LETTER ", "", s)
493 s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
494 s = re.sub (".* SIGN ", "", s)
495 s = re.sub (".* COMBINING ", "", s)
496 if re.match (".* VIRAMA", s):
498 if s in Unicode.shorthands:
499 s = Unicode.shorthands[s]
def pretty_names (s):
    """Describe a string of hexadecimal code points by character name.

    The characters <, +, >, backslash, u and U, as well as any 0x / 0X
    prefix, are replaced by spaces; what remains is split on commas,
    spaces and newlines, and every non-empty piece is parsed as a
    hexadecimal code point.  Each resulting character is passed to
    Unicode.pretty_name and the names are joined with ' + '.

    Returns a UTF-8 encoded byte string on Python 2 and a text string on
    Python 3.
    Raises ValueError if a piece is not a valid hexadecimal number.
    """
    s = re.sub (r"[<+>\\uU]", " ", s)
    s = re.sub (r"0[xX]", " ", s)
    chars = [unichr (int (x, 16)) for x in re.split ('[, \n]', s) if x]
    names = u' + '.join (Unicode.pretty_name (c) for c in chars)
    # Only Python 2 needs bytes for printing.  Encoding unconditionally made
    # Python 3 callers print a b'...' repr and fail when joining results
    # with a text separator.
    if sys.version_info[0] == 2:
        names = names.encode ('utf-8')
    return names
513 def open_file_or_stdin (f):
522 def read (s, strict = True):
524 if not os.path.exists (s):
526 print ("%s: %s does not exist" % (sys.argv[0], s), file=sys.stderr)
530 s = os.path.normpath (s)
532 if os.path.isdir (s):
535 m = file (os.path.join (s, "MANIFEST"))
536 items = [x.strip () for x in m.readlines ()]
538 for p in Manifest.read (os.path.join (s, f)):
542 print ("%s: %s does not exist" % (sys.argv[0], os.path.join (s, "MANIFEST")), file=sys.stderr)
549 def update_recursive (s):
551 for dirpath, dirnames, filenames in os.walk (s, followlinks=True):
553 for f in ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]:
560 ms = os.path.join (dirpath, "MANIFEST")
561 print (" GEN %s" % ms)
568 Manifest.update_recursive (os.path.join (dirpath, f))
570 if __name__ == '__main__':