3 from __future__ import print_function, division, absolute_import
5 import sys, os, re, difflib, unicodedata, errno, cgi
6 from itertools import *
8 import unicodedata2 as unicodedata
# Per-file marker characters: diff_files() writes symbols[i] in front of a
# line that came from the i-th input file, so the string length bounds how
# many files can be told apart.
diff_symbols = "-+=*&^%$#@!~/"
# Default value of the differ's `colors` parameter; entries are passed to
# the formatter's start_color() by position.
diff_colors = ['red', 'green', 'blue']
16 return (ord (u) for u in s)
21 if sys.maxunicode < 0x10FFFF:
22 # workarounds for Python 2 "narrow" builds with UCS2-only support.
24 _narrow_unichr = unichr
28 Return the unicode character whose Unicode code is the integer 'i'.
29 The valid range is 0 to 0x10FFFF inclusive.
31 >>> _narrow_unichr(0xFFFF + 1)
32 Traceback (most recent call last):
33 File "<stdin>", line 1, in ?
34 ValueError: unichr() arg not in range(0x10000) (narrow Python build)
35 >>> unichr(0xFFFF + 1) == u'\U00010000'
37 >>> unichr(1114111) == u'\U0010FFFF'
39 >>> unichr(0x10FFFF + 1)
40 Traceback (most recent call last):
41 File "<stdin>", line 1, in ?
42 ValueError: unichr() arg not in range(0x110000)
45 return _narrow_unichr(i)
48 padded_hex_str = hex(i)[2:].zfill(8)
49 escape_str = "\\U" + padded_hex_str
50 return escape_str.decode("unicode-escape")
51 except UnicodeDecodeError:
52 raise ValueError('unichr() arg not in range(0x110000)')
58 if 0xDC00 <= cp <= 0xDFFF:
60 yield 0x10000 + (high_surrogate - 0xD800) * 0x400 + (cp - 0xDC00)
68 if 0xD800 <= cp <= 0xDBFF:
84 def tounicode(s, encoding='ascii', errors='strict'):
85 if not isinstance(s, unicode):
86 return s.decode(encoding, errors)
def start_color (c):
    """Return the markup that opens colour *c*: always the empty string here."""
    return ''
def end_color ():
    """Return the markup that closes a coloured span: always the empty string here."""
    return ''
def escape (s):
    """Return *s* untouched; no characters are escaped by this hook."""
    return s
def newline ():
    """Return the line terminator to emit: a bare newline character."""
    return '\n'
106 'red': '\033[41;37;1m',
107 'green': '\033[42;37;1m',
108 'blue': '\033[44;37;1m',
def escape (s):
    """Return *s* unchanged; text is passed through without escaping."""
    return s
def newline ():
    """Return the end-of-line sequence: a single newline character."""
    return '\n'
121 return '<span style="background:%s">' % c
def escape (s):
    """Escape ``&``, ``<`` and ``>`` in *s* so it can be embedded in HTML text.

    Quote characters are left as they are, which matches the default
    behaviour of the ``cgi.escape`` call this replaces.
    """
    try:
        # cgi.escape was removed in Python 3.8 (and the cgi module in 3.13);
        # html.escape is its replacement on Python 3.
        from html import escape as html_escape
    except ImportError:
        # Python 2 has no html module; fall back to the original helper.
        from cgi import escape as cgi_escape
        return cgi_escape (s)
    # quote=False keeps the output identical to cgi.escape (s).
    return html_escape (s, quote=False)
def newline ():
    """Return the line terminator: an HTML line break followed by a newline."""
    return '<br/>\n'
131 def Auto (argv = [], out = sys.stdout):
132 format = ColorFormatter.ANSI
133 if "--format" in argv:
134 argv.remove ("--format")
135 format = ColorFormatter.ANSI
136 if "--format=ansi" in argv:
137 argv.remove ("--format=ansi")
138 format = ColorFormatter.ANSI
139 if "--format=html" in argv:
140 argv.remove ("--format=html")
141 format = ColorFormatter.HTML
142 if "--no-format" in argv:
143 argv.remove ("--no-format")
144 format = ColorFormatter.Null
# Tokenizer for word-level diffing: group 1 is a (possibly empty) run of
# word characters, group 2 is the single non-word character that follows it,
# if any.  colorize_lines() substitutes r'\1\n\2\n' so every token lands on
# its own line before the lines are handed to difflib.
# The character class previously read "a-za-z", which left out upper-case
# letters and split names such as "uni25CC" into separate tokens.
diff_regex = re.compile (r'([a-zA-Z0-9_]*)([^a-zA-Z0-9_]?)')
152 def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
153 self.formatter = formatter
155 self.symbols = symbols
157 def colorize_lines (self, lines):
158 lines = (l if l else '' for l in lines)
159 ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
162 for l in difflib.Differ().compare (*ss):
168 oo[i] += self.formatter.end_color ()
170 oo = [o + self.formatter.escape (l[2:]) for o in oo]
172 if l[0] in self.symbols:
173 i = self.symbols.index (l[0])
175 oo[i] += self.formatter.start_color (self.colors[i])
177 oo[i] += self.formatter.escape (l[2:])
181 oo[i] += self.formatter.end_color ()
183 oo = [o.replace ('\n', '') for o in oo]
184 return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]
186 def colorize_diff (self, f):
189 if l[0] not in self.symbols:
190 yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
192 i = self.symbols.index (l[0])
195 for line in self.colorize_lines (lines):
201 for line in self.colorize_lines (lines):
206 for line in self.colorize_lines (lines):
213 def diff_files (files, symbols=diff_symbols):
214 files = tuple (files) # in case it's a generator, copy it
216 for lines in izip_longest (*files):
217 if all (lines[0] == line for line in lines[1:]):
218 sys.stdout.writelines ([" ", lines[0]])
221 for i, l in enumerate (lines):
223 sys.stdout.writelines ([symbols[i], l])
225 if e.errno != errno.EPIPE:
226 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
233 def filter_failures (f):
234 for key, lines in DiffHelpers.separate_test_cases (f):
236 if not DiffHelpers.test_passed (lines):
237 for l in lines: yield l
245 def add (self, test):
247 self.freq += test.freq
252 self.passed = Stat ()
253 self.failed = Stat ()
256 def add (self, test):
257 self.total.add (test)
259 self.passed.add (test)
261 self.failed.add (test)
264 return float (self.passed.count) / self.total.count
267 return (float (self.passed.count) / self.total.count) * \
268 (float (self.failed.count) / self.total.count)
271 return self.variance () ** .5
def zscore (self, population):
    """Return the standard score of this sample relative to *population*.

    *self* holds the statistics of the sample and *population* those of
    the whole population; both must provide ``mean ()`` and *population*
    must also provide ``stddev ()``.

    The further the result is from zero, the less likely it is that the
    sample is a random draw from the population; values outside -3..+3
    are very unlikely to be random.
    See: http://en.wikipedia.org/wiki/Standard_score

    Raises ZeroDivisionError if the population's standard deviation is 0.
    """
    sample_mean = self.mean ()
    population_mean = population.mean ()
    deviation = sample_mean - population_mean
    return deviation / population.stddev ()
292 # XXX port to Stats, but that would really slow us down here
293 for key, lines in DiffHelpers.separate_test_cases (f):
294 if DiffHelpers.test_passed (lines):
298 total = passed + failed
299 print ("%d out of %d tests passed. %d failed (%g%%)" % (passed, total, failed, 100. * failed / total))
304 def __init__ (self, lines):
307 self.identifier = None
318 if not self.identifier:
319 self.identifier = l[1:i]
320 i = i + 2 # Skip colon and space
324 brackets = l[i] + l[j]
328 elif brackets == '<>':
329 self.unicodes = Unicode.parse (l)
330 elif brackets == '[]':
331 # XXX we don't handle failed tests here
338 def separate_test_cases (f):
339 '''Reads lines from f, and if the lines have identifiers, ie.
340 have a colon character, groups them by identifier,
341 yielding lists of all lines with the same identifier.'''
345 return l[1:l.index (':')]
347 return groupby (f, key=identifier)
350 def test_passed (lines):
352 # XXX This is a hack, but does the job for now.
353 if any (l.find("space+0|space+0") >= 0 for l in lines if l[0] == '+'): return True
354 if any (l.find("uni25CC") >= 0 for l in lines if l[0] == '+'): return True
355 if any (l.find("dottedcircle") >= 0 for l in lines if l[0] == '+'): return True
356 if any (l.find("glyph0") >= 0 for l in lines if l[0] == '+'): return True
357 if any (l.find("gid0") >= 0 for l in lines if l[0] == '+'): return True
358 if any (l.find("notdef") >= 0 for l in lines if l[0] == '+'): return True
359 return all (l[0] == ' ' for l in lines)
365 def filter_printer_function (filter_callback):
367 for line in filter_callback (f):
372 def filter_printer_function_no_newline (filter_callback):
374 for line in filter_callback (f):
375 sys.stdout.writelines ([line])
399 def process_multiple_files (callback, mnemonic = "FILE"):
401 if "--help" in sys.argv:
402 print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
406 files = sys.argv[1:] if len (sys.argv) > 1 else ['-']
408 callback (FileHelpers.open_file_or_stdin (s))
410 if e.errno != errno.EPIPE:
411 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
415 def process_multiple_args (callback, mnemonic):
417 if len (sys.argv) == 1 or "--help" in sys.argv:
418 print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
422 for s in sys.argv[1:]:
425 if e.errno != errno.EPIPE:
426 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
430 def filter_multiple_strings_or_stdin (callback, mnemonic, \
432 concat_separator = False):
434 if "--help" in sys.argv:
435 print ("Usage:\n %s %s...\nor:\n %s\n\nWhen called with no arguments, input is read from standard input." \
436 % (sys.argv[0], mnemonic, sys.argv[0]))
440 if len (sys.argv) == 1:
442 line = sys.stdin.readline ()
447 print (callback (line))
450 if concat_separator != False:
451 args = [concat_separator.join (args)]
452 print (separator.join (callback (x) for x in (args)))
454 if e.errno != errno.EPIPE:
455 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
463 return u','.join ("U+%04X" % cp for cp in codepoints (tounicode (s, 'utf-8')))
467 s = re.sub (r"0[xX]", " ", s)
468 s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n\t]", " ", s)
469 return [int (x, 16) for x in s.split ()]
473 s = u''.join (unichr (x) for x in Unicode.parse (s))
474 if sys.version_info[0] == 2: s = s.encode ('utf-8')
478 "ZERO WIDTH NON-JOINER": "ZWNJ",
479 "ZERO WIDTH JOINER": "ZWJ",
480 "NARROW NO-BREAK SPACE": "NNBSP",
481 "COMBINING GRAPHEME JOINER": "CGJ",
482 "LEFT-TO-RIGHT MARK": "LRM",
483 "RIGHT-TO-LEFT MARK": "RLM",
484 "LEFT-TO-RIGHT EMBEDDING": "LRE",
485 "RIGHT-TO-LEFT EMBEDDING": "RLE",
486 "POP DIRECTIONAL FORMATTING": "PDF",
487 "LEFT-TO-RIGHT OVERRIDE": "LRO",
488 "RIGHT-TO-LEFT OVERRIDE": "RLO",
494 s = unicodedata.name (u)
497 s = re.sub (".* LETTER ", "", s)
498 s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
499 s = re.sub (".* SIGN ", "", s)
500 s = re.sub (".* COMBINING ", "", s)
501 if re.match (".* VIRAMA", s):
503 if s in Unicode.shorthands:
504 s = Unicode.shorthands[s]
def pretty_names (s):
    """Turn a string of hexadecimal codepoints into ' + '-joined pretty names.

    The characters ``<``, ``+``, ``>``, backslash, ``u`` and ``U`` as well as
    any ``0x``/``0X`` prefix are replaced by spaces; what remains is split on
    commas, spaces and newlines and every non-empty piece is parsed as a
    base-16 codepoint.  Each resulting character is handed to
    Unicode.pretty_name and the names are joined with ' + '.

    Returns a UTF-8 encoded byte string on Python 2 and a text string on
    Python 3, so the result can be printed or joined with ordinary strings
    on either version.
    Raises ValueError if a piece is not valid hexadecimal.
    """
    s = re.sub (r"[<+>\\uU]", " ", s)
    s = re.sub (r"0[xX]", " ", s)
    chars = [unichr (int (x, 16)) for x in re.split ('[, \n]', s) if x]
    names = u' + '.join (Unicode.pretty_name (c) for c in chars)
    # Encoding unconditionally yields bytes on Python 3, which cannot be
    # joined with str separators; only Python 2 needs the byte string.
    if sys.version_info[0] == 2:
        names = names.encode ('utf-8')
    return names
518 def open_file_or_stdin (f):
527 def read (s, strict = True):
529 if not os.path.exists (s):
531 print ("%s: %s does not exist" % (sys.argv[0], s), file=sys.stderr)
535 s = os.path.normpath (s)
537 if os.path.isdir (s):
540 m = open (os.path.join (s, "MANIFEST"))
541 items = [x.strip () for x in m.readlines ()]
543 for p in Manifest.read (os.path.join (s, f)):
547 print ("%s: %s does not exist" % (sys.argv[0], os.path.join (s, "MANIFEST")), file=sys.stderr)
554 def update_recursive (s):
556 for dirpath, dirnames, filenames in os.walk (s, followlinks=True):
558 for f in ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]:
565 ms = os.path.join (dirpath, "MANIFEST")
566 print (" GEN %s" % ms)
573 Manifest.update_recursive (os.path.join (dirpath, f))
575 if __name__ == '__main__':