3 from __future__ import print_function, division, absolute_import
5 import sys, os, re, difflib, unicodedata, errno, cgi
6 from itertools import *
# One-character tags, one per compared input: the i-th input's lines are
# prefixed with diff_symbols[i].  Used as the default for the `symbols`
# parameters further down in this file.
diff_symbols = "-+=*&^%$#@!~/"
# Color names passed to a formatter's start_color(), indexed in parallel
# with diff_symbols.  Used as the default for the `colors` parameter.
diff_colors = ['red', 'green', 'blue']
12 return (ord (u) for u in s)
17 if sys.maxunicode < 0x10FFFF:
18 # workarounds for Python 2 "narrow" builds with UCS2-only support.
20 _narrow_unichr = unichr
24 Return the unicode character whose Unicode code is the integer 'i'.
25 The valid range is 0 to 0x10FFFF inclusive.
27 >>> _narrow_unichr(0xFFFF + 1)
28 Traceback (most recent call last):
29 File "<stdin>", line 1, in ?
30 ValueError: unichr() arg not in range(0x10000) (narrow Python build)
31 >>> unichr(0xFFFF + 1) == u'\U00010000'
33 >>> unichr(1114111) == u'\U0010FFFF'
35 >>> unichr(0x10FFFF + 1)
36 Traceback (most recent call last):
37 File "<stdin>", line 1, in ?
38 ValueError: unichr() arg not in range(0x110000)
41 return _narrow_unichr(i)
44 padded_hex_str = hex(i)[2:].zfill(8)
45 escape_str = "\\U" + padded_hex_str
46 return escape_str.decode("unicode-escape")
47 except UnicodeDecodeError:
48 raise ValueError('unichr() arg not in range(0x110000)')
54 if 0xDC00 <= cp <= 0xDFFF:
56 yield 0x10000 + (high_surrogate - 0xD800) * 0x400 + (cp - 0xDC00)
64 if 0xD800 <= cp <= 0xDBFF:
80 def tounicode(s, encoding='ascii', errors='strict'):
81 if not isinstance(s, unicode):
82 return s.decode(encoding, errors)
def start_color (c):
    """Return the markup that starts color `c`; here always the empty string."""
    # The argument is accepted only for interface compatibility and is ignored.
    return ''
def end_color ():
    """Return the markup that ends a colored span; here always the empty string."""
    return ''
def escape (s):
    """Return `s` unchanged: no characters need escaping for this output."""
    return s
def newline ():
    """Return the line terminator for this output: a single '\\n'."""
    return '\n'
102 'red': '\033[41;37;1m',
103 'green': '\033[42;37;1m',
104 'blue': '\033[44;37;1m',
def escape (s):
    """Return `s` as-is; text is passed through without any escaping."""
    return s
def newline ():
    """Return the string that ends an output line: a plain '\\n'."""
    return '\n'
117 return '<span style="background:%s">' % c
def escape (s):
    """Return `s` with the HTML-special characters &, < and > escaped.

    Quote characters are left untouched, matching the default behaviour of
    the cgi.escape() call this replaces.
    """
    # cgi.escape() was deprecated in Python 3.2 and removed in 3.8;
    # html.escape() is its replacement.  html.escape() escapes quotes by
    # default, so quote=False is needed to keep the output identical.
    try:
        from html import escape as html_escape
    except ImportError:
        # Python 2 has no `html` module; fall back to the original call.
        return cgi.escape (s)
    return html_escape (s, quote=False)
def newline ():
    """Return the line break for HTML output: a <br/> tag followed by '\\n'."""
    return '<br/>\n'
127 def Auto (argv = [], out = sys.stdout):
128 format = ColorFormatter.ANSI
129 if "--format" in argv:
130 argv.remove ("--format")
131 format = ColorFormatter.ANSI
132 if "--format=ansi" in argv:
133 argv.remove ("--format=ansi")
134 format = ColorFormatter.ANSI
135 if "--format=html" in argv:
136 argv.remove ("--format=html")
137 format = ColorFormatter.HTML
138 if "--no-format" in argv:
139 argv.remove ("--no-format")
140 format = ColorFormatter.Null
# Tokenizer for word-level diffing: group 1 is a (possibly empty) run of
# word characters (ASCII letters, digits, underscore), group 2 is at most one
# following non-word character.  It is applied with sub() to put every token
# on its own line before the lines are compared.
# The character class previously read `a-za-z`, which left out uppercase
# letters and so split every uppercase letter off as a separate token.
diff_regex = re.compile ('([a-zA-Z0-9_]*)([^a-zA-Z0-9_]?)')
148 def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
149 self.formatter = formatter
151 self.symbols = symbols
153 def colorize_lines (self, lines):
154 lines = (l if l else '' for l in lines)
155 ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
158 for l in difflib.Differ().compare (*ss):
164 oo[i] += self.formatter.end_color ()
166 oo = [o + self.formatter.escape (l[2:]) for o in oo]
168 if l[0] in self.symbols:
169 i = self.symbols.index (l[0])
171 oo[i] += self.formatter.start_color (self.colors[i])
173 oo[i] += self.formatter.escape (l[2:])
177 oo[i] += self.formatter.end_color ()
179 oo = [o.replace ('\n', '') for o in oo]
180 return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]
182 def colorize_diff (self, f):
185 if l[0] not in self.symbols:
186 yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
188 i = self.symbols.index (l[0])
191 for line in self.colorize_lines (lines):
197 for line in self.colorize_lines (lines):
202 for line in self.colorize_lines (lines):
209 def diff_files (files, symbols=diff_symbols):
210 files = tuple (files) # in case it's a generator, copy it
212 for lines in izip_longest (*files):
213 if all (lines[0] == line for line in lines[1:]):
214 sys.stdout.writelines ([" ", lines[0]])
217 for i, l in enumerate (lines):
219 sys.stdout.writelines ([symbols[i], l])
221 if e.errno != errno.EPIPE:
222 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
229 def filter_failures (f):
230 for key, lines in DiffHelpers.separate_test_cases (f):
232 if not DiffHelpers.test_passed (lines):
233 for l in lines: yield l
241 def add (self, test):
243 self.freq += test.freq
248 self.passed = Stat ()
249 self.failed = Stat ()
252 def add (self, test):
253 self.total.add (test)
255 self.passed.add (test)
257 self.failed.add (test)
260 return float (self.passed.count) / self.total.count
263 return (float (self.passed.count) / self.total.count) * \
264 (float (self.failed.count) / self.total.count)
267 return self.variance () ** .5
def zscore (self, population):
    """Return the standard score (z-score) of this sample against a population.

    `self` holds the Stats of the sample; `population` holds the Stats of
    the whole population.  The result is the difference between the two
    means, expressed in units of the population's standard deviation.  The
    larger its absolute value, the less likely the sample is random; values
    outside -3..+3 are very unlikely to be random.

    A population standard deviation of zero makes the division fail.

    See: http://en.wikipedia.org/wiki/Standard_score
    """
    mean_offset = self.mean () - population.mean ()
    spread = population.stddev ()
    return mean_offset / spread
288 # XXX port to Stats, but that would really slow us down here
289 for key, lines in DiffHelpers.separate_test_cases (f):
290 if DiffHelpers.test_passed (lines):
294 total = passed + failed
295 print ("%d out of %d tests passed. %d failed (%g%%)" % (passed, total, failed, 100. * failed / total))
300 def __init__ (self, lines):
303 self.identifier = None
314 if not self.identifier:
315 self.identifier = l[1:i]
316 i = i + 2 # Skip colon and space
320 brackets = l[i] + l[j]
324 elif brackets == '<>':
325 self.unicodes = Unicode.parse (l)
326 elif brackets == '[]':
327 # XXX we don't handle failed tests here
334 def separate_test_cases (f):
335 '''Reads lines from f, and if the lines have identifiers, ie.
336 have a colon character, groups them by identifier,
337 yielding lists of all lines with the same identifier.'''
341 return l[1:l.index (':')]
343 return groupby (f, key=identifier)
346 def test_passed (lines):
348 # XXX This is a hack, but does the job for now.
349 if any (l.find("space+0|space+0") >= 0 for l in lines if l[0] == '+'): return True
350 if any (l.find("uni25CC") >= 0 for l in lines if l[0] == '+'): return True
351 if any (l.find("dottedcircle") >= 0 for l in lines if l[0] == '+'): return True
352 if any (l.find("glyph0") >= 0 for l in lines if l[0] == '+'): return True
353 if any (l.find("gid0") >= 0 for l in lines if l[0] == '+'): return True
354 if any (l.find("notdef") >= 0 for l in lines if l[0] == '+'): return True
355 return all (l[0] == ' ' for l in lines)
361 def filter_printer_function (filter_callback):
363 for line in filter_callback (f):
368 def filter_printer_function_no_newline (filter_callback):
370 for line in filter_callback (f):
371 sys.stdout.writelines ([line])
395 def process_multiple_files (callback, mnemonic = "FILE"):
397 if "--help" in sys.argv:
398 print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
402 files = sys.argv[1:] if len (sys.argv) > 1 else ['-']
404 callback (FileHelpers.open_file_or_stdin (s))
406 if e.errno != errno.EPIPE:
407 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
411 def process_multiple_args (callback, mnemonic):
413 if len (sys.argv) == 1 or "--help" in sys.argv:
414 print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
418 for s in sys.argv[1:]:
421 if e.errno != errno.EPIPE:
422 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
426 def filter_multiple_strings_or_stdin (callback, mnemonic, \
428 concat_separator = False):
430 if "--help" in sys.argv:
431 print ("Usage:\n %s %s...\nor:\n %s\n\nWhen called with no arguments, input is read from standard input." \
432 % (sys.argv[0], mnemonic, sys.argv[0]))
436 if len (sys.argv) == 1:
438 line = sys.stdin.readline ()
443 print (callback (line))
446 if concat_separator != False:
447 args = [concat_separator.join (args)]
448 print (separator.join (callback (x) for x in (args)))
450 if e.errno != errno.EPIPE:
451 print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
459 return u','.join ("U+%04X" % cp for cp in codepoints (tounicode (s, 'utf-8')))
463 s = re.sub (r"0[xX]", " ", s)
464 s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n\t]", " ", s)
465 return [int (x, 16) for x in s.split ()]
469 s = u''.join (unichr (x) for x in Unicode.parse (s))
470 if sys.version_info[0] == 2: s = s.encode ('utf-8')
474 "ZERO WIDTH NON-JOINER": "ZWNJ",
475 "ZERO WIDTH JOINER": "ZWJ",
476 "NARROW NO-BREAK SPACE": "NNBSP",
477 "COMBINING GRAPHEME JOINER": "CGJ",
478 "LEFT-TO-RIGHT MARK": "LRM",
479 "RIGHT-TO-LEFT MARK": "RLM",
480 "LEFT-TO-RIGHT EMBEDDING": "LRE",
481 "RIGHT-TO-LEFT EMBEDDING": "RLE",
482 "POP DIRECTIONAL FORMATTING": "PDF",
483 "LEFT-TO-RIGHT OVERRIDE": "LRO",
484 "RIGHT-TO-LEFT OVERRIDE": "RLO",
490 s = unicodedata.name (u)
493 s = re.sub (".* LETTER ", "", s)
494 s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
495 s = re.sub (".* SIGN ", "", s)
496 s = re.sub (".* COMBINING ", "", s)
497 if re.match (".* VIRAMA", s):
499 if s in Unicode.shorthands:
500 s = Unicode.shorthands[s]
def pretty_names (s):
    """Turn a textual list of hex code points into their pretty names.

    The characters `<`, `+`, `>`, backslash, `u` and `U`, as well as any
    `0x`/`0X` prefix, are treated as separators, so inputs such as
    "U+0915,U+094D" or "<U+0915 U+094D>" are accepted.  The remaining
    tokens, split on commas, spaces and newlines, are parsed as
    hexadecimal code points.  Each character is named with
    Unicode.pretty_name and the names are joined with " + ".

    Returns a native string: UTF-8 encoded bytes on Python 2 and text on
    Python 3.  Encoding unconditionally returned `bytes` on Python 3, which
    callers that print or str.join the result cannot use.

    Raises ValueError if a token is not valid hexadecimal.
    """
    s = re.sub (r"[<+>\\uU]", " ", s)
    s = re.sub (r"0[xX]", " ", s)
    chars = [unichr (int (x, 16)) for x in re.split ('[, \n]', s) if x]
    names = u' + '.join (Unicode.pretty_name (c) for c in chars)
    # Only Python 2 needs a byte string here; on Python 3 keep the text.
    if sys.version_info[0] == 2: names = names.encode ('utf-8')
    return names
514 def open_file_or_stdin (f):
523 def read (s, strict = True):
525 if not os.path.exists (s):
527 print ("%s: %s does not exist" % (sys.argv[0], s), file=sys.stderr)
531 s = os.path.normpath (s)
533 if os.path.isdir (s):
536 m = open (os.path.join (s, "MANIFEST"))
537 items = [x.strip () for x in m.readlines ()]
539 for p in Manifest.read (os.path.join (s, f)):
543 print ("%s: %s does not exist" % (sys.argv[0], os.path.join (s, "MANIFEST")), file=sys.stderr)
550 def update_recursive (s):
552 for dirpath, dirnames, filenames in os.walk (s, followlinks=True):
554 for f in ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]:
561 ms = os.path.join (dirpath, "MANIFEST")
562 print (" GEN %s" % ms)
569 Manifest.update_recursive (os.path.join (dirpath, f))
571 if __name__ == '__main__':