3 import sys, os, re, difflib, unicodedata, errno, cgi, itertools
4 from itertools import *
# Marker characters that prefix diff lines; the marker at index i tags the
# lines that belong to the i-th compared input.
diff_symbols = "-+=*&^%$#@!~/"

# Highlight colors, looked up with the same index as the marker above.
diff_colors = [
    "red",
    "green",
    "blue",
]
10 return (ord (u) for u in s)
def start_color(c):
    """Return the markup that starts highlighting in color `c`.

    This variant emits no markup: `c` is ignored and the result is always
    the empty string.
    """
    return ''
def end_color():
    """Return the markup that stops highlighting; always the empty string here."""
    return ''
def escape(s):
    """Return `s` unchanged; this output format needs no escaping."""
    return s
def newline():
    """Return the line terminator for this output format: a bare newline."""
    return '\n'
28 'red': '\033[41;37;1m',
29 'green': '\033[42;37;1m',
30 'blue': '\033[44;37;1m',
def escape(s):
    """Return `s` as-is; no characters are escaped for this output format."""
    return s
def newline():
    """Return the line terminator for this output format: a single '\\n'."""
    return '\n'
43 return '<span style="background:%s">' % c
def escape(s):
    """Return `s` with HTML-special characters replaced by entities.

    ``cgi.escape`` was removed in Python 3.8 (and the ``cgi`` module itself
    in 3.13), so ``html.escape`` is used instead.  ``quote=False`` keeps the
    old default behavior: only ``&``, ``<`` and ``>`` are escaped, quotes are
    left alone.
    """
    # Imported locally (and aliased, since this function is itself named
    # `escape`) so the block does not rely on a file-level `import html`.
    from html import escape as html_escape
    return html_escape(s, quote=False)
def newline():
    """Return the HTML line terminator: a <br/> tag followed by a newline."""
    return '<br/>\n'
def Auto(argv=None, out=sys.stdout):
    """Choose a color formatter from command-line style flags.

    Recognized flags are removed from `argv` in place (first occurrence
    only).  Flags are examined in a fixed order, so when several are present
    the last one in that order wins: ``--format``, ``--format=ansi``,
    ``--format=html``, ``--no-format``.

    Args:
        argv: mutable list of arguments to scan; ``None`` means "no
            arguments".
        out: not used by this function; kept for interface compatibility.

    Returns:
        ``ColorFormatter.ANSI`` (the default, also chosen by ``--format`` and
        ``--format=ansi``), ``ColorFormatter.HTML`` or ``ColorFormatter.Null``.
    """
    # Avoid a shared mutable default argument.
    if argv is None:
        argv = []

    flag_to_formatter = (
        ("--format", "ANSI"),
        ("--format=ansi", "ANSI"),
        ("--format=html", "HTML"),
        ("--no-format", "Null"),
    )
    selected = "ANSI"
    for flag, formatter_name in flag_to_formatter:
        if flag in argv:
            argv.remove(flag)
            selected = formatter_name

    # The chosen formatter has to be handed back to the caller; without a
    # return statement every call would evaluate to None.
    return getattr(ColorFormatter, selected)
# Splits a line into alternating "word" and "separator" tokens for diffing.
# The character classes used to read `a-za-z` (the lowercase range twice), so
# uppercase letters were treated as separators and mixed-case words were
# broken apart; `a-zA-Z` keeps such words whole.
diff_regex = re.compile(r'([a-zA-Z0-9_]*)([^a-zA-Z0-9_]?)')
def __init__(self, formatter, colors=diff_colors, symbols=diff_symbols):
    """Remember the formatter, colors and diff markers used for colorizing.

    Args:
        formatter: object providing start_color/end_color/escape/newline.
        colors: color names, indexed in step with `symbols`.
        symbols: marker characters that prefix diff lines.
    """
    self.formatter = formatter
    # colorize_lines() reads self.colors, so the argument must be stored.
    self.colors = colors
    self.symbols = symbols
79 def colorize_lines (self, lines):
80 lines = (l if l else '' for l in lines)
81 ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
84 for l in difflib.Differ().compare (*ss):
90 oo[i] += self.formatter.end_color ()
92 oo = [o + self.formatter.escape (l[2:]) for o in oo]
94 if l[0] in self.symbols:
95 i = self.symbols.index (l[0])
97 oo[i] += self.formatter.start_color (self.colors[i])
99 oo[i] += self.formatter.escape (l[2:])
103 oo[i] += self.formatter.end_color ()
105 oo = [o.replace ('\n', '') for o in oo]
106 return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]
108 def colorize_diff (self, f):
111 if l[0] not in self.symbols:
112 yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
114 i = self.symbols.index (l[0])
117 for line in self.colorize_lines (lines):
123 for line in self.colorize_lines (lines):
128 for line in self.colorize_lines (lines):
135 def diff_files (files, symbols=diff_symbols):
136 files = tuple (files) # in case it's a generator, copy it
138 for lines in itertools.zip_longest (*files):
139 if all (lines[0] == line for line in lines[1:]):
140 sys.stdout.writelines ([" ", lines[0]])
143 for i, l in enumerate (lines):
145 sys.stdout.writelines ([symbols[i], l])
147 if e.errno != errno.EPIPE:
148 sys.exit ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))
def filter_failures(f):
    """Yield the lines of every test case in `f` that did not pass.

    Test cases are obtained from DiffHelpers.separate_test_cases and judged
    with DiffHelpers.test_passed; the lines of passing cases are dropped.
    """
    for _key, group in DiffHelpers.separate_test_cases(f):
        # The group may be a one-shot iterator (e.g. from itertools.groupby)
        # and it is walked twice: once to judge it, once to emit it.
        case_lines = list(group)
        if not DiffHelpers.test_passed(case_lines):
            yield from case_lines
166 def add (self, test):
168 self.freq += test.freq
173 self.passed = Stat ()
174 self.failed = Stat ()
177 def add (self, test):
178 self.total.add (test)
180 self.passed.add (test)
182 self.failed.add (test)
185 return float (self.passed.count) / self.total.count
188 return (float (self.passed.count) / self.total.count) * \
189 (float (self.failed.count) / self.total.count)
192 return self.variance () ** .5
def zscore(self, population):
    """Return the standard score of this sample relative to `population`.

    `self` holds the Stats of the sample and `population` the Stats of the
    whole population.  The larger the absolute value, the less likely the
    sample is random; anything outside of -3..+3 is very unlikely to be
    random.
    See: https://en.wikipedia.org/wiki/Standard_score
    """
    sample_mean = self.mean()
    population_mean = population.mean()
    population_spread = population.stddev()
    return (sample_mean - population_mean) / population_spread
213 # XXX port to Stats, but that would really slow us down here
214 for key, lines in DiffHelpers.separate_test_cases (f):
215 if DiffHelpers.test_passed (lines):
219 total = passed + failed
220 print ("%d out of %d tests passed. %d failed (%g%%)" % (passed, total, failed, 100. * failed / total))
225 def __init__ (self, lines):
228 self.identifier = None
239 if not self.identifier:
240 self.identifier = l[1:i]
241 i = i + 2 # Skip colon and space
245 brackets = l[i] + l[j]
249 elif brackets == '<>':
250 self.unicodes = Unicode.parse (l)
251 elif brackets == '[]':
252 # XXX we don't handle failed tests here
259 def separate_test_cases (f):
260 '''Reads lines from f, and if the lines have identifiers, ie.
261 have a colon character, groups them by identifier,
262 yielding lists of all lines with the same identifier.'''
266 return l[1:l.index (':')]
268 return groupby (f, key=identifier)
271 def test_passed (lines):
273 # XXX This is a hack, but does the job for now.
274 if any (l.find("space+0|space+0") >= 0 for l in lines if l[0] == '+'): return True
275 if any (l.find("uni25CC") >= 0 for l in lines if l[0] == '+'): return True
276 if any (l.find("dottedcircle") >= 0 for l in lines if l[0] == '+'): return True
277 if any (l.find("glyph0") >= 0 for l in lines if l[0] == '+'): return True
278 if any (l.find("gid0") >= 0 for l in lines if l[0] == '+'): return True
279 if any (l.find("notdef") >= 0 for l in lines if l[0] == '+'): return True
280 return all (l[0] == ' ' for l in lines)
286 def filter_printer_function (filter_callback):
288 for line in filter_callback (f):
293 def filter_printer_function_no_newline (filter_callback):
295 for line in filter_callback (f):
296 sys.stdout.writelines ([line])
320 def process_multiple_files (callback, mnemonic = "FILE"):
322 if "--help" in sys.argv:
323 sys.exit ("Usage: %s %s..." % (sys.argv[0], mnemonic))
326 files = sys.argv[1:] if len (sys.argv) > 1 else ['-']
328 callback (FileHelpers.open_file_or_stdin (s))
330 if e.errno != errno.EPIPE:
331 sys.exit ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))
334 def process_multiple_args (callback, mnemonic):
336 if len (sys.argv) == 1 or "--help" in sys.argv:
337 sys.exit ("Usage: %s %s..." % (sys.argv[0], mnemonic))
340 for s in sys.argv[1:]:
343 if e.errno != errno.EPIPE:
344 sys.exit ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))
347 def filter_multiple_strings_or_stdin (callback, mnemonic, \
349 concat_separator = False):
351 if "--help" in sys.argv:
356 When called with no arguments, input is read from standard input.
357 """ % (sys.argv[0], mnemonic, sys.argv[0]))
360 if len (sys.argv) == 1:
362 line = sys.stdin.readline ()
367 print (callback (line))
370 if concat_separator != False:
371 args = [concat_separator.join (args)]
372 print (separator.join (callback (x) for x in (args)))
374 if e.errno != errno.EPIPE:
375 sys.exit ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))
382 return ','.join ("U+%04X" % cp for cp in codepoints (s))
386 s = re.sub (r"0[xX]", " ", s)
387 s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n\t]", " ", s)
388 return [int (x, 16) for x in s.split ()]
392 return ''.join (chr (x) for x in Unicode.parse (s))
395 "ZERO WIDTH NON-JOINER": "ZWNJ",
396 "ZERO WIDTH JOINER": "ZWJ",
397 "NARROW NO-BREAK SPACE": "NNBSP",
398 "COMBINING GRAPHEME JOINER": "CGJ",
399 "LEFT-TO-RIGHT MARK": "LRM",
400 "RIGHT-TO-LEFT MARK": "RLM",
401 "LEFT-TO-RIGHT EMBEDDING": "LRE",
402 "RIGHT-TO-LEFT EMBEDDING": "RLE",
403 "POP DIRECTIONAL FORMATTING": "PDF",
404 "LEFT-TO-RIGHT OVERRIDE": "LRO",
405 "RIGHT-TO-LEFT OVERRIDE": "RLO",
411 s = unicodedata.name (u)
414 s = re.sub (".* LETTER ", "", s)
415 s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
416 s = re.sub (".* SIGN ", "", s)
417 s = re.sub (".* COMBINING ", "", s)
418 if re.match (".* VIRAMA", s):
420 if s in Unicode.shorthands:
421 s = Unicode.shorthands[s]
def pretty_names(s):
    """Turn a textual list of hexadecimal code points into pretty names.

    The characters `<`, `+`, `>`, backslash, `u` and `U`, and then any
    `0x`/`0X` prefixes, are blanked out.  What remains is split on commas,
    spaces and newlines, each non-empty field is read as a hexadecimal code
    point, and the Unicode.pretty_name of each resulting character is joined
    with ' + '.
    """
    cleaned = re.sub(r"0[xX]", " ", re.sub(r"[<+>\\uU]", " ", s))

    chars = []
    for field in re.split('[, \n]', cleaned):
        if len(field):
            chars.append(chr(int(field, 16)))

    return ' + '.join(map(Unicode.pretty_name, chars))
435 def open_file_or_stdin (f):
444 def read (s, strict = True):
446 if not os.path.exists (s):
448 sys.exit ("%s: %s does not exist" % (sys.argv[0], s))
451 s = os.path.normpath (s)
453 if os.path.isdir (s):
456 m = open (os.path.join (s, "MANIFEST"))
457 items = [x.strip () for x in m.readlines ()]
459 for p in Manifest.read (os.path.join (s, f)):
463 sys.exit ("%s: %s does not exist" % (sys.argv[0], os.path.join (s, "MANIFEST")))
469 def update_recursive (s):
471 for dirpath, dirnames, filenames in os.walk (s, followlinks=True):
473 for f in ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]:
480 ms = os.path.join (dirpath, "MANIFEST")
481 print (" GEN %s" % ms)
488 Manifest.update_recursive (os.path.join (dirpath, f))
490 if __name__ == '__main__':