-#!/usr/bin/python
+#!/usr/bin/env python3
-import sys, os, re, difflib, unicodedata, errno, cgi
+import sys, os, re, difflib, unicodedata, errno, cgi, itertools
from itertools import *
diff_symbols = "-+=*&^%$#@!~/"
diff_colors = ['red', 'green', 'blue']
+def codepoints(s):
+ # Lazily yield ord () of each element of s; for a str that is each character's code point.
+ return (ord (u) for u in s)
+
class ColorFormatter:
class Null:
def diff_files (files, symbols=diff_symbols):
+ # Step through all inputs in lockstep (zip_longest pads exhausted ones with None) and write marked lines to stdout.
files = tuple (files) # in case it's a generator, copy it
try:
- for lines in izip_longest (*files):
+ for lines in itertools.zip_longest (*files):
+ # All inputs agree on this line: write it once behind a single space.
if all (lines[0] == line for line in lines[1:]):
sys.stdout.writelines ([" ", lines[0]])
continue
+ # Differing lines are written behind symbols[i]; i and l are bound by code outside this view.
sys.stdout.writelines ([symbols[i], l])
except IOError as e:
+ # EPIPE is ignored; any other I/O error exits with a message naming the program, file and error.
if e.errno != errno.EPIPE:
- print >> sys.stderr, "%s: %s: %s" % (sys.argv[0], e.filename, e.strerror)
- sys.exit (1)
+ sys.exit ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))
class DiffFilters:
Self is Stats for sample.
Returns larger absolute value if sample is highly unlikely to be random.
Anything outside of -3..+3 is very unlikely to be random.
- See: http://en.wikipedia.org/wiki/Standard_score"""
+ See: https://en.wikipedia.org/wiki/Standard_score"""
return (self.mean () - population.mean ()) / population.stddev ()
else:
failed += 1
total = passed + failed
- print "%d out of %d tests passed. %d failed (%g%%)" % (passed, total, failed, 100. * failed / total)
-
- @staticmethod
- def print_ngrams (f, ns=(1,2,3)):
- gens = tuple (Ngram.generator (n) for n in ns)
- allstats = Stats ()
- allgrams = {}
- for key, lines in DiffHelpers.separate_test_cases (f):
- test = Test (lines)
- allstats.add (test)
-
- for gen in gens:
- for ngram in gen (test.unicodes):
- if ngram not in allgrams:
- allgrams[ngram] = Stats ()
- allgrams[ngram].add (test)
-
- importantgrams = {}
- for ngram, stats in allgrams.iteritems ():
- if stats.failed.count >= 30: # for statistical reasons
- importantgrams[ngram] = stats
- allgrams = importantgrams
- del importantgrams
-
- for ngram, stats in allgrams.iteritems ():
- print "zscore: %9f failed: %6d passed: %6d ngram: <%s>" % (stats.zscore (allstats), stats.failed.count, stats.passed.count, ','.join ("U+%04X" % u for u in ngram))
-
+ print ("%d out of %d tests passed. %d failed (%g%%)" % (passed, total, failed, 100. * failed / total))
class Test:
def filter_printer_function (filter_callback):
+ # Return a function of one argument f that prints every item filter_callback (f) yields, one per print call.
def printer (f):
for line in filter_callback (f):
- print line
+ print (line)
return printer
@staticmethod
def process_multiple_files (callback, mnemonic = "FILE"):
+ # Hands callback the result of FileHelpers.open_file_or_stdin (s).
+ # The file list is the command-line arguments, or ['-'] when there are none.
+ # --help anywhere on the command line exits with the usage line instead.
if "--help" in sys.argv:
- print "Usage: %s %s..." % (sys.argv[0], mnemonic)
- sys.exit (1)
+ sys.exit ("Usage: %s %s..." % (sys.argv[0], mnemonic))
try:
files = sys.argv[1:] if len (sys.argv) > 1 else ['-']
+ # NOTE(review): the binding of s is not visible here -- presumably a loop over files; confirm.
callback (FileHelpers.open_file_or_stdin (s))
except IOError as e:
+ # EPIPE is ignored; any other I/O error exits with a message naming the program, file and error.
if e.errno != errno.EPIPE:
- print >> sys.stderr, "%s: %s: %s" % (sys.argv[0], e.filename, e.strerror)
- sys.exit (1)
+ sys.exit ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))
@staticmethod
def process_multiple_args (callback, mnemonic):
+ # Call callback once per command-line argument, in order.
+ # With no arguments, or with --help anywhere among them, exit with the usage line instead.
if len (sys.argv) == 1 or "--help" in sys.argv:
- print "Usage: %s %s..." % (sys.argv[0], mnemonic)
- sys.exit (1)
+ sys.exit ("Usage: %s %s..." % (sys.argv[0], mnemonic))
try:
for s in sys.argv[1:]:
callback (s)
except IOError as e:
+ # EPIPE is ignored; any other I/O error exits with a message naming the program, file and error.
if e.errno != errno.EPIPE:
- print >> sys.stderr, "%s: %s: %s" % (sys.argv[0], e.filename, e.strerror)
- sys.exit (1)
+ sys.exit ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))
@staticmethod
def filter_multiple_strings_or_stdin (callback, mnemonic, \
concat_separator = False):
+ # Print callback's results for the command-line arguments; per the usage text, input comes from
+ # standard input when there are no arguments. --help exits with the usage text instead.
if "--help" in sys.argv:
- print "Usage:\n %s %s...\nor:\n %s\n\nWhen called with no arguments, input is read from standard input." \
- % (sys.argv[0], mnemonic, sys.argv[0])
- sys.exit (1)
+ # Keep the wording identical to the old message: a blank line before the last sentence and
+ # no trailing newline inside the literal, because sys.exit () writes one after the message.
+ sys.exit ("""Usage:
+ %s %s...
+or:
+ %s
+
+When called with no arguments, input is read from standard input.""" % (sys.argv[0], mnemonic, sys.argv[0]))
try:
if len (sys.argv) == 1:
break
+ # Drop the trailing newline before handing the line to callback.
if line[-1] == '\n':
line = line[:-1]
- print callback (line)
+ print (callback (line))
else:
args = sys.argv[1:]
+ # If a concat_separator was given, collapse all arguments into a single string first.
if concat_separator != False:
args = [concat_separator.join (args)]
- print separator.join (callback (x) for x in (args))
+ print (separator.join (callback (x) for x in (args)))
except IOError as e:
+ # EPIPE is ignored; any other I/O error exits with a message naming the program, file and error.
if e.errno != errno.EPIPE:
- print >> sys.stderr, "%s: %s: %s" % (sys.argv[0], e.filename, e.strerror)
- sys.exit (1)
+ sys.exit ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))
class Unicode:
@staticmethod
def decode (s):
- return '<' + u','.join ("U+%04X" % ord (u) for u in unicode (s, 'utf-8')).encode ('utf-8') + '>'
+ # Format every code point of s as "U+XXXX" (upper-case hex, zero-padded to four digits), comma-separated.
+ return ','.join ("U+%04X" % cp for cp in codepoints (s))
@staticmethod
def parse (s):
+ # Turn a textual list of hexadecimal code points into a list of ints.
+ # "0x"/"0X" is blanked as a unit first; otherwise the class below would remove only the x
+ # and leave the 0 behind as a spurious token.
s = re.sub (r"0[xX]", " ", s)
- s = re.sub (r"[<+>,;&#\\xXuU\n ]", " ", s)
- return [int (x, 16) for x in s.split (' ') if len (x)]
+ # Blank out brackets, separators and the prefix letters x, u, n, i (either case).
+ s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n\t]", " ", s)
+ # split () with no argument splits on any whitespace run and never yields empty fields.
+ # int (x, 16) raises ValueError for a token that is not valid hexadecimal.
+ return [int (x, 16) for x in s.split ()]
@staticmethod
def encode (s):
- return u''.join (unichr (x) for x in Unicode.parse (s)).encode ('utf-8')
+ # Build the str whose characters are the code points that Unicode.parse extracts from s.
+ return ''.join (chr (x) for x in Unicode.parse (s))
shorthands = {
"ZERO WIDTH NON-JOINER": "ZWNJ",
def pretty_names (s):
+ # Read s as hexadecimal code points and return their Unicode.pretty_name () results joined by " + ".
+ # Brackets, '+', backslashes, u/U and "0x" prefixes are blanked; tokens are then split on comma, space or newline.
s = re.sub (r"[<+>\\uU]", " ", s)
s = re.sub (r"0[xX]", " ", s)
- s = [unichr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
- return u' + '.join (Unicode.pretty_name (x) for x in s).encode ('utf-8')
+ s = [chr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
+ return ' + '.join (Unicode.pretty_name (x) for x in s)
class FileHelpers:
def open_file_or_stdin (f):
+ # Return sys.stdin when f is '-'; otherwise open f with open ()'s defaults (read-only text mode).
if f == '-':
return sys.stdin
- return file (f)
+ return open (f)
class Manifest:
if not os.path.exists (s):
if strict:
- print >> sys.stderr, "%s: %s does not exist" % (sys.argv[0], s)
- sys.exit (1)
+ sys.exit ("%s: %s does not exist" % (sys.argv[0], s))
return
s = os.path.normpath (s)
if os.path.isdir (s):
try:
- m = file (os.path.join (s, "MANIFEST"))
+ m = open (os.path.join (s, "MANIFEST"))
items = [x.strip () for x in m.readlines ()]
for f in items:
for p in Manifest.read (os.path.join (s, f)):
yield p
except IOError:
if strict:
- print >> sys.stderr, "%s: %s does not exist" % (sys.argv[0], os.path.join (s, "MANIFEST"))
- sys.exit (1)
+ sys.exit ("%s: %s does not exist" % (sys.argv[0], os.path.join (s, "MANIFEST")))
return
else:
yield s
dirnames.sort ()
filenames.sort ()
ms = os.path.join (dirpath, "MANIFEST")
- print " GEN %s" % ms
+ print (" GEN %s" % ms)
m = open (ms, "w")
for f in filenames:
- print >> m, f
+ print (f, file=m)
for f in dirnames:
- print >> m, f
+ print (f, file=m)
for f in dirnames:
Manifest.update_recursive (os.path.join (dirpath, f))