reload(sys)
sys.setdefaultencoding("utf-8")
-import argparse
+import optparse
import os
import json
import re
endian=sys.byteorder
-parser = argparse.ArgumentParser(description="ICU Datafile repackager. Example of use: \"mkdir tmp ; python icutrim.py -D ~/Downloads/icudt53l.dat -T tmp -F trim_en.json -O icudt53l.dat\" you will then find a smaller icudt53l.dat in 'tmp'. ",
- epilog="ICU tool, http://icu-project.org - master copy at http://source.icu-project.org/repos/icu/tools/trunk/scripts/icutrim.py")
+parser = optparse.OptionParser(usage="usage: mkdir tmp ; %prog -D ~/Downloads/icudt53l.dat -T tmp -F trim_en.json -O icudt53l.dat" )
-parser.add_argument("-P","--tool-path",
+parser.add_option("-P","--tool-path",
action="store",
dest="toolpath",
help="set the prefix directory for ICU tools")
-parser.add_argument("-D","--input-file",
+parser.add_option("-D","--input-file",
action="store",
dest="datfile",
help="input data file (icudt__.dat)",
- required=True)
+ ) # required
-parser.add_argument("-F","--filter-file",
+parser.add_option("-F","--filter-file",
action="store",
dest="filterfile",
help="filter file (JSON format)",
- required=True)
+ ) # required
-parser.add_argument("-T","--tmp-dir",
+parser.add_option("-T","--tmp-dir",
action="store",
dest="tmpdir",
help="working directory.",
- required=True)
+ ) # required
-parser.add_argument("--delete-tmp",
+parser.add_option("--delete-tmp",
action="count",
dest="deltmpdir",
help="delete working directory.",
default=0)
-parser.add_argument("-O","--outfile",
+parser.add_option("-O","--outfile",
action="store",
dest="outfile",
help="outfile (NOT a full path)",
- required=True)
+ ) # required
-parser.add_argument("-v","--verbose",
+parser.add_option("-v","--verbose",
action="count",
default=0)
-parser.add_argument('-e', '--endian', action='store', dest='endian', help='endian, big, little or host, your default is "%s".' % endian, default=endian, metavar='endianness')
+parser.add_option('-e', '--endian', action='store', dest='endian', help='endian, big, little or host, your default is "%s".' % endian, default=endian, metavar='endianness')
+(options, args) = parser.parse_args()
-args = parser.parse_args()
+optVars = vars(options)
-if args.verbose>0:
- print "Options: "+str(args)
+for opt in [ "datfile", "filterfile", "tmpdir", "outfile" ]:
+ if optVars[opt] is None:
+ print "Missing required option: %s" % opt
+ sys.exit(1)
+
+if options.verbose>0:
+ print "Options: "+str(options)
-if (os.path.isdir(args.tmpdir) and args.deltmpdir):
- if args.verbose>1:
- print "Deleting tmp dir %s.." % (args.tmpdir)
- shutil.rmtree(args.tmpdir)
+if (os.path.isdir(options.tmpdir) and options.deltmpdir):
+ if options.verbose>1:
+ print "Deleting tmp dir %s.." % (options.tmpdir)
+ shutil.rmtree(options.tmpdir)
-if not (os.path.isdir(args.tmpdir)):
- os.mkdir(args.tmpdir)
+if not (os.path.isdir(options.tmpdir)):
+ os.mkdir(options.tmpdir)
else:
- print "Please delete tmpdir %s before beginning." % args.tmpdir
+ print "Please delete tmpdir %s before beginning." % options.tmpdir
sys.exit(1)
-if args.endian not in ("big","little","host"):
- print "Unknown endianness: %s" % args.endian
+if options.endian not in ("big","little","host"):
+ print "Unknown endianness: %s" % options.endian
sys.exit(1)
-if args.endian is "host":
- args.endian = endian
+# Compare by value, not identity: a string parsed from the command line is not guaranteed to be the same object as the literal "host".
+if options.endian == "host":
+ options.endian = endian
-if not os.path.isdir(args.tmpdir):
- print "Error, tmpdir not a directory: %s" % (args.tmpdir)
+if not os.path.isdir(options.tmpdir):
+ print "Error, tmpdir not a directory: %s" % (options.tmpdir)
sys.exit(1)
-if not os.path.isfile(args.filterfile):
- print "Filterfile doesn't exist: %s" % (args.filterfile)
+if not os.path.isfile(options.filterfile):
+ print "Filterfile doesn't exist: %s" % (options.filterfile)
sys.exit(1)
-if not os.path.isfile(args.datfile):
- print "Datfile doesn't exist: %s" % (args.datfile)
+if not os.path.isfile(options.datfile):
+ print "Datfile doesn't exist: %s" % (options.datfile)
sys.exit(1)
-if not args.datfile.endswith(".dat"):
- print "Datfile doesn't end with .dat: %s" % (args.datfile)
+if not options.datfile.endswith(".dat"):
+ print "Datfile doesn't end with .dat: %s" % (options.datfile)
sys.exit(1)
-outfile = os.path.join(args.tmpdir, args.outfile)
+outfile = os.path.join(options.tmpdir, options.outfile)
if os.path.isfile(outfile):
print "Error, output file does exist: %s" % (outfile)
sys.exit(1)
-if not args.outfile.endswith(".dat"):
- print "Outfile doesn't end with .dat: %s" % (args.outfile)
+if not options.outfile.endswith(".dat"):
+ print "Outfile doesn't end with .dat: %s" % (options.outfile)
sys.exit(1)
-dataname=args.outfile[0:-4]
+dataname=options.outfile[0:-4]
## TODO: need to improve this. Quotes, etc.
def runcmd(tool, cmd, doContinue=False):
- if(args.toolpath):
- cmd = os.path.join(args.toolpath, tool) + " " + cmd
+ if(options.toolpath):
+ cmd = os.path.join(options.toolpath, tool) + " " + cmd
else:
cmd = tool + " " + cmd
- if(args.verbose>4):
+ if(options.verbose>4):
print "# " + cmd
rc = os.system(cmd)
return rc
## STEP 0 - read in json config
-fi= open(args.filterfile, "rb")
+fi= open(options.filterfile, "rb")
config=json.load(fi)
fi.close()
-if (args.verbose > 6):
+if (options.verbose > 6):
print config
if(config.has_key("comment")):
- print "%s: %s" % (args.filterfile, config["comment"])
+ print "%s: %s" % (options.filterfile, config["comment"])
## STEP 1 - copy the data file, swapping endianness
endian_letter = "l"
-runcmd("icupkg", "-t%s %s %s""" % (endian_letter, args.datfile, outfile))
+runcmd("icupkg", "-t%s %s %s""" % (endian_letter, options.datfile, outfile))
## STEP 2 - get listing
-listfile = os.path.join(args.tmpdir,"icudata.lst")
+listfile = os.path.join(options.tmpdir,"icudata.lst")
runcmd("icupkg", "-l %s > %s""" % (outfile, listfile))
fi = open(listfile, 'rb')
itemset = set(items)
-if (args.verbose>1):
+if (options.verbose>1):
print "input file: %d items" % (len(items))
# list of all trees
if not config["trees"].has_key(tree):
return
mytree = trees[tree]
- if(args.verbose>0):
+ if(options.verbose>0):
print "* %s: %d items" % (tree, len(mytree["locs"]))
# do varible substitution for this tree here
if type(config["trees"][tree]) == str or type(config["trees"][tree]) == unicode:
treeStr = config["trees"][tree]
- if(args.verbose>5):
+ if(options.verbose>5):
print " Substituting $%s for tree %s" % (treeStr, tree)
if(not config.has_key("variables") or not config["variables"].has_key(treeStr)):
print " ERROR: no variable: variables.%s for tree %s" % (treeStr, tree)
sys.exit(1)
config["trees"][tree] = config["variables"][treeStr]
myconfig = config["trees"][tree]
- if(args.verbose>4):
+ if(options.verbose>4):
print " Config: %s" % (myconfig)
# Process this tree
if(len(myconfig)==0 or len(mytree["locs"])==0):
- if(args.verbose>2):
+ if(options.verbose>2):
print " No processing for %s - skipping" % (tree)
else:
only = None
if (len(only)==0) and (mytree["treeprefix"] != ""):
thePool = "%spool.res" % (mytree["treeprefix"])
if (thePool in itemset):
- if(args.verbose>0):
+ if(options.verbose>0):
print "Removing %s because tree %s is empty." % (thePool, tree)
remove.add(thePool)
else:
if (only is not None) and not loc in only:
# REMOVE loc
toRemove = "%s%s%s" % (mytree["treeprefix"], loc, mytree["extension"])
- if(args.verbose>6):
+ if(options.verbose>6):
print "Queueing for removal: %s" % toRemove
remove.add(toRemove)
def addTreeByType(tree, mytree):
- if(args.verbose>1):
+ if(options.verbose>1):
print "(considering %s): %s" % (tree, mytree)
trees[tree] = mytree
mytree["locs"]=[]
tree = "ROOT"
else:
tree = treeprefix[0:-1]
- if(args.verbose>6):
+ if(options.verbose>6):
print "procesing %s" % (tree)
trees[tree] = { "extension": ".res", "treeprefix": treeprefix, "hasIndex": True }
# read in the resource list for the tree
- treelistfile = os.path.join(args.tmpdir,"%s.lst" % tree)
+ treelistfile = os.path.join(options.tmpdir,"%s.lst" % tree)
runcmd("iculslocs", "-i %s -N %s -T %s -l > %s" % (outfile, dataname, tree, treelistfile))
fi = open(treelistfile, 'rb')
treeitems = fi.readlines()
trees[tree]["locs"] = [treeitems[i].strip() for i in range(len(treeitems))]
fi.close()
if(not config.has_key("trees") or not config["trees"].has_key(tree)):
- print " Warning: filter file %s does not mention trees.%s - will be kept as-is" % (args.filterfile, tree)
+ print " Warning: filter file %s does not mention trees.%s - will be kept as-is" % (options.filterfile, tree)
else:
queueForRemoval(tree)
if(count > 10):
print "Giving up - %dth attempt at removal." % count
sys.exit(1)
- if(args.verbose>1):
+ if(options.verbose>1):
print "%d items to remove - try #%d" % (len(remove),count)
if(len(remove)>0):
oldcount = len(remove)
- hackerrfile=os.path.join(args.tmpdir, "REMOVE.err")
- removefile = os.path.join(args.tmpdir, "REMOVE.lst")
+ hackerrfile=os.path.join(options.tmpdir, "REMOVE.err")
+ removefile = os.path.join(options.tmpdir, "REMOVE.lst")
fi = open(removefile, 'wb')
for i in remove:
print >>fi, i
fi.close()
rc = runcmd("icupkg","-r %s %s 2> %s" % (removefile,outfile,hackerrfile),True)
if rc is not 0:
- if(args.verbose>5):
+ if(options.verbose>5):
print "## Damage control, trying to parse stderr from icupkg.."
fi = open(hackerrfile, 'rb')
erritems = fi.readlines()
m = pat.match(line)
if m:
toDelete = m.group(1)
- if(args.verbose > 5):
+ if(options.verbose > 5):
print "<< %s added to delete" % toDelete
remove.add(toDelete)
else:
print "ERROR: could not match errline: %s" % line
sys.exit(1)
- if(args.verbose > 5):
+ if(options.verbose > 5):
print " now %d items to remove" % len(remove)
if(oldcount == len(remove)):
print " ERROR: could not add any mor eitems to remove. Fail."
# skip trees that don't have res_index
if not trees[tree].has_key("hasIndex"):
continue
- treebunddir = args.tmpdir
+ treebunddir = options.tmpdir
if(trees[tree]["treeprefix"]):
treebunddir = os.path.join(treebunddir, trees[tree]["treeprefix"])
if not (os.path.isdir(treebunddir)):
treebundtxt = "%s.txt" % (treebundres[0:-4])
runcmd("iculslocs", "-i %s -N %s -T %s -b %s" % (outfile, dataname, tree, treebundtxt))
runcmd("genrb","-d %s -s %s res_index.txt" % (treebunddir, treebunddir))
- runcmd("icupkg","-s %s -a %s%s %s" % (args.tmpdir, trees[tree]["treeprefix"], RES_INDX, outfile))
+ runcmd("icupkg","-s %s -a %s%s %s" % (options.tmpdir, trees[tree]["treeprefix"], RES_INDX, outfile))