[nnc] Add misc scripts (#1610)
authorDenis Maksimenko/AI Tools Lab /SRR/Assistant Engineer/삼성전자 <d.maksimenko@partner.samsung.com>
Tue, 9 Oct 2018 10:59:58 +0000 (13:59 +0300)
committerРоман Михайлович Русяев/AI Tools Lab /SRR/Staff Engineer/삼성전자 <r.rusyaev@samsung.com>
Tue, 9 Oct 2018 10:59:58 +0000 (13:59 +0300)
Add utility scripts that we use to the repository.

Signed-off-by: Denis Maksimenko <d.maksimenko@partner.samsung.com>
contrib/nnc/utils/infer_tests/README.md [new file with mode: 0644]
contrib/nnc/utils/infer_tests/infer_testcases.py [new file with mode: 0644]
contrib/nnc/utils/infer_tests/res2bin.py [new file with mode: 0644]
contrib/nnc/utils/prepare_inputs/README.md [new file with mode: 0644]
contrib/nnc/utils/prepare_inputs/jpeg2hdf5.py [new file with mode: 0644]

diff --git a/contrib/nnc/utils/infer_tests/README.md b/contrib/nnc/utils/infer_tests/README.md
new file mode 100644 (file)
index 0000000..1f5d3f6
--- /dev/null
@@ -0,0 +1,9 @@
+These scripts can be useful for developing and testing nnc. The usage and purpose of each script are described in comments in its source code.
+
+Note that these scripts are just development artifacts and are not supposed to go into production in any form.
+
+infer_testcases.py: runs inference with `nnkit` on testcases
+res2bin.py: used by infer_testcases.py to convert the resulting hdf5 files to binary format
+
+'testcases' folder structure:
+At the moment we use the following structure: a folder for a model contains 'models' and 'testcases' subfolders. The 'models' subfolder contains the model that we run inference on; the 'testcases' subfolder contains a 'testcase*' folder for each testcase. Each of those folders in turn contains an 'input' folder with a '.JPEG' file (plus '.hdf5' and '.dat' files after running the `jpeg2hdf5` script) and an 'output' folder where inference results are stored.
\ No newline at end of file
diff --git a/contrib/nnc/utils/infer_tests/infer_testcases.py b/contrib/nnc/utils/infer_tests/infer_testcases.py
new file mode 100644 (file)
index 0000000..4587075
--- /dev/null
@@ -0,0 +1,150 @@
+from __future__ import print_function
+import sys
+import glob
+import subprocess
+import res2bin
+import datetime
+
+# This script uses nnkit to run inference for a given model on given data.
+# Messages are printed to stderr.
+# Usage:
+# -b - specifies the path to the nnkit build folder, inside which tools/nni is located
+# -f - specifies the framework ('tfl' for tflite or 'caf' for caffe) that the model belongs to
+# -t - specifies the path to the testcases folder (see its structure in the readme)
+# -p - allows coarse parallelism by processing only a subset of files;
+#      specify the number of processes and the number of this process,
+#      and run that many instances manually with different process numbers
+# -r - infer all testcases regardless of whether the result files are present
+# The last argument(s) specify the model to infer
+#
+# Example of usage:
+# python3 infer_testcases.py -f tfl -b /mnt/nncc_ci/nncc_new/build/contrib/nnkit -t /mnt/nncc_ci/images/inc_slim/testcases/ -p 10 1 -r /mnt/nncc_ci/images/inc_slim/models/inception_v3_2018.tflite
+#
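+# For reference, the file layout this script expects and produces (names are
+# illustrative; the actual '.hdf5' input names depend on the testcase contents):
+#   <testcases_dir>/testcase*/input/<image>.JPEG.hdf5  - input read by this script
+#   <testcases_dir>/testcase*/output/output.hdf5       - inference result (written here)
+#   <testcases_dir>/testcase*/output/output.dat        - binary result (written via res2bin)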
+
+helpstr = "Expected arguments: -b <path_to_nnkit> " + \
+                               "-f (tfl | caf) " + \
+                               "-t <testcases_dir> " + \
+                               "[-p <nproc> <proc_num>] " + \
+                               "[-r] " + \
+                               "(<tflite_model_file> | <caffe_prototxt_model> <caffe_caffemodel_file>)"
+
+def eprint(*args, **kwargs):
+    print(*args, file=sys.stderr, **kwargs)
+
+
+nproc = 1
+proc_num = 1
+min_argc = 8
+
+args = {}
+args['-p'] = (1, 1) 
+args['-r'] = False
+
+argc = len(sys.argv)
+for i in range(argc):
+    arg = sys.argv[i]
+    if arg =='-r':
+        args[arg] = True
+    elif arg == '-b' or arg == '-f' or arg == '-t':
+        if i + 1 >= argc:
+            eprint(arg, "is missing its value")
+            eprint(helpstr)
+            exit()
+        args[arg] = sys.argv[i + 1]
+    elif arg == '-p':
+        min_argc += 3
+        if i + 2 >= argc:
+            eprint(arg, "is missing some of its values")
+            eprint(helpstr)
+            exit()
+        args[arg] = (int(sys.argv[i + 1]), int(sys.argv[i + 2]))
+    elif arg[0] == '-':
+        eprint('Unsupported argument:', arg)
+        exit()
+
+if not ('-b' in args and '-f' in args and '-t' in args):
+    eprint('Some arguments are not provided')
+    eprint(helpstr)
+    exit()
+
+fw = args['-f']
+build_path = args['-b']
+testcases_dir = args['-t']
+nproc, proc_num = args['-p']
+remove_existing = args['-r']
+
+if fw == 'tfl':
+    model = sys.argv[-1]
+    print('Model: ', model)
+elif fw == 'caf':
+    model_proto = sys.argv[-2]
+    model_caffe = sys.argv[-1]
+    print('Models: ', model_proto, model_caffe)
+else:
+    eprint('Unsupported framework:', fw)
+    exit()
+
+eprint('started at', datetime.datetime.now())
+print('Framework: ', fw)
+print('Path to nnkit: ', build_path)
+print('Testcases folder: ', testcases_dir)
+
+hdf_suffix = '.hdf5'
+bin_suffix = '.dat'
+
+def get_command_caf(infilename, outfilename, proto, caffemodel):
+    return [build_path + "/tools/nni/nni", 
+            "--pre", build_path + "/actions/HDF5/libnnkit_HDF5_import_action.so", 
+            "--pre-arg", infilename, 
+            "--backend", build_path + "/backends/caffe/libnnkit_caffe_backend.so",
+            "--backend-arg", proto,
+            "--backend-arg", caffemodel,
+            "--post", build_path + "/actions/HDF5/libnnkit_HDF5_export_action.so", 
+            "--post-arg", outfilename]
+
+def get_command_tfl(infilename, outfilename, model_file):
+    return [build_path + "/tools/nni/nni", 
+            "--pre", build_path + "/actions/HDF5/libnnkit_HDF5_import_action.so", 
+            "--pre-arg", infilename, 
+            "--backend", build_path + "/backends/tflite/libnnkit_tflite_backend.so",
+            "--backend-arg", model_file,
+            "--post", build_path + "/actions/builtin/libnnkit_show_action.so", 
+            "--post", build_path + "/actions/HDF5/libnnkit_HDF5_export_action.so", 
+            "--post-arg", outfilename]
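+
+# For illustration only (the paths below are hypothetical): with
+# build_path=/opt/nnkit/build, get_command_tfl('in.hdf5', 'out.hdf5', 'model.tflite')
+# assembles roughly the following invocation:
+#   /opt/nnkit/build/tools/nni/nni \
+#     --pre /opt/nnkit/build/actions/HDF5/libnnkit_HDF5_import_action.so --pre-arg in.hdf5 \
+#     --backend /opt/nnkit/build/backends/tflite/libnnkit_tflite_backend.so --backend-arg model.tflite \
+#     --post /opt/nnkit/build/actions/builtin/libnnkit_show_action.so \
+#     --post /opt/nnkit/build/actions/HDF5/libnnkit_HDF5_export_action.so --post-arg out.hdf5
+# get_command_caf() is analogous but passes two --backend-arg values: the prototxt
+# and the caffemodel file.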
+
+testcase_num = 0
+testcases = glob.glob(testcases_dir + '/testcase*')
+
+#testcases = [t 
+#             for t in testcases 
+#             if remove_existing
+#             or len(glob.glob(t + '/output/output' + hdf_suffix)) == 0 
+#             or len(glob.glob(t + '/output/output' + bin_suffix)) == 0]
+testcases = testcases[proc_num - 1::nproc]
+
+testcases.sort()
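+# The slice above implements the -p option: process K of N handles every N-th
+# testcase starting from the K-th one (1-based), so N manually started instances
+# together cover all testcases exactly once.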
+for testcase in testcases:
+    testcase_num += 1
+    try:
+        infile = glob.glob(testcase + '/input/*' + hdf_suffix)
+        if len(infile) > 0:
+            infile = infile[0]
+            outfile = testcase + '/output/output' + hdf_suffix
+            outfile_bin = testcase + '/output/output' + bin_suffix
+            if len(glob.glob(outfile)) == 0 or remove_existing:
+                if fw == 'tfl':
+                    command = get_command_tfl(infile, outfile, model)
+                elif fw == 'caf':
+                    command = get_command_caf(infile, outfile, model_proto, model_caffe)
+                #subprocess.call(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+                subprocess.call(command)
+            if len(glob.glob(outfile_bin)) == 0 or remove_existing:
+                res2bin.res2bin(outfile, outfile_bin)
+            eprint(testcase_num, "/", len(testcases))
+        else:
+            eprint(testcase, ': input not found')
+    except:
+        eprint(testcase, 'failed')
+
+eprint('ended at', datetime.datetime.now())
+
diff --git a/contrib/nnc/utils/infer_tests/res2bin.py b/contrib/nnc/utils/infer_tests/res2bin.py
new file mode 100644 (file)
index 0000000..dcbfbd1
--- /dev/null
@@ -0,0 +1,32 @@
+import numpy as np
+import h5py
+import struct
+import sys
+
+# This script takes an hdf5 file and unfolds it into a vector of float values,
+# which is then written in binary format to a given file.
+# It is used by infer_testcases.py.
+
+def res2bin(infilename, outfilename):
+    # print("Input filename: ", infilename)
+    # print("Output filename: ", outfilename)
+    f = h5py.File(infilename, 'r')
+    dset = f[list(f.keys())[0]]
+    vals = np.zeros(np.shape(dset), dtype='float32')
+    for i in range(np.size(dset, 0)):
+        vals[i, :] = np.asarray(dset[i], dtype='float32')
+    vals = list(np.reshape(vals, (vals.size)))
+    with open(outfilename, 'wb') as outfile:
+        outfile.write(struct.pack('f' * len(vals), *vals))
+
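+# A minimal helper sketch, added for illustration and not used by the scripts:
+# reads the packed float32 values back from a file produced by res2bin() above
+# (assumes the whole file consists of packed native-endian floats).
+def bin2list(infilename):
+    with open(infilename, 'rb') as infile:
+        data = infile.read()
+    return list(struct.unpack('f' * (len(data) // 4), data))
+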
+if __name__ == '__main__':
+    argc = len(sys.argv)
+    if argc > 2:
+        res2bin(sys.argv[1], sys.argv[2])
+    else:
+        print("Not enough arguments, expected: hdf5 filename, output filename")
+        exit()
+
diff --git a/contrib/nnc/utils/prepare_inputs/README.md b/contrib/nnc/utils/prepare_inputs/README.md
new file mode 100644 (file)
index 0000000..dcd5899
--- /dev/null
@@ -0,0 +1,8 @@
+These scripts can be useful for developing and testing nnc. The usage and purpose of each script are described in comments in its source code.
+
+Note that these scripts are just development artifacts and are not supposed to go into production in any form.
+
+jpeg2hdf5.py: prepares '.hdf5' files from '.JPEG' files to be used by nnkit. It can also convert those '.JPEG' files to binary format along the way.
+
+'testcases' folder structure:
+At the moment we use the following structure: a folder for a model contains 'models' and 'testcases' subfolders. The 'models' subfolder contains the model that we run inference on; the 'testcases' subfolder contains a 'testcase*' folder for each testcase. Each of those folders in turn contains an 'input' folder with a '.JPEG' file (plus '.hdf5' and '.dat' files after running the `jpeg2hdf5` script) and an 'output' folder where inference results are stored.
\ No newline at end of file
diff --git a/contrib/nnc/utils/prepare_inputs/jpeg2hdf5.py b/contrib/nnc/utils/prepare_inputs/jpeg2hdf5.py
new file mode 100644 (file)
index 0000000..c5e9693
--- /dev/null
@@ -0,0 +1,166 @@
+from PIL import Image
+import numpy as np
+import h5py
+import sys
+import glob
+import subprocess
+import struct
+import datetime
+
+# Generates hdf5 files (and optionally binary files) from JPEGs.
+# -f - specifies the framework to generate them for
+# -t - specifies the testcases directory (see its structure in the readme)
+# -i - specifies the input node name of the model that will use them (required by nnkit)
+# -r - if the files already exist, rewrites them
+# -b - enables binary file generation
+# -p - allows coarse parallelism by processing only a subset of files;
+#      specify the number of processes and the number of this process,
+#      and run that many instances manually with different process numbers
+#
+# Example:
+# python3 jpeg2hdf5.py -f tfl -t inc_slim/testcases -i input -p 16 1
+#
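+# For each testcase this script reads 'testcase*/input/<image>.JPEG' and writes the
+# converted data next to it (illustrative names; actual image names may differ):
+#   testcase*/input/<image>.JPEG.hdf5  - hdf5 tensor consumed by nnkit
+#   testcase*/input/<image>.JPEG.dat   - packed float32 values (only with -b)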
+
+helpstr = 'Usage: -f (tfl | caf) ' + \
+                 '-t <testcases_directory> ' + \
+                 '[-i <input_layer_name>] ' + \
+                 '[-r] [-b] ' + \
+                 '[-p <number_of_processes> <process_number>]'
+
+supported_frameworks = ['tfl', 'caf']
+args = {}
+# Defaults
+args['-p'] = (1, 1)
+args['-r'] = False
+args['-b'] = False
+
+argc = len(sys.argv)
+for i in range(len(sys.argv)):
+    arg = sys.argv[i]
+    if arg == '-r' or arg == '-b':
+        args[arg] = True
+    elif arg == '-f' or arg == '-t' or arg == '-i':
+        if i + 1 >= argc or sys.argv[i + 1][0] == '-':
+            print(arg, "is missing its value")
+            print(helpstr)
+            exit()
+        args[arg] = sys.argv[i + 1]
+    elif arg == '-p':
+        if i + 2 >= argc or sys.argv[i + 1][0] == '-' or sys.argv[i + 2][0] == '-':
+            print(arg, "is missing some of its values")
+            print(helpstr)
+            exit()
+        args[arg] = (int(sys.argv[i + 1]), int(sys.argv[i + 2]))
+    elif arg[0] == '-':
+        print('Unsupported argument: ', arg)
+        exit()
+
+if not ('-f' in args and '-t' in args):
+    print('Some arguments are not provided')
+    print(helpstr)
+    exit()
+
+fw = args['-f']
+if fw not in supported_frameworks:
+    print('Unsupported framework: ', fw)
+    exit()
+
+indirname = args['-t']
+
+if '-i' not in args:
+    if fw == 'caf':
+        inputname = 'data'
+    elif fw =='tfl':
+        inputname = 'input'
+else:
+    inputname = args['-i']
+
+nproc, proc_num = args['-p']
+remove_existing = args['-r']
+gen_binary = args['-b']
+
+print('started at', datetime.datetime.now())
+testcases = glob.glob(indirname + '/testcase*/')
+testcases.sort()
+testcases = testcases[proc_num - 1::nproc]
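+# The slice above implements -p: process K of N handles every N-th testcase
+# starting from the K-th one (1-based), so N manually started instances together
+# cover all testcases exactly once.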
+
+number = 0
+for testcase in testcases:
+     try: 
+          infilename = glob.glob(testcase + 'input/*.JPEG')
+          if len(infilename) > 0:
+              number += 1
+              infilename = infilename[0]
+              outfilename = testcase + 'input/' + infilename.split('/')[-1] + '.hdf5'
+              binoutfilename = testcase + 'input/' + infilename.split('/')[-1] + '.dat'
+              found_hdf = len(glob.glob(outfilename)) != 0
+              found_bin = len(glob.glob(binoutfilename)) != 0
+              if not found_hdf or (not found_bin and gen_binary) or remove_existing:
+                  with Image.open(infilename) as im:
+                      # TODO: check if the order is correct here and in other places:
+                      # PIL's Image.size is (width, height), so h and w may be swapped.
+                      h = im.size[0]
+                      w = im.size[1]
+                      s = im.split()
+                  if len(s) == 3:
+                      r, g, b = s
+                  else:
+                      r = s[0]
+                      g = s[0]
+                      b = s[0]
+                  rf = r.convert('F')
+                  gf = g.convert('F')
+                  bf = b.convert('F')
+                  rfb = rf.tobytes()
+                  gfb = gf.tobytes()
+                  bfb = bf.tobytes()
+                  
+                  made_hdf = False
+                  if not found_hdf or remove_existing:
+                      if fw == 'tfl':
+                          reds = np.fromstring(rfb, count=(h * w), dtype='float32')
+                          greens = np.fromstring(gfb, count=(h * w), dtype='float32')
+                          blues = np.fromstring(bfb, count=(h * w), dtype='float32') 
+                          
+                          dset_shape = (1, h, w, 3)
+                          narr = np.ndarray(shape=(0))
+                          mixed_ch = []
+                          for i in range(h * w):
+                              mixed_ch += [reds[i] / 255.0, greens[i] / 255.0, blues[i] / 255.0]
+                          narr = np.append(narr, mixed_ch) 
+                      elif fw == 'caf':
+                          dset_shape = (1, 3, h, w)
+                          narr = np.fromstring(rfb + gfb + bfb, count=(3 * h * w), dtype='float32')
+                          for i in range(3 * h * w):
+                               narr[i] /= 255.0
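+                      # Note: the 'tfl' branch builds an NHWC tensor (1, h, w, 3) with
+                      # interleaved channels, while the 'caf' branch builds an NCHW
+                      # tensor (1, 3, h, w) with planar channels; both scale to [0, 1].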
+                      if remove_existing:
+                          subprocess.call(['rm', '-f', outfilename])
+                      with h5py.File(outfilename, 'a') as f:
+                          # nnkit hdf5_import asserts to use IEEE_F32BE, which is >f4 in numpy
+                          dset = f.require_dataset(inputname, dset_shape, dtype='>f4')
+                          dset[0] = np.reshape(narr, dset_shape)
+                      made_hdf = True
+                  
+                  if gen_binary and (not found_bin or remove_existing):
+                      if fw == 'tfl' and made_hdf:
+                          l = narr.tolist()
+                      else:
+                          reds = np.fromstring(rfb, count=(h * w), dtype='float32')
+                          greens = np.fromstring(gfb, count=(h * w), dtype='float32')
+                          blues = np.fromstring(bfb, count=(h * w), dtype='float32') 
+                          l = np.ndarray(shape=(0))
+                          mixed_ch = []
+                          for i in range(h * w):
+                              mixed_ch += [reds[i] / 255.0, greens[i] / 255.0, blues[i] / 255.0]
+                          l = np.append(l, mixed_ch) 
+                          l = l.tolist()
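+                      # The '.dat' file contains packed native-endian float32 values,
+                      # the same binary format that res2bin.py emits.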
+                      with open(binoutfilename, 'wb') as out:
+                          out.write(struct.pack('f'*len(l), *l))
+                  print(number, ': ' + testcase + ' Done')
+              else:
+                  print(testcase, ' nothing to do')
+          else:
+              print(testcase, ' JPEG not found')
+     except:
+         print(testcase, " FAILED")
+print('ended at', datetime.datetime.now())
+