--- /dev/null
+version: 2
+test:
+ - name: NN Runtime
+ testCaseLanguage: CPP
+ testFW: GTEST
+ testCaseFolder:
+ - ./compute/test/cker
+ - ./runtime/onert/core/src/backend/cpu_common
+ - ./runtime/onert/frontend/nnapi
+ - ./runtime/onert/test/core/compiler
+ - ./runtime/onert/test/core/exec
+ - ./runtime/onert/test/core/interp
+ - ./runtime/onert/test/graph
+ - ./runtime/onert/test/graph/operand
+ - ./runtime/onert/test/graph/operation
+ - ./runtime/onert/test/graph/verifier
+ - ./runtime/onert/test/ir
+ - ./runtime/onert/test/util
+ - ./tests/nnapi/src
+ - ./tests/nnfw_api/src
+ - ./tests/tools/tflite_run/src
+
+ testFile:
+ - extension: cpp
+ any: true
+ - extension: cc
+ any: true
+
+ testCase:
+ - condition:
+ - functionName:
+ starts:
+ - TEST
+
+ negativeTestCase:
+ - condition:
+ - testName:
+ starts:
+ - neg_
+
+ positiveTestCase:
+ - condition:
+ - inverse: negativeTestCase
--exclude=build
--exclude=tags
--exclude=tests/scripts/framework/cache
+--exclude=tests/scripts/models/cache
--exclude=tools/cross/rootfs
--exclude=doxygen
*.pyc
# Test cache for model download
-/tests/scripts/framework/cache
+/tests/scripts/**/cache
# Test report
/report
install: $(TIMESTAMP_INSTALL)
-create_tar: runtime_tar_internal
+create_package: runtime_tar_internal
+
+create_acl_tar: acl_tar_internal
clean:
rm -rf $(WORKSPACE)
touch $(TIMESTAMP_INSTALL)
runtime_tar_internal: $(TIMESTAMP_BUILD) install_internal
- tar -zcf nnfw-package.tar.gz -C $(INSTALL_PATH) lib
- tar -zcf nnfw-dev-package.tar.gz -C $(INSTALL_PATH) include/nnfw
- tar -zcf nnfw-internal-dev-package.tar.gz -C $(INSTALL_PATH) include/onert
- mv nnfw-*package.tar.gz $(INSTALL_PATH)/.
+ tar -zcf $(WORKSPACE)/nnfw-package.tar.gz -C $(INSTALL_PATH) lib
+ tar -zcf $(WORKSPACE)/nnfw-devel-package.tar.gz -C $(INSTALL_PATH) include/nnfw
+ tar -zcf $(WORKSPACE)/nnfw-plugin-devel-package.tar.gz -C $(INSTALL_PATH) include/onert
+ tar -zcf $(WORKSPACE)/nnfw-test-package.tar.gz -C ${INSTALL_PATH} bin test unittest unittest_standalone
+
+acl_tar_internal: $(BUILD_FOLDER)
+ tar -zcf $(WORKSPACE)/nnfw-acl.tar.gz -C ${OVERLAY_FOLDER} lib
install_internal_acl:
# Workaround to install acl for test (ignore error when there is no file to copy)
--- /dev/null
+version: 2
+test:
+ - name: NN Compiler
+ testCaseLanguage: CPP
+ testFW: GTEST
+ testCaseFolder:
+ - ./angkor
+ - ./arser
+ - ./circle2circle
+ - ./circle-quantizer
+ - ./cwrap
+ - ./foder
+ - ./hermes
+ - ./hermes-std
+ - ./loco
+ - ./locomotiv
+ - ./locop
+ - ./logo
+ - ./logo-core
+ - ./luci
+ - ./luci-interpreter
+ - ./luci-value-test
+ - ./mio-circle
+ - ./mio-tflite
+ - ./oops
+ - ./pepper-assert
+ - ./pepper-str
+ - ./pepper-strcast
+ - ./pp
+ - ./record-minmax
+ - ./safemain
+ - ./souschef
+ - ./stdex
+ - ./tflite2circle
+
+ testFile:
+ - extension: .test.cpp
+ any: true
+
+ testCase:
+ - condition:
+ - functionName:
+ starts:
+ - TEST
+
+ negativeTestCase:
+ - condition:
+ - testName:
+ ends:
+ - _NEG
+
+ positiveTestCase:
+ - condition:
+ - inverse: negativeTestCase
--- /dev/null
+set(BCQ_TOOLS_FILES
+ generate_bcq_output_arrays
+ preserve_bcq_info
+)
+
+foreach(BCQ_TOOLS IN ITEMS ${BCQ_TOOLS_FILES})
+
+ set(BCQ_TOOLS_FILE ${BCQ_TOOLS})
+ set(BCQ_TOOLS_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${BCQ_TOOLS_FILE}")
+ set(BCQ_TOOLS_BIN "${CMAKE_CURRENT_BINARY_DIR}/${BCQ_TOOLS_FILE}")
+ set(BCQ_TOOLS_TARGET "${BCQ_TOOLS}_target")
+
+ add_custom_command(OUTPUT ${BCQ_TOOLS_BIN}
+ COMMAND ${CMAKE_COMMAND} -E copy "${BCQ_TOOLS_SRC}" "${BCQ_TOOLS_BIN}"
+ DEPENDS ${BCQ_TOOLS_SRC}
+ COMMENT "Generate ${BCQ_TOOLS_BIN}"
+ )
+
+ add_custom_target(${BCQ_TOOLS_TARGET} ALL DEPENDS ${BCQ_TOOLS_BIN})
+
+ install(FILES ${BCQ_TOOLS_BIN}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION bin)
+
+endforeach(BCQ_TOOLS)
--- /dev/null
+# BCQ Tools
+
+This directory includes tools related to BCQ.
+
+## preserve_bcq_info
+
+### Purpose
+
+`preserve_bcq_info` preserves constant nodes that carry BCQ information.
+When a `.pb` file is converted to a `.tflite` file by the TFLite converter, constant nodes with exactly the same values are removed and merged into a single representative node.
+This makes it impossible to know which constant node should be linked to the node we want to apply BCQ to.
+One solution is to make all identical constant nodes distinct by inserting unique values, and to ignore those newly generated values when BCQ fusing is applied.
+`preserve_bcq_info` generates and inserts unique dummy values into constant nodes with identical values so that they are not removed by the TensorFlow Lite converter.
+As a result, BCQ information is preserved.
+
+### How to use
+
+```bash
+preserve_bcq_info \
+--input_path /path/to/original_model.pb \
+--output_path /path/to/preserved_model.pb
+```
+
+### How it works
+
+If we append a unique dummy value to the end of each constant node, all the constant nodes become distinct. The following is an example.
+
+```
+[Original Constant Nodes]
+const(value=[1, 2, 3], name='const1')
+const(value=[1, 2, 3], name='const2')
+const(value=[1, 2, 3], name='const3')
+
+[After BCQ information preserved]
+const(value=[1, 2, 3, -1], name='const1')
+const(value=[1, 2, 3, -2], name='const2')
+const(value=[1, 2, 3, -3], name='const3')
+```
+
+Negative values are used as dummy values rather than positive ones,
+because positive values could be confused with the original constant node values.
+The unique dummy values start at -1 and continue with -2, -3, ..., -N, where N is the number of preserved constant nodes.
+
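+A minimal sketch of this numbering scheme (illustrative only; the bundled `preserve_bcq_info` script implements it with its `UniqueValueGen` helper):
+
+```python
+def append_dummy_values(constant_values):
+    """Append a unique negative dummy value to each constant so identical ones become distinct."""
+    preserved = []
+    next_dummy = -1  # dummy values count down: -1, -2, ..., -N
+    for values in constant_values:
+        preserved.append(list(values) + [next_dummy])
+        next_dummy -= 1
+    return preserved
+
+# [[1, 2, 3], [1, 2, 3]] -> [[1, 2, 3, -1], [1, 2, 3, -2]]
+```
+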
+### Caution
+
+- Newly generated dummy values should be ignored when the constant nodes are used.
+
+## generate_bcq_output_arrays
+
+### Purpose
+
+To apply BCQ, the BCQ information nodes must be designated as model outputs so that they survive TFLite conversion.
+However, there can be a large number of such nodes, and sometimes we cannot copy and paste all of them because the resulting string is too long.
+`generate_bcq_output_arrays` generates the output_arrays string, which includes the BCQ information nodes.
+
+### How to use
+
+```bash
+generate_bcq_output_arrays \
+--input_path /path/to/original_model.pb \
+--output_path /path/to/output_arrays.txt
+```
+
+### How it works
+
+```
+[Original BCQ information nodes]
+const(value=[1, 2, 3, -1], name='const1')
+const(value=[1, 2, 3, -2], name='const2')
+const(value=[1, 2, 3, -3], name='const3')
+
+[Generated output_arrays]
+,const1,const2,const3
+```
+
+### Caution
+
+- The generated output_arrays string starts with a comma.
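+
+A hypothetical consumer can simply drop the leading comma when reading the generated file (sketch only; how the string is actually fed to the converter depends on your setup):
+
+```python
+# Read the generated output_arrays string and split it into node names.
+with open("/path/to/output_arrays.txt") as f:
+    output_arrays = f.read()
+
+# ",const1,const2,const3" -> ["const1", "const2", "const3"]
+node_names = output_arrays.lstrip(",").split(",")
+print(node_names)
+```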
--- /dev/null
+#!/usr/bin/env python3
+
+import tensorflow as tf
+
+import argparse
+import sys
+
+
+def _get_parser():
+ """
+ Returns an ArgumentParser for generating output_arrays.
+ """
+ parser = argparse.ArgumentParser(
+ description=("Command line tool to generated output_arrays of BCQ nodes"))
+
+ # Input and output path.
+ parser.add_argument(
+ "-i",
+ "--input_path",
+ type=str,
+ help="Full filepath of the input file.",
+ required=True)
+ parser.add_argument(
+ "-o",
+ "--output_path",
+ type=str,
+ help="Full filepath of the output file.",
+ required=True)
+
+ return parser
+
+
+def load_graph(frozen_graph_filename):
+ """
+ Load graph from frozen pb file
+ """
+ with tf.compat.v1.gfile.GFile(frozen_graph_filename, "rb") as f:
+ graph_def = tf.compat.v1.GraphDef()
+ graph_def.ParseFromString(f.read())
+ with tf.Graph().as_default() as graph:
+ tf.import_graph_def(graph_def, name='')
+ return graph
+
+
+def dtype2str(dtype):
+ if dtype == "int32":
+ return "TF_INT32"
+ elif dtype == "int64":
+ return "TF_INT64"
+ elif dtype == "float32":
+ return "TF_FLOAT"
+ elif dtype == "bool":
+ return "TF_BOOL"
+ else:
+ raise Exception("Not supported dtype")
+
+
+def print_output_arrays(flags):
+ graph_model = load_graph(flags.input_path)
+ graph_model_def = graph_model.as_graph_def()
+ ops = graph_model.get_operations()
+
+ output_names = [op.outputs[0].name for op in ops
+ if op.type == "Const" and "bcqinfo_" in op.outputs[0].name]
+
+ output_arrays = ""
+ for output_name in output_names:
+ output_arrays += ","
+
+ colon_index = output_name.find(":")
+ if colon_index == -1:
+ output_arrays += output_name
+ else:
+ output_arrays += output_name[:colon_index]
+
+ f = open(flags.output_path, 'w')
+ f.write(output_arrays)
+ f.close()
+
+
+def main():
+ # Parse argument.
+ parser = _get_parser()
+ flags = parser.parse_known_args(args=sys.argv[1:])
+
+ print_output_arrays(flags[0])
+
+
+if __name__ == "__main__":
+ main()
--- /dev/null
+#!/usr/bin/env python3
+
+import tensorflow as tf
+import numpy as np
+
+import argparse
+import sys
+
+
+def _get_parser():
+ """
+ Returns an ArgumentParser for preserving BCQ information.
+ """
+ parser = argparse.ArgumentParser(
+ description=("Command line tool to preserve BCQ information"))
+
+ # Input and output path.
+ parser.add_argument(
+ "-i",
+ "--input_path",
+ type=str,
+ help="Full filepath of the input file.",
+ required=True)
+ parser.add_argument(
+ "-o",
+ "--output_path",
+ type=str,
+ help="Full filepath of the output file.",
+ required=True)
+
+ return parser
+
+
+def load_graph(frozen_graph_filename):
+ """
+ Load graph from frozen pb file
+ """
+ with tf.compat.v1.gfile.GFile(frozen_graph_filename, "rb") as f:
+ graph_def = tf.compat.v1.GraphDef()
+ graph_def.ParseFromString(f.read())
+ with tf.Graph().as_default() as graph:
+ tf.import_graph_def(graph_def, name='')
+ return graph
+
+
+def preserve_bcq_info(flags):
+ """
+ Generate unique dummy value from -1 to -N.
+
+ We use negative values to preserve BCQ information because
+ positive values may cause some confusion with real BCQ information values.
+ """
+
+ class UniqueValueGen:
+ def __init__(self):
+ self.unique_value = -1
+
+ def gen(self):
+ val = self.unique_value
+ self.unique_value = val - 1
+ return val
+
+ unique_value = UniqueValueGen()
+
+ original_graph_model = load_graph(flags.input_path)
+ original_graph_model_def = original_graph_model.as_graph_def()
+
+ new_graph = tf.compat.v1.GraphDef()
+ substitution_dict = {}
+
+ DT_INT32 = None # Just for copying DT_INT32 attribute value
+
+ for node in original_graph_model_def.node:
+ if node.op == "Const":
+ # Because bcqinfo_do_w_x is BOOL type, we cannot append a dummy value at the end.
+ # Therefore we convert its type to INT32.
+ if "/bcqinfo_do_w_x" in node.name:
+ original_tensor = tf.make_ndarray(node.attr["value"].tensor)
+ substitution_dict[node.name] = tf.make_tensor_proto(
+ [int(original_tensor[0]), unique_value.gen()], tf.int32)
+
+ preserved_bcqinfo_list = ["/bcqinfo_number_of_clusters", "/bcqinfo_size_of_clusters",
+ "/bcqinfo_qbits_of_clusters"]
+
+ if any(name in node.name for name in preserved_bcqinfo_list):
+ original_tensor = tf.make_ndarray(
+ node.attr["value"].tensor) # variable name change
+ substitution_dict[node.name] = tf.make_tensor_proto(
+ np.append(original_tensor, unique_value.gen()), tf.int32)
+ DT_INT32 = node.attr["dtype"]
+
+ for node in original_graph_model_def.node:
+ if node.name in substitution_dict:
+ new_node = new_graph.node.add()
+ new_node.op = "Const"
+ new_node.name = node.name
+ new_node.attr["dtype"].CopyFrom(DT_INT32)
+ new_node.attr["value"].tensor.CopyFrom(substitution_dict[node.name])
+ else:
+ new_node = new_graph.node.add()
+ new_node.CopyFrom(node)
+
+ tf.io.write_graph(new_graph, '.', flags.output_path, False)
+
+
+def main():
+ # Parse argument.
+ parser = _get_parser()
+ flags = parser.parse_known_args(args=sys.argv[1:])
+
+ # Generate a new pb file in which BCQ information is preserved.
+ preserve_bcq_info(flags[0])
+
+
+if __name__ == "__main__":
+ main()
target_link_libraries(circle-quantizer luci_pass)
target_link_libraries(circle-quantizer luci_export)
target_link_libraries(circle-quantizer arser)
+target_link_libraries(circle-quantizer vconone)
install(TARGETS circle-quantizer DESTINATION bin)
require("luci")
require("oops")
require("arser")
+require("vconone")
#include <oops/InternalExn.h>
#include <arser/arser.h>
+#include <vconone/vconone.h>
#include <functional>
#include <iostream>
using Algorithms = luci::CircleOptimizer::Options::Algorithm;
using AlgorithmParameters = luci::CircleOptimizer::Options::AlgorithmParameters;
+void print_version(void)
+{
+ std::cout << "circle-quantizer version " << vconone::get_string() << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+}
+
int entry(int argc, char **argv)
{
// Simple argument parser (based on map)
arser::Arser arser("circle-quantizer provides circle model quantization");
+ arser.add_argument("--version")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(print_version);
+
arser.add_argument(qdqw)
.nargs(3)
.type(arser::DataType::STR_VEC)
.required(false)
.help("Quantize-dequantize weight values required action before quantization. "
"Three arguments required: input_dtype(float32) "
- "output_dtype(uint8) granularity(layer)");
+ "output_dtype(uint8) granularity(layer, channel)");
arser.add_argument(qwmm)
.nargs(3)
.required(false)
.help("Quantize with min/max values. "
"Three arguments required: input_dtype(float32) "
- "output_dtype(uint8) granularity(layer)");
+ "output_dtype(uint8) granularity(layer, channel)");
arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
{
std::cout << err.what() << std::endl;
std::cout << arser;
- return 0;
+ return 255;
+ }
+
+ if (arser["--tensors_to_hdf5"] == arser["--tensors"])
+ {
+ std::cout << "[Error] You must specify one option for how to print." << std::endl;
+ std::cout << arser;
+ return 255;
}
std::unique_ptr<circletensordump::DumpInterface> dump;
auto max = quant_param->max();
auto scale = quant_param->scale();
auto zero_point = quant_param->zero_point();
+ auto quantized_dimension = quant_param->quantized_dimension();
os << " " + print_format2 + " ├── min : ";
::print_comma_sepearted(os, min);
os << " " + print_format2 + " ├── scale : ";
::print_comma_sepearted(os, scale);
os << std::endl;
- os << " " + print_format2 + "Â Â â\94\94── zero_point : ";
+ os << " " + print_format2 + "Â Â â\94\9c── zero_point : ";
::print_comma_sepearted(os, zero_point);
os << std::endl;
+ os << " " + print_format2 + " └── quantized_dimension : " << quantized_dimension;
+ os << std::endl;
}
// buffer
}
/**
- * This function writes data to given hdf5 file like below.
+ * This function writes vector data to the given hdf5 file as shown below.
*
* GROUP "group_name"
* ㄴDATATYPE "type"
* ㄴDATA "data"
*/
template <typename T>
-void write_data_to_hdf5(H5::H5File &file, std::string &group_name, std::string dataset_name,
- const H5::PredType &type, const flatbuffers::Vector<T> *data,
- std::vector<hsize_t> dims)
+void write_vector_data_to_hdf5(H5::H5File &file, std::string &group_name, std::string dataset_name,
+ const H5::PredType &type, const flatbuffers::Vector<T> *data,
+ std::vector<hsize_t> dims)
{
if (data == nullptr)
return;
dataset->write(data->data(), type);
}
+/// @brief This function writes scalar data to the given hdf5 file
+template <typename T>
+void write_scalar_data_to_hdf5(H5::H5File &file, std::string &group_name, std::string dataset_name,
+ const H5::PredType &type, T data)
+{
+ auto dataspace = std::make_unique<H5::DataSpace>(H5S_SCALAR);
+ auto dataset = std::make_unique<H5::DataSet>(
+ file.createDataSet(group_name + "/" + dataset_name, type, *dataspace));
+ dataset->write(&data, type);
+}
+
} // namespace
namespace circletensordump
auto buff_data_ptr = reader.buffers()->Get(buff_idx)->data();
if (buff_data_ptr)
{
- ::write_data_to_hdf5(file, group_name, "weights", ::hdf5_dtype_cast(tensor->type()),
- buff_data_ptr, ::hdf5_dims_cast(buff_data_ptr, tensor->shape()));
+ ::write_vector_data_to_hdf5(file, group_name, "weights", ::hdf5_dtype_cast(tensor->type()),
+ buff_data_ptr,
+ ::hdf5_dims_cast(buff_data_ptr, tensor->shape()));
}
// write quantization parameters
if (quant_param)
{
auto min = quant_param->min();
- ::write_data_to_hdf5(file, group_name, "min", H5::PredType::NATIVE_FLOAT, min,
- ::hdf5_dims_cast(min));
+ ::write_vector_data_to_hdf5(file, group_name, "min", H5::PredType::NATIVE_FLOAT, min,
+ ::hdf5_dims_cast(min));
auto max = quant_param->max();
- ::write_data_to_hdf5(file, group_name, "max", H5::PredType::NATIVE_FLOAT, max,
- ::hdf5_dims_cast(max));
+ ::write_vector_data_to_hdf5(file, group_name, "max", H5::PredType::NATIVE_FLOAT, max,
+ ::hdf5_dims_cast(max));
auto scale = quant_param->scale();
- ::write_data_to_hdf5(file, group_name, "scale", H5::PredType::NATIVE_FLOAT, scale,
- ::hdf5_dims_cast(scale));
+ ::write_vector_data_to_hdf5(file, group_name, "scale", H5::PredType::NATIVE_FLOAT, scale,
+ ::hdf5_dims_cast(scale));
auto zero_point = quant_param->zero_point();
- ::write_data_to_hdf5(file, group_name, "zero_point", H5::PredType::NATIVE_INT64, zero_point,
- ::hdf5_dims_cast(zero_point));
+ ::write_vector_data_to_hdf5(file, group_name, "zero_point", H5::PredType::NATIVE_INT64,
+ zero_point, ::hdf5_dims_cast(zero_point));
+ auto quantized_dimension = quant_param->quantized_dimension();
+ ::write_scalar_data_to_hdf5(file, group_name, "quantized_dimension",
+ H5::PredType::NATIVE_INT32, quantized_dimension);
}
}
}
{
std::cout << err.what() << std::endl;
std::cout << arser;
- return 0;
+ return 255;
}
auto verifier = std::make_unique<VerifyFlatbuffers>();
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
-list(APPEND REQUIRED_TARGETS circlechef)
list(APPEND REQUIRED_TARGETS circle-inspect)
list(APPEND REQUIRED_TARGETS circle-verify)
list(APPEND REQUIRED_TARGETS circle2circle)
list(APPEND REQUIRED_TARGETS dredd_rule_lib)
-list(APPEND REQUIRED_TARGETS tflchef)
-list(APPEND REQUIRED_TARGETS tflite2circle)
TargetRequire_Return(${REQUIRED_TARGETS})
-nncc_find_resource(TensorFlowLiteRecipes)
-nncc_find_resource(CircleRecipes)
-
-set(TFLITE_RECIPE_REPO "${TensorFlowLiteRecipes_DIR}")
-set(CIRCLE_RECIPE_REPO "${CircleRecipes_DIR}")
-unset(RECIPE_REPO)
-
-set(TEST_RECIPE_FILENAME "test.recipe")
-set(TEST_RULE_FILENAME "test.rule")
-
unset(TEST_DEPS)
unset(TEST_NAMES)
set(oneValueArgs "")
set(multiValueArgs PASS)
-macro(Add RECIPE)
- if(NOT EXISTS "${TFLITE_RECIPE_REPO}/${RECIPE}/test.recipe")
- if(NOT EXISTS "${CIRCLE_RECIPE_REPO}/${RECIPE}/test.recipe")
- message(FATAL_ERROR "Missing recipe of '${RECIPE}' test")
- else()
- set(RECIPE_REPO ${CIRCLE_RECIPE_REPO})
- endif()
- else()
- set(RECIPE_REPO ${TFLITE_RECIPE_REPO})
- endif()
-
- if(NOT EXISTS "${RECIPE_REPO}/${RECIPE}/test.rule")
- message(FATAL_ERROR "Missing rule of '${RECIPE}' test")
- endif()
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+macro(Add RECIPE)
cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
unset(OPT_OPTIONS)
foreach(src ${ARG_PASS})
list(APPEND OPT_OPTIONS "--${src}")
endforeach(src ${ARG_PASS})
- set(RECIPE_FILE "${RECIPE}.recipe")
- set(RECIPE_SOURCE_PATH "${RECIPE_REPO}/${RECIPE}/${TEST_RECIPE_FILENAME}")
- set(RECIPE_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE_FILE}")
-
- set(RULE_FILE "${RECIPE}.rule")
- set(RULE_SOURCE_PATH "${RECIPE_REPO}/${RECIPE}/${TEST_RULE_FILENAME}")
- set(RULE_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RULE_FILE}")
-
- set(TFLITE_FILE "${RECIPE}.tflite")
- set(TFLITE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${TFLITE_FILE}")
-
set(CIRCLE_FILE "${RECIPE}.circle")
- set(CIRCLE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${CIRCLE_FILE}")
+ set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${CIRCLE_FILE}")
set(OPT_CIRCLE_FILE "${RECIPE}.opt.circle")
set(OPT_CIRCLE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${OPT_CIRCLE_FILE}")
- # Copy .recipe
- add_custom_command(OUTPUT ${RECIPE_BINARY_PATH}
- COMMAND ${CMAKE_COMMAND} -E copy "${RECIPE_SOURCE_PATH}" "${RECIPE_BINARY_PATH}"
- DEPENDS ${RECIPE_SOURCE_PATH}
- COMMENT "Generate ${RECIPE_FILE}"
- )
-
- # Copy .rule
- add_custom_command(OUTPUT ${RULE_BINARY_PATH}
- COMMAND ${CMAKE_COMMAND} -E copy "${RULE_SOURCE_PATH}" "${RULE_BINARY_PATH}"
- DEPENDS ${RULE_SOURCE_PATH}
- COMMENT "Generate ${RULE_FILE}"
- )
-
- if(${RECIPE_REPO} STREQUAL ${TFLITE_RECIPE_REPO})
- # Generate .tflite
- add_custom_command(OUTPUT ${TFLITE_OUTPUT_PATH}
- COMMAND $<TARGET_FILE:tflchef-file> ${RECIPE_BINARY_PATH} ${TFLITE_OUTPUT_PATH}
- DEPENDS $<TARGET_FILE:tflchef-file> ${RECIPE_BINARY_PATH}
- COMMENT "Generate ${TFLITE_FILE}"
- )
-
- # Generate .circle
- add_custom_command(OUTPUT ${CIRCLE_OUTPUT_PATH}
- COMMAND $<TARGET_FILE:tflite2circle> ${TFLITE_OUTPUT_PATH} ${CIRCLE_OUTPUT_PATH}
- DEPENDS $<TARGET_FILE:tflite2circle> ${TFLITE_OUTPUT_PATH}
- COMMENT "Generate ${CIRCLE_FILE}"
- )
-
- list(APPEND TEST_DEPS ${TFLITE_OUTPUT_PATH})
- else()
- # Generate .circle
- add_custom_command(OUTPUT ${CIRCLE_OUTPUT_PATH}
- COMMAND $<TARGET_FILE:circlechef-file> ${RECIPE_BINARY_PATH} ${CIRCLE_OUTPUT_PATH}
- DEPENDS $<TARGET_FILE:circlechef-file> ${RECIPE_BINARY_PATH}
- COMMENT "Generate ${CIRCLE_FILE}"
- )
- endif()
-
# Generate optimized .circle
add_custom_command(OUTPUT ${OPT_CIRCLE_OUTPUT_PATH}
- COMMAND $<TARGET_FILE:circle2circle> ${OPT_OPTIONS} ${CIRCLE_OUTPUT_PATH} ${OPT_CIRCLE_OUTPUT_PATH}
- DEPENDS $<TARGET_FILE:circle2circle> ${CIRCLE_OUTPUT_PATH}
+ COMMAND $<TARGET_FILE:circle2circle> ${OPT_OPTIONS} ${CIRCLE_PATH} ${OPT_CIRCLE_OUTPUT_PATH}
+ DEPENDS $<TARGET_FILE:circle2circle> ${CIRCLE_PATH}
COMMENT "Generate ${OPT_CIRCLE_FILE}"
)
- list(APPEND TEST_DEPS ${RECIPE_BINARY_PATH} ${RULE_BINARY_PATH}
- ${CIRCLE_OUTPUT_PATH} ${OPT_CIRCLE_OUTPUT_PATH})
+ list(APPEND TEST_DEPS ${OPT_CIRCLE_OUTPUT_PATH})
list(APPEND TEST_NAMES ${RECIPE})
endmacro(Add)
# Generate dependencies
add_custom_target(circle2circle_dredd_recipe_test ALL DEPENDS ${TEST_DEPS})
+add_dependencies(circle2circle_dredd_recipe_test common_artifacts_deps)
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
# Run tests
add_test(
NAME circle2circle_dredd_recipe_test
COMMAND "${TEST_RUNNER}"
"${TEST_CONFIG}"
- "${CMAKE_CURRENT_BINARY_DIR}"
+ "${ARTIFACTS_BIN_PATH}"
${TEST_NAMES}
)
-require("circlechef")
require("circle2circle")
require("circle-inspect")
require("circle-verify")
+require("common-artifacts")
require("dredd-rule-lib")
-require("tflchef")
-require("tflite2circle")
## TFLITE RECIPE
Add(Net_InstanceNorm_001 PASS fuse_instnorm)
-# Add(Net_InstanceNorm_002 PASS fuse_instnorm)
+Add(Net_InstanceNorm_002 PASS fuse_instnorm)
Add(BatchMatMulV2_000 PASS resolve_customop_batchmatmul)
Add(MatMul_000 PASS resolve_customop_matmul)
+Add(DepthwiseConv2D_003 PASS)
## CIRCLE RECIPE
exit 255
fi
+WORKDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
CONFIG_PATH="$1"; shift
-WORKDIR="$1"; shift
+RESOURCE_DIR="$1"; shift
source "${CONFIG_PATH}"
echo "-- Found circle-inspect: ${CIRCLE_INSPECT_PATH}"
echo "-- Found circle-verify: ${CIRCLE_VERIFY_PATH}"
echo "-- Found circle2circle: ${CIRCLE2CIRCLE_PATH}"
-echo "-- Found workdir: ${WORKDIR}"
+echo "-- Found common-artifacts: ${RESOURCE_DIR}"
TESTED=()
PASSED=()
FAILED=()
-pushd "${WORKDIR}"
+pushd ${WORKDIR}
while [[ $# -ne 0 ]]; do
PREFIX="$1"; shift
cat > "${PREFIX}.log" <(
exec 2>&1
- echo "-- Found tflite: ${PREFIX}.tflite"
+ echo "-- Found circle: ${PREFIX}.opt.circle"
# Exit immediately if any command fails
set -e
set +x
# (COMPILED_FILE, INSPECT_PROG_PATH, VERIFY_PROG_PATH, ERROR_LOG) must be set for rule-lib.sh
- COMPILED_FILE="${WORKDIR}/${PREFIX}.opt.circle"
+ COMPILED_FILE="${PREFIX}.opt.circle"
INSPECT_PROG_PATH=${CIRCLE_INSPECT_PATH}
VERIFY_PROG_PATH=${CIRCLE_VERIFY_PATH}
ERROR_LOG="${PREFIX}.error"
trap 'echo "** ERROR **" ; cat "${ERROR_LOG}"' ERR
source rule-lib.sh
- source "${PREFIX}.rule"
+ source "${RESOURCE_DIR}/${PREFIX}.rule"
# unset
trap - ERR
target_link_libraries(circle2circle luci_pass)
target_link_libraries(circle2circle luci_export)
target_link_libraries(circle2circle arser)
+target_link_libraries(circle2circle vconone)
install(TARGETS circle2circle DESTINATION bin)
target_link_libraries(circle2circle_test luci_pass)
target_link_libraries(circle2circle_test luci_export)
target_link_libraries(circle2circle_test arser)
+target_link_libraries(circle2circle_test vconone)
require("hermes-std")
require("luci")
require("arser")
+require("vconone")
#include <oops/InternalExn.h>
#include <arser/arser.h>
+#include <vconone/vconone.h>
#include <functional>
#include <iostream>
using Algorithms = luci::CircleOptimizer::Options::Algorithm;
using AlgorithmParameters = luci::CircleOptimizer::Options::AlgorithmParameters;
+void print_version(void)
+{
+ std::cout << "circle2circle version " << vconone::get_string() << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+}
+
int entry(int argc, char **argv)
{
// Simple argument parser (based on map)
arser::Arser arser("circle2circle provides circle model optimization and transformations");
+ arser.add_argument("--version")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(print_version);
+
arser.add_argument("--all").nargs(0).required(false).default_value(false).help(
"Enable all optimize options");
add_subdirectory(circle)
# Tools
add_subdirectory(tools)
-add_subdirectory(tests)
+if(ENABLE_TEST)
+ add_subdirectory(tests)
+endif(ENABLE_TEST)
for (uint32_t idx = 0; idx < quant->zero_point()->size(); ++idx)
chef_quant->add_zero_point(quant->zero_point()->Get(idx));
}
+ circlechef::TensorQuantization *chef_quant = operand->mutable_quant();
+ chef_quant->set_quantized_dimension(quant->quantized_dimension());
}
}
quant_builder.add_min(quant_min);
quant_builder.add_scale(quant_scale);
quant_builder.add_zero_point(quant_zero_point);
+ quant_builder.add_quantized_dimension(quant.quantized_dimension());
// Update QuantizationParameters Index
quant_index = quant_builder.Finish();
repeated float max = 2;
repeated float scale = 3;
repeated int64 zero_point = 4;
+ optional int32 quantized_dimension = 5 [default = 0];
}
message Operand {
{
std::cout << err.what() << std::endl;
std::cout << arser;
- return 0;
+ return 255;
}
int32_t model_version = 1;
{
std::cout << err.what() << std::endl;
std::cout << arser;
- return 0;
+ return 255;
}
std::string circle_path = arser.get<std::string>("circle");
{
std::cout << err.what() << '\n';
std::cout << arser;
- return 0;
+ return 255;
}
std::string circle_path = arser.get<std::string>("circle");
}
};
+class UniquePrinter : public OpPrinter
+{
+public:
+ void options(const circle::Operator *op, std::ostream &os) const override
+ {
+ if (auto *params = op->builtin_options_as_UniqueOptions())
+ {
+ os << " ";
+ os << "idx_out_type(" << EnumNameTensorType(params->idx_out_type()) << ") ";
+ os << std::endl;
+ }
+ }
+};
+
class WhilePrinter : public OpPrinter
{
public:
_op_map[circle::BuiltinOperator_MAX_POOL_2D] = make_unique<Pool2DPrinter>();
_op_map[circle::BuiltinOperator_MIRROR_PAD] = make_unique<MirrorPadPrinter>();
_op_map[circle::BuiltinOperator_MUL] = make_unique<MulPrinter>();
+ // There is no Option for NON_MAX_SUPPRESSION_V4
_op_map[circle::BuiltinOperator_ONE_HOT] = make_unique<OneHotPrinter>();
_op_map[circle::BuiltinOperator_PACK] = make_unique<PackPrinter>();
// There is no Option for PAD
+ // There is no Option for PADV2
// There is no Option for PRELU
// There is no Option for RELU
// There is no Option for RELU6
_op_map[circle::BuiltinOperator_SUM] = make_unique<ReducerPrinter>();
_op_map[circle::BuiltinOperator_TRANSPOSE_CONV] = make_unique<TransposeConvPrinter>();
// There is no Option for TOPK_V2
+ _op_map[circle::BuiltinOperator_UNIQUE] = make_unique<UniquePrinter>();
_op_map[circle::BuiltinOperator_WHILE] = make_unique<WhilePrinter>();
_op_map[circle::BuiltinOperator_CUSTOM] = make_unique<CustomOpPrinter>();
endif()
# Create python virtual environment with tensorflow 1.13.2
-set(VIRTUALENV_OVERLAY "${NNCC_OVERLAY_DIR}/venv_1_13_2")
+set(VIRTUALENV_OVERLAY_TF_1_13_2 "${NNCC_OVERLAY_DIR}/venv_1_13_2")
+
+# Create python virtual environment with tensorflow 2.3.0
+set(VIRTUALENV_OVERLAY_TF_2_3_0 "${NNCC_OVERLAY_DIR}/venv_2_3_0")
+
+add_custom_command(
+ OUTPUT ${VIRTUALENV_OVERLAY_TF_1_13_2}
+ COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_1_13_2}
+)
add_custom_command(
- OUTPUT ${VIRTUALENV_OVERLAY}
- COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY}
+ OUTPUT ${VIRTUALENV_OVERLAY_TF_2_3_0}
+ COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_3_0}
)
# Create requirements.txt and install required pip packages
set(REQUIREMENTS_FILE "requirements.txt")
-set(REQUIREMENTS_OVERLAY_PATH "${NNCC_OVERLAY_DIR}/${REQUIREMENTS_FILE}")
+set(REQUIREMENTS_OVERLAY_PATH_TF_1_13_2 "${VIRTUALENV_OVERLAY_TF_1_13_2}/${REQUIREMENTS_FILE}")
+set(REQUIREMENTS_OVERLAY_PATH_TF_2_3_0 "${VIRTUALENV_OVERLAY_TF_2_3_0}/${REQUIREMENTS_FILE}")
add_custom_command(
- OUTPUT ${REQUIREMENTS_OVERLAY_PATH}
- COMMAND ${CMAKE_COMMAND} -E echo "tensorflow==1.13.2" > ${REQUIREMENTS_OVERLAY_PATH}
- COMMAND ${VIRTUALENV_OVERLAY}/bin/python -m pip --default-timeout=1000 install --upgrade pip setuptools
- COMMAND ${VIRTUALENV_OVERLAY}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH} --upgrade
- DEPENDS ${VIRTUALENV_OVERLAY} ${REQUIREMENTS_OVERLAY_PATH}
+ OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
+ COMMAND ${CMAKE_COMMAND} -E echo "tensorflow==1.13.2" > ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
+ COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install --upgrade pip setuptools
+ COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} --upgrade
+ DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2}
)
-add_custom_target(common_artifacts_python_deps ALL
- DEPENDS ${VIRTUALENV_OVERLAY} ${REQUIREMENTS_OVERLAY_PATH}
+add_custom_command(
+ OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
+ COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.3.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
+ COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
+ COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip setuptools
+ COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} --upgrade
+ DEPENDS ${VIRTUALENV_OVERLAY_TF_2_3_0}
)
-# TODO Create python virtual environment with tensorflow 2.3.0-rc0
+add_custom_target(common_artifacts_python_deps ALL
+ DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2} ${VIRTUALENV_OVERLAY_TF_2_3_0} ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
+)
#[[ Generate common resources ]]
# TODO add pbtxt
add_executable(testDataGenerator ${SOURCES})
target_include_directories(testDataGenerator PRIVATE ${HDF5_INCLUDE_DIRS})
target_link_libraries(testDataGenerator PRIVATE ${HDF5_CXX_LIBRARIES})
+target_link_libraries(testDataGenerator PRIVATE arser)
target_link_libraries(testDataGenerator PRIVATE foder)
target_link_libraries(testDataGenerator PRIVATE luci_import)
target_link_libraries(testDataGenerator PRIVATE luci_interpreter)
#[[ optimize : Exclude from circle optimization(circle2circle) ]]
## TensorFlowLiteRecipes
-optimize(ReLU6_000)
-optimize(Where_000)
-optimize(Where_001)
+optimize(Unique_000)
+optimize(Unique_001)
+optimize(Unique_002)
+optimize(Unique_003)
+optimize(Unique_U8_000)
+optimize(Unique_U8_001)
## CircleRecipes
tcgenerate(DepthwiseConv2D_001) # runtime doesn't support dilation
tcgenerate(DepthwiseConv2D_003) # runtime doesn't support dilation
tcgenerate(DepthwiseConv2D_U8_000)
+tcgenerate(DepthwiseConv2D_U8_001) # luci-interpreter doesn't support channel-wise quantization yet
tcgenerate(Div_000)
tcgenerate(ELU_000)
tcgenerate(Equal_000)
tcgenerate(MaxPool2D_U8_000)
tcgenerate(Mean_U8_000)
tcgenerate(Minimum_000)
+tcgenerate(NonMaxSuppressionV4_000)
+tcgenerate(NonMaxSuppressionV4_001)
tcgenerate(MirrorPad_000)
tcgenerate(Mul_U8_000)
tcgenerate(Neg_000)
tcgenerate(Net_Dangle_001)
tcgenerate(Net_InstanceNorm_001)
tcgenerate(Net_InstanceNorm_002)
-tcgenerate(Net_ZeroDim_001) # fix luci
+tcgenerate(Net_ZeroDim_001) # luci-interpreter doesn't support zero dim
tcgenerate(NotEqual_000)
tcgenerate(OneHot_000)
tcgenerate(OneHot_001)
tcgenerate(Pack_000)
tcgenerate(Pack_U8_000)
tcgenerate(Pad_U8_000)
+tcgenerate(PadV2_000)
tcgenerate(Pow_000)
tcgenerate(PRelu_000)
tcgenerate(Range_000)
tcgenerate(ReduceProd_002)
tcgenerate(ReduceProd_003)
tcgenerate(ReLU_000)
-tcgenerate(ReLU6_000) # luci NYI
+tcgenerate(ReLU6_000)
tcgenerate(ReLUN1To1_000)
-tcgenerate(Reshape_003) # fix luci
+tcgenerate(Reshape_003) # luci-interpreter doesn't support reshape without built-in option
tcgenerate(Reshape_U8_000)
tcgenerate(ResizeBilinear_000)
+tcgenerate(ResizeBilinear_U8_000) # luci-interpreter
tcgenerate(ResizeNearestNeighbor_000)
tcgenerate(ReverseSequence_000)
tcgenerate(ReverseV2_000)
tcgenerate(SpaceToBatchND_003)
tcgenerate(SpaceToDepth_000)
tcgenerate(SparseToDense_000)
-tcgenerate(SplitV_000) # fix luci
+tcgenerate(SplitV_000)
tcgenerate(Sqrt_000)
tcgenerate(Square_000)
tcgenerate(SquaredDifference_000)
tcgenerate(Tanh_000)
tcgenerate(Tile_000)
tcgenerate(Tile_U8_000)
-tcgenerate(TopKV2_000) # fix luci
-tcgenerate(TopKV2_001) # fix luci
-tcgenerate(TransposeConv_000) # fix interpreter
+tcgenerate(TopKV2_000)
+tcgenerate(TopKV2_001)
tcgenerate(Unique_000)
tcgenerate(Unique_001)
tcgenerate(Unique_002)
tcgenerate(Unique_003)
tcgenerate(Unique_U8_000)
tcgenerate(Unique_U8_001)
-tcgenerate(Where_000) # luci NYI
-tcgenerate(Where_001) # luci NYI
-tcgenerate(While_000) # fix luci
+tcgenerate(Where_000)
+tcgenerate(Where_001)
+tcgenerate(While_000)
tcgenerate(While_001)
tcgenerate(While_002)
tcgenerate(While_003)
-tcgenerate(YUV_TO_RGB_000) # fix luci
+tcgenerate(YUV_TO_RGB_000)
tcgenerate(YUV_TO_RGB_U8_000)
tcgenerate(ZerosLike_000)
+require("arser")
require("circle2circle")
require("circlechef")
require("foder")
* limitations under the License.
*/
+#include <arser/arser.h>
#include <foder/FileLoader.h>
#include <luci/Importer.h>
#include <luci_interpreter/Interpreter.h>
}
}
-void fill_random_data(void *data, uint32_t size, loco::DataType dtype)
+void fill_random_data(void *data, uint32_t size, loco::DataType dtype, uint32_t seed)
{
- std::random_device rd; // used to obtain a seed for the random number engine
- std::mt19937 gen(rd()); // standard mersenne_twister_engine seeded with rd()
+ std::mt19937 gen(seed); // standard mersenne_twister_engine seeded with the given seed
switch (dtype)
{
int entry(int argc, char **argv)
{
- std::string circle_file{argv[1]};
+ arser::Arser arser;
+ arser.add_argument("circle").type(arser::DataType::STR).help("Circle file you want to test");
+ arser.add_argument("--fixed_seed")
+ .required(false)
+ .nargs(0)
+ .help("Put a fixed seed into the random number generator");
+
+ try
+ {
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cout << err.what() << std::endl;
+ std::cout << arser;
+ return 255;
+ }
+
+ std::string circle_file = arser.get<std::string>("circle");
size_t last_dot_index = circle_file.find_last_of(".");
std::string prefix = circle_file.substr(0, last_dot_index);
std::unique_ptr<H5::Group> output_value_group =
std::make_unique<H5::Group>(output_file.createGroup("value"));
+ std::random_device rd; // used to obtain a seed for the random number engine
uint32_t input_index = 0;
for (uint32_t g = 0; g < circle_model->subgraphs()->size(); g++)
{
std::vector<int8_t> data(byte_size);
// generate random data
- fill_random_data(data.data(), data_size, input_node->dtype());
+ if (arser["--fixed_seed"])
+ fill_random_data(data.data(), data_size, input_node->dtype(), 0);
+ else
+ fill_random_data(data.data(), data_size, input_node->dtype(), rd());
dataset->write(data.data(), dtype);
#include <gtest/gtest.h>
-TEST(HermesTest, simple_usecase)
+namespace
{
- // TO BE FILLED
+
+class Logger final : public hermes::Source
+{
+public:
+ Logger(hermes::Context *ctx);
+ ~Logger();
+};
+
+Logger::Logger(hermes::Context *ctx) { activate(ctx->sources(), ctx->bus()); }
+Logger::~Logger() { deactivate(); }
+
+} // namespace
+
+TEST(HermesTest, logger_constructor_NEG)
+{
+ hermes::Context context;
+ // we expect a segmentation fault from nullptr->sources()
+ ASSERT_DEATH(Logger logger(&context), "");
+
+ SUCCEED();
}
+
+// TODO add HermesTest simple_usecase
}
} // namespace
-TEST(NodeExecution_BiasEncode, s32) { test<int32_t>(); }
+TEST(NodeExecution_BiasEncode, s32)
+{
+ test<int32_t>();
+
+ SUCCEED();
+}
-TEST(NodeExecution_BiasEncode, f32) { test<float>(); }
+TEST(NodeExecution_BiasEncode, f32)
+{
+ test<float>();
+
+ SUCCEED();
+}
};
run_test<float>(lhs, rhs, out, Shape{2, 3}, Shape{3, 3}, Shape{2, 3}, loco::DataType::FLOAT32);
+
+ SUCCEED();
}
/* from the code below:
};
run_test<int32_t>(lhs, rhs, out, Shape{4, 2}, Shape{2, 6}, Shape{4, 6}, loco::DataType::S32);
+
+ SUCCEED();
}
// clang-format on
// TODO Validate the output (when the implementation becomes stable)
std::cout << locop::fmt<locop::LinearV1>(g) << std::endl;
+
+ SUCCEED();
}
TEST(LinearV1FormatterTest, user_defined_node_summary_builder)
tensor_shape->dim(0) = 4;
std::cout << fmt<TensorShapeFormat::Bracket>(tensor_shape.get()) << std::endl;
+
+ SUCCEED();
}
//
// Note that due to historical and performance reasons, per-tensor quantization uses unsigned
// integer types, while per-channel uses signed types assuming 'zero_point' == 0.
-//
-// TODO Add 'quantized_dimension' field for per-channel case when IR provides it.
struct AffineQuantization
{
std::vector<float> scale;
std::vector<int32_t> zero_point;
+ int32_t quantized_dimension;
};
class Tensor
return _quantization.zero_point[0];
}
+ const std::vector<float> &scales() const { return _quantization.scale; }
+
+ const std::vector<int32_t> &zero_points() const { return _quantization.zero_point; }
+
+ int32_t quantized_dimension() const { return _quantization.quantized_dimension; }
+
template <typename T> const T *data() const { return reinterpret_cast<const T *>(_data.get()); }
template <typename T> T *data() { return reinterpret_cast<T *>(_data.get()); }
Activation activation;
};
+struct DepthToSpaceParams
+{
+ int block_size;
+};
+
struct DepthwiseConv2DParams
{
Padding padding;
void Add::configure()
{
- assert(input1()->element_type() == input2()->element_type());
+ if (input1()->element_type() != input2()->element_type())
+ {
+ throw std::runtime_error("Input Tensor Data Type Mismatch.");
+ }
output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
}
}
}
+TEST(AddTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2});
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(AddTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1});
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2});
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
Concatenation.cpp
Conv2D.h
Conv2D.cpp
+ DepthToSpace.h
+ DepthToSpace.cpp
DepthwiseConv2D.h
DepthwiseConv2D.cpp
Elu.h
Pad.cpp
Reshape.h
Reshape.cpp
+ Reverse.h
+ Reverse.cpp
+ Slice.h
+ Slice.cpp
Softmax.h
Softmax.cpp
SpaceToDepth.h
AveragePool2D.test.cpp
Concatenation.test.cpp
Conv2D.test.cpp
+ DepthToSpace.test.cpp
DepthwiseConv2D.test.cpp
Elu.test.cpp
FullyConnected.test.cpp
Mul.test.cpp
Pad.test.cpp
Reshape.test.cpp
+ Reverse.test.cpp
+ Slice.test.cpp
Softmax.test.cpp
SpaceToDepth.test.cpp
Split.test.cpp
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthToSpace.h"
+#include "Utils.h"
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+DepthToSpace::DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params)
+ : KernelWithParams<DepthToSpaceParams>({input}, {output}, params)
+{
+}
+
+void DepthToSpace::configure()
+{
+ if (input()->shape().num_dims() != 4)
+ {
+ throw std::runtime_error("Invalid input num_dims.");
+ }
+ if (output()->element_type() != DataType::FLOAT32 && output()->element_type() != DataType::U8 &&
+ output()->element_type() != DataType::S8 && output()->element_type() != DataType::S32 &&
+ output()->element_type() != DataType::S64)
+ {
+ throw std::runtime_error("Invalid output type");
+ }
+ if (input()->element_type() != output()->element_type())
+ {
+ throw std::runtime_error("Type mismatch on input and output.");
+ }
+ const int block_size = params().block_size;
+ const int32_t input_height = input()->shape().dim(1);
+ const int32_t input_width = input()->shape().dim(2);
+ const int32_t input_channels = input()->shape().dim(3);
+ int32_t output_height = input_height * block_size;
+ int32_t output_width = input_width * block_size;
+ int32_t output_channels = input_channels / block_size / block_size;
+
+ assert(input_height == output_height / block_size);
+ assert(input_width == output_width / block_size);
+ assert(input_channels == output_channels * block_size * block_size);
+
+ Shape output_shape(4);
+ output_shape.dim(0) = input()->shape().dim(0);
+ output_shape.dim(1) = output_height;
+ output_shape.dim(2) = output_width;
+ output_shape.dim(3) = output_channels;
+
+ output()->resize(output_shape);
+}
+
+void DepthToSpace::execute() const
+{
+ tflite::DepthToSpaceParams op_params;
+ op_params.block_size = params().block_size;
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::optimized_ops::DepthToSpace(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ tflite::optimized_ops::DepthToSpace(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported Type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class DepthToSpace : public KernelWithParams<DepthToSpaceParams>
+{
+public:
+ DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/DepthToSpace.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> class DepthToSpaceTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_CASE(DepthToSpaceTest, DataTypes);
+
+TYPED_TEST(DepthToSpaceTest, SimpleCase)
+{
+ std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+ Shape input_shape{1, 1, 2, 4};
+ std::vector<TypeParam> output_data{1, 2, 5, 6, 3, 4, 7, 8};
+ std::vector<int32_t> output_shape{1, 2, 4, 1};
+
+ Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data);
+ Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+ DepthToSpaceParams params{};
+ params.block_size = 2;
+
+ DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+ ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
ElementsAreArray(ArrayFloatNear(ref_output_data)));
}
-TEST(L2NormalizeTest, Uint8Quantized)
-{
- // TODO
- // Implement GetDequantizedOutput Function.
- // Create Test for Uint8 Case
-}
+// TODO Uint8Quantized
+// Implement GetDequantizedOutput Function.
+// Create Test for Uint8 Case
} // namespace
} // namespace kernels
1.0f, -0.5f, -1.0f, // Row 2
},
/*alpha=*/0.5f, getElementType<float>());
-}
-TEST(LeakReluTest, Uint8Simple)
-{
- // TODO
- // Implement GetDequantizedOutput Function.
- // Create Test for Uint8 Case
+ SUCCEED();
}
+// TODO Uint8Simple
+// Implement GetDequantizedOutput Function.
+// Create Test for Uint8 Case
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
// TODO make a Shape checking of output_tensor.
}
-TEST(LogisticTest, Uint8)
-{
- // Need to Implement GetDequantizedOutput Function.
-}
+// TODO Uint8
+// Need to Implement GetDequantizedOutput Function.
} // namespace
} // namespace kernels
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Reverse.h"
+#include "kernels/Utils.h"
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Reverse::Reverse(const Tensor *input, const Tensor *axes, Tensor *output)
+ : Kernel({input, axes}, {output})
+{
+}
+
+void Reverse::configure()
+{
+ assert(axes()->shape().num_dims() == 1);
+ assert(input()->shape().num_dims() >= axes()->shape().num_elements());
+ if (input()->element_type() != DataType::S32 && input()->element_type() != DataType::FLOAT32 &&
+ input()->element_type() != DataType::U8 && input()->element_type() != DataType::S16 &&
+ input()->element_type() != DataType::S64)
+ {
+ throw std::runtime_error("Unsupported input type.");
+ }
+ if (axes()->element_type() != DataType::S32)
+ {
+ throw std::runtime_error("Unsupported axes type.");
+ }
+ if (axes()->shape().num_elements() > 1)
+ {
+ throw std::runtime_error("Current implementation does not support more than 1 axis.");
+ }
+ int axis_value = getTensorData<int32_t>(axes())[0];
+ if (axis_value < 0 || axis_value >= input()->shape().num_dims())
+ {
+ throw std::runtime_error("Invalid axes value");
+ }
+ assert(input()->element_type() == output()->element_type());
+
+ output()->resize(input()->shape());
+}
+
+void Reverse::execute() const
+{
+ int axis_value = getTensorData<int32_t>(axes())[0];
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::Reverse<float>(axis_value, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ tflite::reference_ops::Reverse<uint8_t>(
+ axis_value, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported output type");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_REVERSE_H
+#define LUCI_INTERPRETER_KERNELS_REVERSE_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Reverse : public Kernel
+{
+public:
+ Reverse(const Tensor *input, const Tensor *axes, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axes() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_REVERSE_H
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Reverse.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> class ReverseTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_CASE(ReverseTest, DataTypes);
+
+TYPED_TEST(ReverseTest, MultiDimensions)
+{
+ // TypeParam
+ std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
+ Shape input_shape{4, 3, 2};
+ std::vector<int32_t> axis_data{1};
+ Shape axis_shape{1};
+
+ std::vector<TypeParam> output_data{5, 6, 3, 4, 1, 2, 11, 12, 9, 10, 7, 8,
+ 17, 18, 15, 16, 13, 14, 23, 24, 21, 22, 19, 20};
+ std::vector<int32_t> output_shape{4, 3, 2};
+
+ Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data);
+ Tensor axis_tensor = makeInputTensor<DataType::S32>(axis_shape, axis_data);
+
+ Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+ Reverse kernel = Reverse(&input_tensor, &axis_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+ ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Slice.h"
+#include "Utils.h"
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+#include <cassert>
+#include <cstring>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+const int max_dim = 4;
+
+Slice::Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output)
+ : Kernel({input, begin, size}, {output})
+{
+}
+
+template <typename T>
+Shape calculateOutputShape(const Tensor *input, const Tensor *begin, const Tensor *size)
+{
+ Shape output_shape = Shape(input->shape().num_dims());
+ for (int idx = 0; idx < input->shape().num_dims(); idx++)
+ {
+ T size_value = getTensorData<T>(size)[idx];
+ if (size_value < 0)
+ {
+ if (size_value != -1)
+ {
+ throw std::runtime_error("Invalid size.");
+ }
+ size_value = input->shape().dim(idx) - getTensorData<T>(begin)[idx];
+ }
+ else
+ {
+ if (input->shape().dim(idx) < getTensorData<T>(begin)[idx] + size_value)
+ {
+ throw std::runtime_error("Invalid begin and size.");
+ }
+ }
+ output_shape.dim(idx) = static_cast<int>(size_value);
+ }
+ return output_shape;
+}
+
+template <typename T>
+void getBeginAndSizeVectors(int dimensions, const Tensor *begin, const Tensor *size,
+ std::vector<int> *begins, std::vector<int> *sizes)
+{
+ for (int idx = dimensions - 1; idx >= 0; --idx)
+ {
+ begins->push_back(getTensorData<T>(begin)[idx]);
+ sizes->push_back(getTensorData<T>(size)[idx]);
+ }
+}
+
+void Slice::configure()
+{
+ assert(input()->element_type() == output()->element_type());
+ assert(begin()->element_type() == DataType::S32 || begin()->element_type() == DataType::S64);
+ assert(size()->element_type() == DataType::S32 || size()->element_type() == DataType::S64);
+ assert(begin()->shape().num_dims() == 1);
+ assert(size()->shape().num_dims() == 1);
+ assert(input()->shape().num_dims() <= max_dim);
+
+ if (begin()->element_type() == DataType::S32)
+ {
+ output()->resize(calculateOutputShape<int32_t>(input(), begin(), size()));
+ }
+ else if (begin()->element_type() == DataType::S64)
+ {
+ output()->resize(calculateOutputShape<int64_t>(input(), begin(), size()));
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Slice::execute() const
+{
+ std::vector<int> begins;
+ begins.reserve(max_dim);
+ std::vector<int> sizes;
+ sizes.reserve(max_dim);
+ if (begin()->element_type() == DataType::S32)
+ {
+ getBeginAndSizeVectors<int32_t>(input()->shape().num_dims(), begin(), size(), &begins, &sizes);
+ }
+ else if (begin()->element_type() == DataType::S64)
+ {
+ getBeginAndSizeVectors<int64_t>(input()->shape().num_dims(), begin(), size(), &begins, &sizes);
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported begin type.");
+ }
+ for (int i = input()->shape().num_dims(); i < max_dim; ++i)
+ {
+ begins.push_back(0);
+ sizes.push_back(1);
+ }
+
+ assert(begins.size() == 4);
+ assert(sizes.size() == 4);
+ tflite::SliceParams op_params{};
+ op_params.begin_count = 4;
+ op_params.size_count = 4;
+ for (int i = 0; i < 4; i++)
+ {
+ op_params.begin[i] = begins[3 - i];
+ op_params.size[i] = sizes[3 - i];
+ }
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::optimized_ops::Slice(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ tflite::optimized_ops::Slice(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported input type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SLICE_H
+#define LUCI_INTERPRETER_KERNELS_SLICE_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Slice : public Kernel
+{
+public:
+ Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *begin() const { return _inputs[1]; }
+ const Tensor *size() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SLICE_H
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Slice.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> class SliceTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_CASE(SliceTest, DataTypes);
+
+TYPED_TEST(SliceTest, SimpleTest)
+{
+ std::vector<TypeParam> input_data{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6};
+ Shape input_shape{3, 2, 3, 1};
+ std::vector<int32_t> begin_data{1, 0, 0, 0};
+ Shape begin_shape{4};
+ std::vector<int32_t> size_data{2, 1, -1, 1};
+ Shape size_shape{4};
+ std::vector<TypeParam> output_data{3, 3, 3, 5, 5, 5};
+ std::vector<int32_t> output_shape{2, 1, 3, 1};
+
+ Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data);
+ Tensor begin_tensor = makeInputTensor<DataType::S32>(begin_shape, begin_data);
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data);
+
+ Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+ Slice kernel(&input_tensor, &begin_tensor, &size_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+ ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
/*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365},
/*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1,
getElementType<float>());
+
+ SUCCEED();
}
TEST(TransposeConvTest, FloatTwoFiltersTest)
3352, 3652, 2760},
/*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1,
getElementType<float>());
-}
-TEST(TransposeConvTest, Uint8Simple)
-{
- // TODO
- // Implement GetDequantizedOutput Function.
- // Create Test for Uint8 Case
-}
-TEST(TransposeConvTest, Uint8FiltersTest)
-{
- // TODO
- // Implement GetDequantizedOutput Function.
- // Create Test for Uint8 Case
+ SUCCEED();
}
+// TODO Uint8Simple
+// Implement GetDequantizedOutput Function.
+// Create Test for Uint8 Case
+
+// TODO Uint8FiltersTest
+// Implement GetDequantizedOutput Function.
+// Create Test for Uint8 Case
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
+nnas_find_package(GTest REQUIRED)
+
set(SOURCES
GraphLoader.h
GraphLoader.cpp
target_link_libraries(luci_interpreter_loader
PUBLIC luci_lang luci_interpreter_core
PRIVATE luci_interpreter_kernels nncc_common)
+
+set(TEST_SOURCES KernelBuilder.test.cpp)
+
+GTest_AddTest(luci_interpreter_loader_test ${TEST_SOURCES})
+target_link_libraries(luci_interpreter_loader_test luci_interpreter_loader)
#include "loader/GraphLoader.h"
-#include "loader/ModuleLoader.h"
#include "loader/KernelBuilder.h"
#include <loco/IR/Algorithm.h>
switch (node->opcode())
{
// These nodes denote inputs / outputs of a graph.
- case luci::CircleOpcode::CONST:
+ case luci::CircleOpcode::CIRCLECONST:
case luci::CircleOpcode::CIRCLEINPUT:
case luci::CircleOpcode::CIRCLEOUTPUT:
+ case luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE:
// The following nodes denote outputs of multiple-output nodes.
case luci::CircleOpcode::CIRCLEIFOUT:
case luci::CircleOpcode::CIRCLESPLITOUT:
} // namespace
-GraphLoader::GraphLoader(const ModuleLoader &module_loader, const loco::Graph *graph,
- RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
- std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
- : _module_loader(module_loader), _graph(graph), _runtime_graph(runtime_graph),
- _runtime_to_ir(runtime_to_ir), _node_to_tensor(node_to_tensor)
+GraphLoader::GraphLoader(
+ const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir),
+ _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
{
}
const luci::CircleQuantParam *params = node->quantparam();
quantization.scale.assign(params->scale.cbegin(), params->scale.cend());
quantization.zero_point.assign(params->zerop.cbegin(), params->zerop.cend());
+ quantization.quantized_dimension = params->quantized_dimension;
}
auto tensor = std::make_unique<Tensor>(node->dtype(), std::move(shape), std::move(quantization),
void GraphLoader::loadOperators()
{
- KernelBuilder kernel_builder(_module_loader, *this);
+ KernelBuilder kernel_builder(_graph_to_runtime_graph, _node_to_tensor);
// Create kernels for executable nodes. This has to be done in execution order.
for (const loco::Node *loco_node :
}
}
-void GraphLoader::load()
-{
- loadTensors();
- initInputOutputTensors();
- loadOperators();
-}
-
} // namespace luci_interpreter
namespace luci_interpreter
{
-class ModuleLoader;
-
class GraphLoader
{
public:
- GraphLoader(const ModuleLoader &module_loader, const loco::Graph *graph,
- RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
+ GraphLoader(const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor);
- void load();
-
- Tensor *getTensorForNode(const loco::Node *node) const { return _node_to_tensor.at(node); }
-
-private:
- void loadOperators();
- void initInputOutputTensors() const;
void loadTensors();
+ void initInputOutputTensors() const;
+ void loadOperators();
- const ModuleLoader &_module_loader;
+private:
const loco::Graph *_graph;
RuntimeGraph *_runtime_graph;
RuntimeToIR &_runtime_to_ir;
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph;
std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
};
#include "kernels/AveragePool2D.h"
#include "kernels/Concatenation.h"
#include "kernels/Conv2D.h"
+#include "kernels/DepthToSpace.h"
#include "kernels/DepthwiseConv2D.h"
#include "kernels/Elu.h"
#include "kernels/FullyConnected.h"
#include "kernels/Mul.h"
#include "kernels/Pad.h"
#include "kernels/Reshape.h"
+#include "kernels/Reverse.h"
+#include "kernels/Slice.h"
#include "kernels/Softmax.h"
#include "kernels/SpaceToDepth.h"
#include "kernels/Split.h"
#include "kernels/Unpack.h"
#include "kernels/Transpose.h"
#include "kernels/TransposeConv.h"
-#include "loader/GraphLoader.h"
-#include "loader/ModuleLoader.h"
#include <stdexcept>
const Tensor *KernelBuilder::getInputTensor(const loco::Node *node) const
{
- const Tensor *tensor = _graph_loader.getTensorForNode(node);
+ const Tensor *tensor = _node_to_tensor.at(node);
assert(tensor != nullptr);
return tensor;
}
const Tensor *KernelBuilder::getOptionalInputTensor(const loco::Node *node) const
{
- // TODO Revise this when optional inputs are implemented in the IR.
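+ // An optional input that is omitted in the model is represented by a CircleOutputExclude node.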
+ if (dynamic_cast<const luci::CircleOutputExclude *>(node))
+ {
+ return nullptr;
+ }
return getInputTensor(node);
}
Tensor *KernelBuilder::getOutputTensor(const loco::Node *node) const
{
- Tensor *tensor = _graph_loader.getTensorForNode(node);
+ Tensor *tensor = _node_to_tensor.at(node);
assert(tensor != nullptr);
return tensor;
}
RuntimeGraph *KernelBuilder::getRuntimeGraph(const loco::Graph *graph) const
{
- RuntimeGraph *runtime_graph = _module_loader.getRuntimeGraph(graph);
+ RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph);
assert(runtime_graph != nullptr);
return runtime_graph;
}
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleArgMax *node)
{
assert(node->arity() == 2);
- const Tensor *input1 = getInputTensor(node->input());
- const Tensor *input2 = getInputTensor(node->dimension());
+ const Tensor *input = getInputTensor(node->input());
+ const Tensor *axis = getInputTensor(node->dimension());
Tensor *output = getOutputTensor(node);
ArgMaxParams params{};
params.output_type = node->output_type();
- return std::make_unique<kernels::ArgMax>(input1, input2, output, params);
+ return std::make_unique<kernels::ArgMax>(input, axis, output, params);
}
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleAveragePool2D *node)
return std::make_unique<kernels::Conv2D>(input, filter, bias, output, params);
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleDepthToSpace *node)
+{
+ assert(node->arity() == 1);
+
+ const Tensor *input = getInputTensor(node->input());
+ Tensor *output = getOutputTensor(node);
+
+ DepthToSpaceParams params{};
+ params.block_size = node->block_size();
+
+ return std::make_unique<kernels::DepthToSpace>(input, output, params);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleDepthwiseConv2D *node)
{
assert(node->arity() == 3);
assert(node->arity() == 3);
const Tensor *input = getInputTensor(node->input());
- const Tensor *filter = getInputTensor(node->weights());
+ const Tensor *weights = getInputTensor(node->weights());
const Tensor *bias = getOptionalInputTensor(node->bias());
Tensor *output = getOutputTensor(node);
FullyConnectedParams params{};
params.activation = node->fusedActivationFunction();
- return std::make_unique<kernels::FullyConnected>(input, filter, bias, output, params);
+ return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params);
}
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleIf *node)
else_graph);
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleInput *)
+{
+ throw std::runtime_error("Input node cannot be executed.");
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleL2Normalize *node)
{
assert(node->arity() == 1);
return std::make_unique<kernels::Logistic>(input, output);
}
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleInput *)
-{
- throw std::runtime_error("Input node cannot be executed.");
-}
-
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleMaxPool2D *node)
{
assert(node->arity() == 1);
return std::make_unique<kernels::Reshape>(input, shape, output);
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleReverseV2 *node)
+{
+ assert(node->arity() == 2);
+
+ const Tensor *input = getInputTensor(node->tensor());
+ const Tensor *axes = getInputTensor(node->axis());
+ Tensor *output = getOutputTensor(node);
+
+ return std::make_unique<kernels::Reverse>(input, axes, output);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSlice *node)
+{
+ assert(node->arity() == 3);
+
+ const Tensor *input = getInputTensor(node->input());
+ const Tensor *begin = getInputTensor(node->begin());
+ const Tensor *size = getInputTensor(node->size());
+
+ Tensor *output = getOutputTensor(node);
+
+ return std::make_unique<kernels::Slice>(input, begin, size, output);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSoftmax *node)
{
assert(node->arity() == 1);
return std::make_unique<kernels::Split>(axis, input, std::move(outputs));
}
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqueeze *node)
+{
+ assert(node->arity() == 1);
+
+ const Tensor *input = getInputTensor(node->input());
+ Tensor *output = getOutputTensor(node);
+
+ SqueezeParams params{};
+ params.squeeze_dims = node->squeeze_dims();
+
+ return std::make_unique<kernels::Squeeze>(input, output, params);
+}
+
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleStridedSlice *node)
{
assert(node->arity() == 4);
return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
}
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqueeze *node)
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTranspose *node)
{
- assert(node->arity() == 1);
+ assert(node->arity() == 2);
- const Tensor *input = getInputTensor(node->input());
+ const Tensor *input = getInputTensor(node->a());
+ const Tensor *perm = getInputTensor(node->perm());
Tensor *output = getOutputTensor(node);
- SqueezeParams params{};
- assert(node->squeeze_dims().size() <= 4);
- for (size_t i = 0; i < node->squeeze_dims().size(); i++)
- {
- params.squeeze_dims.push_back(node->squeeze_dims().at(i));
- }
-
- return std::make_unique<kernels::Squeeze>(input, output, params);
+ return std::make_unique<kernels::Transpose>(input, perm, output);
}
std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTransposeConv *node)
return std::make_unique<kernels::Unpack>(input, std::move(outputs), params);
}
-std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTranspose *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input = getInputTensor(node->a());
- const Tensor *perm = getInputTensor(node->perm());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Transpose>(input, perm, output);
-}
-
} // namespace luci_interpreter
#include <memory>
#include <vector>
+#include <unordered_map>
namespace luci_interpreter
{
-class GraphLoader;
-class ModuleLoader;
-
class KernelBuilder : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>
{
public:
- KernelBuilder(const ModuleLoader &module_loader, const GraphLoader &graph_loader)
- : _module_loader(module_loader), _graph_loader(graph_loader)
+ KernelBuilder(
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ : _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
{
}
std::unique_ptr<Kernel> visit(const luci::CircleConcatenation *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleConv2D *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleConst *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleDepthToSpace *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleDepthwiseConv2D *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleElu *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleFullyConnected *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleOutput *node) override;
std::unique_ptr<Kernel> visit(const luci::CirclePad *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleReshape *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleReverseV2 *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleSlice *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSoftmax *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSpaceToDepth *node) override;
std::unique_ptr<Kernel> visit(const luci::CircleSplit *node) override;
RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const;
private:
- const ModuleLoader &_module_loader;
- const GraphLoader &_graph_loader;
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph;
+ const std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
};
} // namespace luci_interpreter
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loader/GraphLoader.h"
+#include "loader/KernelBuilder.h"
+
+#include <kernels/Add.h>
+#include <kernels/ArgMax.h>
+#include <kernels/AveragePool2D.h>
+#include <kernels/Concatenation.h>
+#include <kernels/Conv2D.h>
+#include <kernels/DepthToSpace.h>
+#include <kernels/DepthwiseConv2D.h>
+#include <kernels/Elu.h>
+#include <kernels/FullyConnected.h>
+#include <kernels/L2Normalize.h>
+#include <kernels/L2Pool2D.h>
+#include <kernels/LeakyRelu.h>
+#include <kernels/LocalResponseNormalization.h>
+#include <kernels/Logistic.h>
+#include <kernels/MaxPool2D.h>
+#include <kernels/Mean.h>
+#include <kernels/Mul.h>
+#include <kernels/Pad.h>
+#include <kernels/Reshape.h>
+#include <kernels/Reverse.h>
+#include <kernels/Slice.h>
+#include <kernels/Softmax.h>
+#include <kernels/SpaceToDepth.h>
+#include <kernels/Split.h>
+#include <kernels/Squeeze.h>
+#include <kernels/StridedSlice.h>
+#include <kernels/Transpose.h>
+#include <kernels/TransposeConv.h>
+#include <kernels/Unpack.h>
+
+#include <gmock/gmock.h>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class KernelBuilderTest : public Test
+{
+protected:
+ luci::CircleInput *createInputNode() { return createNode<luci::CircleInput>(); }
+
+ template <typename NodeT, typename... Args> NodeT *createNode(Args &&... args)
+ {
+ auto *node = _graph.nodes()->create<NodeT>(std::forward<Args>(args)...);
+ // The actual type does not matter for the purpose of the tests.
+ // NOTE The type is meaningless for nodes with multiple outputs (corresponding *Out nodes carry
+ // actual output types).
+ node->dtype(loco::DataType::FLOAT32);
+ return node;
+ }
+
+ template <typename NodeOutT> NodeOutT *createNodeOut(loco::Node *node, int index)
+ {
+ auto *node_out = createNode<NodeOutT>();
+ node_out->input(node);
+ node_out->index(index);
+ return node_out;
+ }
+
+ template <typename KernelT> std::unique_ptr<KernelT> buildKernel(const luci::CircleNode *op)
+ {
+ std::unordered_map<const loco::Graph *, RuntimeGraph *> graph_to_runtime_graph;
+
+ RuntimeGraph runtime_graph(nullptr);
+ RuntimeToIR runtime_to_ir;
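+ // loadTensors() fills _node_to_tensor, which KernelBuilder uses to resolve the
+ // kernel's input/output tensors for the node under test.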
+ GraphLoader graph_loader(&_graph, &runtime_graph, runtime_to_ir, graph_to_runtime_graph,
+ _node_to_tensor);
+ graph_loader.loadTensors();
+
+ KernelBuilder kernel_builder(graph_to_runtime_graph, _node_to_tensor);
+
+ auto kernel = op->accept(&kernel_builder);
+ return std::unique_ptr<KernelT>(dynamic_cast<KernelT *>(kernel.release()));
+ }
+
+ void checkTensor(const Tensor *tensor, const loco::Node *node)
+ {
+ EXPECT_THAT(tensor, Eq(_node_to_tensor.at(node)));
+ }
+
+private:
+ loco::Graph _graph;
+ std::unordered_map<const loco::Node *, Tensor *> _node_to_tensor;
+};
+
+TEST_F(KernelBuilderTest, Add)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleAdd>();
+ op->x(input1);
+ op->y(input2);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Add>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, ArgMax)
+{
+ auto *input = createInputNode();
+ auto *axis = createInputNode();
+
+ auto *op = createNode<luci::CircleArgMax>();
+ op->input(input);
+ op->dimension(axis);
+
+ op->output_type(loco::DataType::FLOAT32);
+
+ auto kernel = buildKernel<kernels::ArgMax>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->axis(), axis);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().output_type, Eq(op->output_type()));
+}
+
+TEST_F(KernelBuilderTest, AveragePool2D)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleAveragePool2D>();
+ op->value(input);
+
+ op->padding(luci::Padding::SAME);
+ op->filter()->h(11);
+ op->filter()->w(13);
+ op->stride()->h(17);
+ op->stride()->w(19);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::AveragePool2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h()));
+ EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Concatenation)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleConcatenation>(2);
+ op->values(0, input1);
+ op->values(1, input2);
+ op->axis(11);
+
+ auto kernel = buildKernel<kernels::Concatenation>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(0), input1);
+ checkTensor(kernel->input(1), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
+}
+
+TEST_F(KernelBuilderTest, Conv2D)
+{
+ auto *input = createInputNode();
+ auto *filter = createInputNode();
+ auto *bias = createInputNode();
+
+ auto *op = createNode<luci::CircleConv2D>();
+ op->input(input);
+ op->filter(filter);
+ op->bias(bias);
+
+ op->padding(luci::Padding::SAME);
+ op->stride()->h(11);
+ op->stride()->w(13);
+ op->dilation()->h(17);
+ op->dilation()->w(19);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Conv2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->filter(), filter);
+ checkTensor(kernel->bias(), bias);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().dilation_height_factor, Eq(op->dilation()->h()));
+ EXPECT_THAT(kernel->params().dilation_width_factor, Eq(op->dilation()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, DepthToSpace)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleDepthToSpace>();
+ op->input(input);
+
+ op->block_size(11);
+
+ auto kernel = buildKernel<kernels::DepthToSpace>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().block_size, Eq(op->block_size()));
+}
+
+TEST_F(KernelBuilderTest, DepthwiseConv2D)
+{
+ auto *input = createInputNode();
+ auto *filter = createInputNode();
+ auto *bias = createInputNode();
+
+ auto *op = createNode<luci::CircleDepthwiseConv2D>();
+ op->input(input);
+ op->filter(filter);
+ op->bias(bias);
+
+ op->padding(luci::Padding::SAME);
+ op->depthMultiplier(11);
+ op->stride()->h(13);
+ op->stride()->w(17);
+ op->dilation()->h(19);
+ op->dilation()->w(23);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::DepthwiseConv2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->filter(), filter);
+ checkTensor(kernel->bias(), bias);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().depth_multiplier, Eq(op->depthMultiplier()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().dilation_height_factor, Eq(op->dilation()->h()));
+ EXPECT_THAT(kernel->params().dilation_width_factor, Eq(op->dilation()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Elu)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleElu>();
+ op->features(input);
+
+ auto kernel = buildKernel<kernels::Elu>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, FullyConnected)
+{
+ auto *input = createInputNode();
+ auto *weights = createInputNode();
+ auto *bias = createInputNode();
+
+ auto *op = createNode<luci::CircleFullyConnected>();
+ op->input(input);
+ op->weights(weights);
+ op->bias(bias);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::FullyConnected>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->weights(), weights);
+ checkTensor(kernel->bias(), bias);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, L2Normalize)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleL2Normalize>();
+ op->x(input);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::L2Normalize>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, L2Pool2D)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleL2Pool2D>();
+ op->value(input);
+
+ op->padding(luci::Padding::SAME);
+ op->filter()->h(11);
+ op->filter()->w(13);
+ op->stride()->h(17);
+ op->stride()->w(19);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::L2Pool2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h()));
+ EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, LeakyRelu)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLeakyRelu>();
+ op->features(input);
+
+ op->alpha(11.0f);
+
+ auto kernel = buildKernel<kernels::LeakyRelu>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().alpha, Eq(op->alpha()));
+}
+
+TEST_F(KernelBuilderTest, LocalResponseNormalization)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLocalResponseNormalization>();
+ op->input(input);
+
+ op->radius(11);
+ op->bias(13.0f);
+ op->alpha(15.0f);
+ op->beta(17.0f);
+
+ auto kernel = buildKernel<kernels::LocalResponseNormalization>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().radius, Eq(op->radius()));
+ EXPECT_THAT(kernel->params().bias, Eq(op->bias()));
+ EXPECT_THAT(kernel->params().alpha, Eq(op->alpha()));
+ EXPECT_THAT(kernel->params().beta, Eq(op->beta()));
+}
+
+TEST_F(KernelBuilderTest, Logistic)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLogistic>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Logistic>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, MaxPool2D)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleMaxPool2D>();
+ op->value(input);
+
+ op->padding(luci::Padding::SAME);
+ op->filter()->h(11);
+ op->filter()->w(13);
+ op->stride()->h(17);
+ op->stride()->w(19);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::MaxPool2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h()));
+ EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Mean)
+{
+ auto *input = createInputNode();
+ auto *axes = createInputNode();
+
+ auto *op = createNode<luci::CircleMean>();
+ op->input(input);
+ op->reduction_indices(axes);
+
+ op->keep_dims(true);
+
+ auto kernel = buildKernel<kernels::Mean>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->axes(), axes);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().keep_dims, Eq(op->keep_dims()));
+}
+
+TEST_F(KernelBuilderTest, Mul)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleMul>();
+ op->x(input1);
+ op->y(input2);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Mul>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Pad)
+{
+ auto *input = createInputNode();
+ auto *paddings = createInputNode();
+
+ auto *op = createNode<luci::CirclePad>();
+ op->input(input);
+ op->paddings(paddings);
+
+ auto kernel = buildKernel<kernels::Pad>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->paddings(), paddings);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Reshape)
+{
+ auto *input = createInputNode();
+ auto *shape = createInputNode();
+
+ auto *op = createNode<luci::CircleReshape>();
+ op->tensor(input);
+ op->shape(shape);
+
+ auto kernel = buildKernel<kernels::Reshape>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->shape(), shape);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, ReverseV2)
+{
+ auto *input = createInputNode();
+ auto *axes = createInputNode();
+
+ auto *op = createNode<luci::CircleReverseV2>();
+ op->tensor(input);
+ op->axis(axes);
+
+ auto kernel = buildKernel<kernels::Reverse>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->axes(), axes);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Slice)
+{
+ auto *input = createInputNode();
+ auto *begin = createInputNode();
+ auto *size = createInputNode();
+
+ auto *op = createNode<luci::CircleSlice>();
+ op->input(input);
+ op->begin(begin);
+ op->size(size);
+
+ auto kernel = buildKernel<kernels::Slice>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->begin(), begin);
+ checkTensor(kernel->size(), size);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Softmax)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleSoftmax>();
+ op->logits(input);
+
+ op->beta(11.0f);
+
+ auto kernel = buildKernel<kernels::Softmax>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().beta, Eq(op->beta()));
+}
+
+TEST_F(KernelBuilderTest, SpaceToDepth)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleSpaceToDepth>();
+ op->input(input);
+
+ op->block_size(11);
+
+ auto kernel = buildKernel<kernels::SpaceToDepth>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().block_size, Eq(op->block_size()));
+}
+
+TEST_F(KernelBuilderTest, Split)
+{
+ auto *axis = createInputNode();
+ auto *input = createInputNode();
+ auto *op = createNode<luci::CircleSplit>();
+ auto *output1 = createNodeOut<luci::CircleSplitOut>(op, 0);
+ auto *output2 = createNodeOut<luci::CircleSplitOut>(op, 1);
+
+ op->split_dim(axis);
+ op->input(input);
+
+ op->num_split(2);
+
+ auto kernel = buildKernel<kernels::Split>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->axis(), axis);
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(0), output1);
+ checkTensor(kernel->output(1), output2);
+}
+
+TEST_F(KernelBuilderTest, Squeeze)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleSqueeze>();
+ op->input(input);
+
+ op->squeeze_dims({11, 13});
+
+ auto kernel = buildKernel<kernels::Squeeze>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().squeeze_dims, ElementsAreArray(op->squeeze_dims()));
+}
+
+TEST_F(KernelBuilderTest, StridedSlice)
+{
+ auto *input = createInputNode();
+ auto *begin = createInputNode();
+ auto *end = createInputNode();
+ auto *strides = createInputNode();
+
+ auto *op = createNode<luci::CircleStridedSlice>();
+ op->input(input);
+ op->begin(begin);
+ op->end(end);
+ op->strides(strides);
+
+ op->begin_mask(11);
+ op->ellipsis_mask(13);
+ op->end_mask(17);
+ op->new_axis_mask(19);
+ op->shrink_axis_mask(23);
+
+ auto kernel = buildKernel<kernels::StridedSlice>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->begin(), begin);
+ checkTensor(kernel->end(), end);
+ checkTensor(kernel->strides(), strides);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().begin_mask, Eq(op->begin_mask()));
+ EXPECT_THAT(kernel->params().ellipsis_mask, Eq(op->ellipsis_mask()));
+ EXPECT_THAT(kernel->params().end_mask, Eq(op->end_mask()));
+ EXPECT_THAT(kernel->params().new_axis_mask, Eq(op->new_axis_mask()));
+ EXPECT_THAT(kernel->params().shrink_axis_mask, Eq(op->shrink_axis_mask()));
+}
+
+TEST_F(KernelBuilderTest, Transpose)
+{
+ auto *input = createInputNode();
+ auto *perm = createInputNode();
+
+ auto *op = createNode<luci::CircleTranspose>();
+ op->a(input);
+ op->perm(perm);
+
+ auto kernel = buildKernel<kernels::Transpose>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->perm(), perm);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, TransposeConv)
+{
+ auto *output_shape = createInputNode();
+ auto *filter = createInputNode();
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleTransposeConv>();
+ op->inputSizes(output_shape);
+ op->filter(filter);
+ op->outBackprop(input);
+
+ op->padding(luci::Padding::SAME);
+ op->stride()->h(11);
+ op->stride()->w(13);
+
+ auto kernel = buildKernel<kernels::TransposeConv>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->output_shape(), output_shape);
+ checkTensor(kernel->filter(), filter);
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+}
+
+TEST_F(KernelBuilderTest, Unpack)
+{
+ auto *input = createInputNode();
+ auto *op = createNode<luci::CircleUnpack>();
+ auto *output1 = createNodeOut<luci::CircleUnpackOut>(op, 0);
+ auto *output2 = createNodeOut<luci::CircleUnpackOut>(op, 1);
+
+ op->value(input);
+
+ op->num(2);
+ op->axis(11);
+
+ auto kernel = buildKernel<kernels::Unpack>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(0), output1);
+ checkTensor(kernel->output(1), output2);
+ EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
+}
+
+TEST_F(KernelBuilderTest, NonExisting1_NEG)
+{
+ auto *op = createNode<luci::CircleConst>();
+ ASSERT_ANY_THROW(buildKernel<Kernel>(op));
+}
+
+TEST_F(KernelBuilderTest, NonExisting2_NEG)
+{
+ auto *op = createNode<luci::CircleInput>();
+ ASSERT_ANY_THROW(buildKernel<Kernel>(op));
+}
+
+TEST_F(KernelBuilderTest, NonExisting3_NEG)
+{
+ auto *op = createNode<luci::CircleOutput>();
+ ASSERT_ANY_THROW(buildKernel<Kernel>(op));
+}
+
+} // namespace
+} // namespace luci_interpreter
{
const loco::Graph *graph = _module->graph(i);
RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph);
- GraphLoader loader(*this, graph, runtime_graph, _runtime_to_ir, _node_to_tensor);
- loader.load();
+ GraphLoader loader(graph, runtime_graph, _runtime_to_ir, _graph_to_runtime_graph,
+ _node_to_tensor);
+ loader.loadTensors();
+ loader.initInputOutputTensors();
+ loader.loadOperators();
}
}
void load();
- RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const
- {
- return _graph_to_runtime_graph.at(graph);
- }
-
private:
const luci::Module *_module;
RuntimeModule *_runtime_module;
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify.sh"
"${CMAKE_CURRENT_BINARY_DIR}"
"${ARTIFACTS_BIN_PATH}"
- "${NNCC_OVERLAY_DIR}/venv_1_13_2"
+ "${NNCC_OVERLAY_DIR}/venv_2_3_0"
${LUCI_VALUE_TESTS}
)
#
# HOW TO USE
#
-# ./evalverify.sh <path/to/work_dir> <TEST 1> <TEST 2> ...
-# work_dir : build directory of luci-value-test (ex: build/compiler/luci-value-test)
+# ./evalverify.sh <path/to/bin_dir> <path/to/work_dir> <path/to/venv_dir> <TEST 1> <TEST 2> ...
+# bin_dir : build directory of luci-value-test (ex: build/compiler/luci-value-test)
+# work_dir : artifacts directory where test materials exist
+# venv_dir : python virtual environment home directory
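+#
+# (ex: ./evalverify.sh <bin_dir> <artifacts_dir> <venv_dir> Add_000 Mul_000)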
VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/luci_eval_verifier.py"
input_data = np.array(
np.random.randint(0, 256, size=input_details["shape"]),
input_details["dtype"])
+ elif input_details["dtype"] == np.bool_:
+ input_data = np.array(
+ np.random.choice(a=[True, False], size=input_details["shape"]),
+ input_details["dtype"])
else:
raise SystemExit("Unsupported input dtype")
# Do inference
interpreter.invoke()
-# Get reference output data.
-assert len(interpreter.get_output_details()) == 1 # TODO: Support multiple outputs
-output_details = interpreter.get_output_details()[0]
-ref_output_data = interpreter.get_tensor(output_details["index"])
-
# Execute luci interpreter.
subprocess.run(
[
str(num_inputs), circle_model + ".input", circle_model + ".output"
],
check=True)
-output_data = np.fromfile(circle_model + ".output", output_details["dtype"])
-shape_file = open(circle_model + ".output.shape", 'r')
-output_shape = [int(i) for i in shape_file.read().split(',')]
-shape_file.close()
-luci_output_data = np.reshape(output_data, output_shape)
# Compare the results.
-try:
- if output_details["dtype"] == np.uint8:
- if np.allclose(luci_output_data, ref_output_data, rtol=0, atol=0) == False:
- raise SystemExit("Execution result of " + tflite_model +
- " does not match with " + circle_model)
- elif output_details["dtype"] == np.float32:
- if np.allclose(
- luci_output_data, ref_output_data, rtol=1.e-5, atol=1.e-5) == False:
- raise SystemExit("Execution result of " + tflite_model +
- " does not match with " + circle_model)
- else:
- raise SystemExit("Unsupported data type: ", output_details["dtype"])
-except:
- print(traceback.format_exc())
- quit(255)
+for idx in range(len(interpreter.get_output_details())):
+ output_details = interpreter.get_output_details()[idx]
+ output_data = np.fromfile(circle_model + ".output" + str(idx),
+ output_details["dtype"])
+ shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r')
+ output_shape = [int(i) for i in shape_file.read().split(',')]
+ shape_file.close()
+ luci_output_data = np.reshape(output_data, output_shape)
+ ref_output_data = interpreter.get_tensor(output_details["index"])
+ # Integer outputs must match exactly; float outputs are compared with a small tolerance.
+ try:
+ if output_details["dtype"] == np.uint8:
+ if np.allclose(luci_output_data, ref_output_data, rtol=0, atol=0) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ elif output_details["dtype"] == np.float32:
+ if np.allclose(
+ luci_output_data, ref_output_data, rtol=1.e-5, atol=1.e-5) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ elif output_details["dtype"] == np.int64:
+ if np.allclose(luci_output_data, ref_output_data, rtol=0, atol=0) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ elif output_details["dtype"] == np.int32:
+ if np.allclose(luci_output_data, ref_output_data, rtol=0, atol=0) == False:
+ raise SystemExit("Execution result of " + tflite_model +
+ " does not match with " + circle_model)
+ else:
+ raise SystemExit("Unsupported data type: ", output_details["dtype"])
+ except:
+ print(traceback.format_exc())
+ quit(255)
quit(0)
#addeval(Abs_000)
addeval(Add_000)
+#addeval(Add_001)
addeval(Add_U8_000)
-#addeval(ArgMax_000)
-#addeval(ArgMax_001)
-#addeval(ArgMax_002)
-#addeval(ArgMax_003)
-#addeval(ArgMax_U8_000)
-#addeval(ArgMax_U8_001)
-#addeval(ArgMax_U8_002)
-#addeval(ArgMax_U8_003)
+#addeval(AddN_000)
+addeval(ArgMax_000)
+addeval(ArgMax_001)
+addeval(ArgMax_002)
+addeval(ArgMax_003)
+addeval(ArgMax_U8_000)
+addeval(ArgMax_U8_001)
+addeval(ArgMax_U8_002)
+addeval(ArgMax_U8_003)
+#addeval(ArgMin_000)
+#addeval(ArgMin_001)
+#addeval(ArgMin_002)
+#addeval(ArgMin_003)
+#addeval(ArgMin_U8_000)
+#addeval(ArgMin_U8_001)
+#addeval(ArgMin_U8_002)
+#addeval(ArgMin_U8_003)
addeval(AveragePool2D_000)
+#addeval(BatchMatMul_000)
#addeval(BatchMatMulV2_000)
#addeval(BatchMatMulV2_001)
#addeval(BatchToSpaceND_000)
#addeval(Cast_000)
+#addeval(Cast_001)
+#addeval(Ceil_000)
addeval(Concatenation_000)
addeval(Concatenation_U8_000)
addeval(Conv2D_000)
addeval(Conv2D_001)
addeval(Conv2D_002)
+#addeval(Conv2D_003)
addeval(Conv2D_U8_000)
addeval(Conv2D_U8_001)
#addeval(Cos_000)
+#addeval(DepthToSpace_000)
addeval(DepthwiseConv2D_000)
addeval(DepthwiseConv2D_U8_000)
+#addeval(DepthwiseConv2D_U8_001)
+addeval(DepthwiseConv2D_001)
#addeval(Div_000)
+addeval(ELU_000)
#addeval(Equal_000)
#addeval(Exp_000)
+#addeval(ExpandDims_000)
+#addeval(ExpandDims_001)
+#addeval(ExpandDims_002)
+#addeval(ExpandDims_003)
+#addeval(Fill_000)
+#addeval(Fill_001)
+#addeval(Floor_000)
+#addeval(FloorDiv_000)
+#addeval(FloorDiv_001)
+#addeval(FloorMod_000)
+#addeval(FloorMod_001)
addeval(FullyConnected_000)
addeval(FullyConnected_001)
-#addeval(FullyConnected_002)
+addeval(FullyConnected_002)
#addeval(FullyConnected_U8_000)
#addeval(Gather_000)
-#addeval(If_000)
-#addeval(If_001)
+#addeval(GatherNd_000)
+#addeval(Greater_000)
+#addeval(GreaterEqual_000)
+addeval(If_000)
+addeval(If_001)
+addeval(L2Normalize_000)
+addeval(L2Pool2D_000)
+#addeval(L2Pool2D_U8_000)
+addeval(LeakyRelu_000)
+#addeval(Less_000)
+#addeval(LessEqual_000)
+addeval(LocalResponseNormalization_000)
+#addeval(Log_000)
+#addeval(LogicalAnd_000)
#addeval(LogicalNot_000)
#addeval(LogicalOr_000)
-#addeval(Logistic_000)
+addeval(Logistic_000)
+#addeval(LogSoftmax_000)
+#addeval(MatMul_000)
+#addeval(MatrixDiag_000)
+#addeval(MatrixSetDiag_000)
+#addeval(Maximum_000)
addeval(MaxPool2D_000)
addeval(MaxPool2D_U8_000)
addeval(Mean_000)
addeval(Mean_001)
-addeval(Mean_U8_000)
+#addeval(Mean_U8_000)
+#addeval(Minimum_000)
+#addeval(MirrorPad_000)
addeval(Mul_000)
#addeval(Mul_U8_000)
+#addeval(Neg_000)
+#addeval(NotEqual_000)
+#addeval(OneHot_000)
+#addeval(OneHot_001)
+#addeval(OneHot_002)
+#addeval(OneHot_003)
#addeval(Pack_000)
#addeval(Pack_U8_000)
addeval(Pad_000)
addeval(Pad_U8_000)
+#addeval(Pow_000)
+#addeval(PRelu_000)
+#addeval(Range_000)
+#addeval(Rank_000)
+#addeval(ReduceAny_000)
+#addeval(ReduceAny_001)
+#addeval(ReduceAny_002)
+#addeval(ReduceAny_003)
+#addeval(ReduceMax_000)
+#addeval(ReduceMin_000)
#addeval(ReduceProd_000)
#addeval(ReduceProd_001)
#addeval(ReduceProd_002)
#addeval(ReduceProd_003)
#addeval(ReLU_000)
+#addeval(ReLU6_000)
+#addeval(ReLUN1To1_000)
addeval(Reshape_000)
addeval(Reshape_001)
addeval(Reshape_002)
#addeval(Reshape_003)
addeval(Reshape_U8_000)
+#addeval(ResizeBilinear_000)
+#addeval(ResizeNearestNeighbor_000)
+#addeval(ReverseSequence_000)
+#addeval(ReverseV2_000)
+#addeval(Round_000)
#addeval(Rsqrt_000)
+#addeval(ScatterNd_000)
+#addeval(SegmentSum_000)
+#addeval(Select_000)
+#addeval(Select_001)
+#addeval(Select_002)
+#addeval(SelectV2_000)
+#addeval(SelectV2_001)
+#addeval(SelectV2_002)
+#addeval(Shape_000)
#addeval(Sin_000)
+addeval(Slice_000)
addeval(Softmax_000)
#addeval(Softmax_U8_000)
#addeval(SpaceToBatchND_000)
#addeval(SpaceToBatchND_001)
#addeval(SpaceToBatchND_002)
#addeval(SpaceToBatchND_003)
-#addeval(StridedSlice_000)
-#addeval(StridedSlice_001)
+addeval(SpaceToDepth_000)
+#addeval(SparseToDense_000)
+addeval(Split_000)
+#addeval(SplitV_000)
+#addeval(Sqrt_000)
+#addeval(Square_000)
+#addeval(SquaredDifference_000)
+addeval(Squeeze_000)
+addeval(StridedSlice_000)
+addeval(StridedSlice_001)
+addeval(StridedSlice_002)
#addeval(Sub_000)
#addeval(Sub_U8_000)
+#addeval(Sum_000)
+#addeval(Sum_001)
#addeval(Tanh_000)
#addeval(Tile_000)
#addeval(Tile_U8_000)
-#addeval(Transpose_000)
-#addeval(Unpack_000)
-#addeval(Unpack_001)
-#addeval(Unpack_002)
+#addeval(TopKV2_000)
+#addeval(TopKV2_001)
+addeval(Transpose_000)
+#addeval(TransposeConv_000)
+addeval(Unpack_000)
+addeval(Unpack_001)
+addeval(Unpack_002)
+addeval(Unpack_003)
+#addeval(Where_000)
+#addeval(Where_001)
#addeval(While_000)
#addeval(While_001)
+#addeval(While_002)
+#addeval(While_003)
+#addeval(YUV_TO_RGB_U8_000)
+#addeval(ZerosLike_000)
assert(num_inputs == input_nodes.size());
for (int32_t i = 0; i < num_inputs; i++)
{
- const auto *input_node = dynamic_cast<const luci::CircleInput *>(input_nodes[i]);
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[i]);
std::vector<char> input_data(getTensorSize(input_node));
readDataFromFile(std::string(input_prefix) + std::to_string(i), input_data.data(),
input_data.size());
// Get output.
const auto output_nodes = loco::output_nodes(module->graph());
- // TODO: Support multiple outputs
- assert(output_nodes.size() == 1);
- const auto *output_node = dynamic_cast<const luci::CircleOutput *>(output_nodes[0]);
- std::vector<char> output_data(getTensorSize(output_node));
- interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
-
- // Output data is written in ${output_file}
- // (ex: Add.circle.output)
- // Output shape is written in ${output_file}.shape
- // (ex: Add.circle.output.shape)
- // TODO: Use HDF5 file format
- writeDataToFile(output_file, output_data.data(), output_data.size());
- auto shape_str = std::to_string(output_node->dim(0).value());
- for (int i = 1; i < output_node->rank(); i++)
+ for (int i = 0; i < module->graph()->outputs()->size(); i++)
{
- shape_str += ",";
- shape_str += std::to_string(output_node->dim(i).value());
+ const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+ std::vector<char> output_data(getTensorSize(output_node));
+ interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+
+ // Output data is written in ${output_file}
+ // (ex: Add.circle.output0)
+ // Output shape is written in ${output_file}.shape
+ // (ex: Add.circle.output0.shape)
+ writeDataToFile(std::string(output_file) + std::to_string(i), output_data.data(),
+ output_data.size());
+ // An output tensor with rank 0 holds a scalar value; its shape is written as (1).
+ if (output_node->rank() == 0)
+ {
+ writeDataToFile(std::string(output_file) + std::to_string(i) + ".shape", "1", 1);
+ }
+ else
+ {
+ auto shape_str = std::to_string(output_node->dim(0).value());
+ for (int j = 1; j < output_node->rank(); j++)
+ {
+ shape_str += ",";
+ shape_str += std::to_string(output_node->dim(j).value());
+ }
+ writeDataToFile(std::string(output_file) + std::to_string(i) + ".shape", shape_str.c_str(),
+ shape_str.size());
+ }
}
- writeDataToFile(std::string(output_file) + ".shape", shape_str.c_str(), shape_str.size());
return EXIT_SUCCESS;
}
void visit(luci::CircleMirrorPad *) final;
void visit(luci::CircleMul *) final;
void visit(luci::CircleNeg *) final;
+ void visit(luci::CircleNonMaxSuppressionV4 *) final;
void visit(luci::CircleNotEqual *) final;
void visit(luci::CircleOneHot *) final;
void visit(luci::CirclePack *) final;
void visit(luci::CircleTopKV2 *) final;
void visit(luci::CircleTranspose *) final;
void visit(luci::CircleTransposeConv *) final;
+ void visit(luci::CircleUnique *) final;
void visit(luci::CircleUnpack *) final;
void visit(luci::CircleWhere *) final;
void visit(luci::CircleWhile *) final;
// Virtual for multiple-outputs
void visit(luci::CircleCustomOut *) final {}
void visit(luci::CircleIfOut *) final {}
+ void visit(luci::CircleNonMaxSuppressionV4Out *) final {}
void visit(luci::CircleSplitOut *) final {}
void visit(luci::CircleSplitVOut *) final {}
void visit(luci::CircleTopKV2Out *) final {}
+ void visit(luci::CircleUniqueOut *) final {}
void visit(luci::CircleUnpackOut *) final {}
void visit(luci::CircleWhileOut *) final {}
{
export_simple(node, circle::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
circle::BuiltinOptions_LocalResponseNormalizationOptions,
- CreateLocalResponseNormalizationOptions(builder).Union());
+ CreateLocalResponseNormalizationOptions(builder, node->radius(), node->bias(),
+ node->alpha(), node->beta())
+ .Union());
}
void OperationExporter::visit(luci::CircleLog *node)
CreateNegOptions(builder).Union());
}
+void OperationExporter::visit(luci::CircleNonMaxSuppressionV4 *node)
+{
+ auto nms_outs = loco::succs(node);
+ assert(nms_outs.size() == 2);
+
+ uint32_t op_idx =
+ md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V4, node->op_version());
+ std::vector<int32_t> inputs_vec{
+ get_tensor_index(node->boxes()), get_tensor_index(node->scores()),
+ get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
+ get_tensor_index(node->score_threshold()),
+ };
+ std::vector<int32_t> outputs_vec;
+
+ for (uint32_t idx = 0; idx < nms_outs.size(); ++idx)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : nms_outs)
+ {
+ auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(out);
+ if (nms_out->index() == static_cast<int32_t>(idx))
+ {
+ outputs_vec.push_back(get_tensor_index(nms_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid NonMaxSuppressionV4 output");
+ }
+ }
+
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateNonMaxSuppressionV4Options(builder);
+ auto op_offset =
+ CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_NonMaxSuppressionV4Options, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
void OperationExporter::visit(luci::CircleNotEqual *node)
{
export_simple(node, circle::BuiltinOperator_NOT_EQUAL, circle::BuiltinOptions_NotEqualOptions,
{
export_simple(node, circle::BuiltinOperator_SPACE_TO_DEPTH,
circle::BuiltinOptions_SpaceToDepthOptions,
- CreateSpaceToDepthOptions(builder).Union());
+ CreateSpaceToDepthOptions(builder, node->block_size()).Union());
}
void OperationExporter::visit(luci::CircleSparseToDense *node)
.Union());
}
+void OperationExporter::visit(luci::CircleUnique *node)
+{
+ auto unique_outs = loco::succs(node);
+ assert(int32_t(unique_outs.size()) == 2);
+ uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_UNIQUE, node->op_version());
+
+ std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
+ std::vector<int32_t> outputs_vec;
+
+ for (int32_t index = 0; index < 2; index++)
+ {
+ // store in order of index
+ bool found = false;
+ for (auto out : unique_outs)
+ {
+ auto unique_out = loco::must_cast<luci::CircleUniqueOut *>(out);
+ if (unique_out->index() == index)
+ {
+ outputs_vec.push_back(get_tensor_index(unique_out));
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ INTERNAL_EXN("Invalid Unique output");
+ }
+ }
+
+ auto inputs = builder.CreateVector(inputs_vec);
+ auto outputs = builder.CreateVector(outputs_vec);
+ auto options = CreateUniqueOptions(builder, to_circle_tensortype(node->idx_out_type()));
+ auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
+ circle::BuiltinOptions_UniqueOptions, options.Union());
+ gd._operators.push_back(op_offset);
+}
+
void OperationExporter::visit(luci::CircleUnpack *node)
{
LOGGER(l);
scale = builder.CreateVector(quantparam->scale);
zero_point = builder.CreateVector(quantparam->zerop);
}
- return circle::CreateQuantizationParameters(builder, min, max, scale, zero_point);
+ // Note: QuantizationDetails is not supported
+ return circle::CreateQuantizationParameters(builder, min, max, scale, zero_point,
+ circle::QuantizationDetails::QuantizationDetails_NONE,
+ 0, quantparam->quantized_dimension);
}
void exportOpDefinedTensor(const CircleTensoInfo &info, FlatBufferBuilder &builder,
#include "Nodes/CircleMirrorPad.h"
#include "Nodes/CircleMul.h"
#include "Nodes/CircleNeg.h"
+#include "Nodes/CircleNonMaxSuppressionV4.h"
#include "Nodes/CircleNotEqual.h"
#include "Nodes/CircleOneHot.h"
#include "Nodes/CirclePack.h"
#include "Nodes/CircleTopKV2.h"
#include "Nodes/CircleTranspose.h"
#include "Nodes/CircleTransposeConv.h"
+#include "Nodes/CircleUnique.h"
#include "Nodes/CircleUnpack.h"
#include "Nodes/CircleWhere.h"
#include "Nodes/CircleWhile.h"
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V4_H__
+#define __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V4_H__
+
+#include "luci/Import/GraphBuilderBase.h"
+
+namespace luci
+{
+
+class CircleNonMaxSuppressionV4GraphBuilder : public GraphBuilderBase
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+ void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V4_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_UNIQUE_H__
+#define __LUCI_IMPORT_OP_CIRCLE_UNIQUE_H__
+
+#include "luci/Import/GraphBuilderBase.h"
+
+namespace luci
+{
+
+class CircleUniqueGraphBuilder : public GraphBuilderBase
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+ void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_UNIQUE_H__
const auto &max = quantization->max;
const auto &scale = quantization->scale;
const auto &zero_point = quantization->zero_point;
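+  // quantized_dimension is the axis used for per-channel (per-axis) quantization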
+ const auto &quantized_dimension = quantization->quantized_dimension;
if ((!min.empty() && !max.empty()) || (!scale.empty() && !zero_point.empty()))
{
quantparam->max = max;
quantparam->scale = scale;
quantparam->zerop = zero_point;
+ quantparam->quantized_dimension = quantized_dimension;
return quantparam;
}
CIRCLE_NODE(MIRROR_PAD, CircleMirrorPadGraphBuilder); // 100
CIRCLE_NODE(MUL, CircleMulGraphBuilder); // 18
CIRCLE_NODE(NEG, CircleNegGraphBuilder); // 59
+  CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, CircleNonMaxSuppressionV4GraphBuilder); // 120
CIRCLE_NODE(NOT_EQUAL, CircleNotEqualGraphBuilder); // 72
CIRCLE_NODE(ONE_HOT, CircleOneHotGraphBuilder); // 85
CIRCLE_NODE(PACK, CirclePackGraphBuilder); // 83
CIRCLE_NODE(TOPK_V2, CircleTopKV2GraphBuilder); // 48
CIRCLE_NODE(TRANSPOSE, CircleTransposeGraphBuilder); // 39
CIRCLE_NODE(TRANSPOSE_CONV, CircleTransposeConvGraphBuilder); // 67
+ CIRCLE_NODE(UNIQUE, CircleUniqueGraphBuilder); // 103
CIRCLE_NODE(UNPACK, CircleUnpackGraphBuilder); // 88
CIRCLE_NODE(WHERE, CircleWhereGraphBuilder); // 109
CIRCLE_NODE(WHILE, CircleWhileGraphBuilder); // 119
// BuiltinOperator_ARG_MAX = 56,
// BuiltinOperator_PADV2 = 60,
// BuiltinOperator_FAKE_QUANT = 80,
- // BuiltinOperator_UNIQUE = 103,
// BuiltinOperator_QUANTIZE = 114,
// BuiltinOperator_HARD_SWISH = 117,
- // BuiltinOperator_NON_MAX_SUPPRESSION_V4 = 120,
// BuiltinOperator_NON_MAX_SUPPRESSION_V5 = 121,
// BuiltinOperator_DENSIFY = 124,
}
#include <gtest/gtest.h>
-TEST(TensorFlowLiteImport, Dummy) { luci::Importer import; }
+TEST(TensorFlowLiteImport, Dummy)
+{
+ luci::Importer import;
+
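+  // SUCCEED() records an explicit success for this construction smoke test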
+ SUCCEED();
+}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleAbs>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleAdd>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
const auto *options = op.builtin_options.AsAddOptions();
node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleArgMax>();
- node->input(inputs[0]);
- node->dimension(inputs[1]);
+ node->input(inputs.at(0));
+ node->dimension(inputs.at(1));
const auto *options = op.builtin_options.AsArgMaxOptions();
node->output_type(luci_datatype(options->output_type));
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleArgMin>();
- node->input(inputs[0]);
- node->dimension(inputs[1]);
+ node->input(inputs.at(0));
+ node->dimension(inputs.at(1));
const auto *options = op.builtin_options.AsArgMinOptions();
node->output_type(luci_datatype(options->output_type));
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleAveragePool2D>();
- node->value(inputs[0]);
+ node->value(inputs.at(0));
const auto *options = op.builtin_options.AsPool2DOptions();
node->padding(luci_padding(options->padding));
{
auto *node = graph->nodes()->create<CircleBCQFullyConnected>();
- node->input(inputs[0]);
- node->weights_scales(inputs[1]);
- node->weights_binary(inputs[2]);
- node->bias(inputs[3]);
- node->weights_clusters(inputs[4]);
+ node->input(inputs.at(0));
+ node->weights_scales(inputs.at(1));
+ node->weights_binary(inputs.at(2));
+ node->bias(inputs.at(3));
+ node->weights_clusters(inputs.at(4));
// TODO Find and move to appropriate place for setting optional input
if (auto bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias()))
{
auto *node = graph->nodes()->create<CircleBCQGather>();
- node->input_scales(inputs[0]);
- node->input_binary(inputs[1]);
- node->indices(inputs[2]);
- node->input_clusters(inputs[3]);
+ node->input_scales(inputs.at(0));
+ node->input_binary(inputs.at(1));
+ node->indices(inputs.at(2));
+ node->input_clusters(inputs.at(3));
const auto *options = op.builtin_options.AsBCQGatherOptions();
node->input_hidden_size(options->input_hidden_size);
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleBatchMatMul>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
const auto *options = op.builtin_options.AsBatchMatMulOptions();
node->adj_x(options->adjoint_lhs);
// input 1 and 2 should have INT32/INT64 type
const auto &tensors = args.reader.tensors();
- const auto &tensor_1 = tensors.at(inputs[1]);
+ const auto &tensor_1 = tensors.at(inputs.at(1));
switch (tensor_1->type)
{
case circle::TensorType_INT32:
default:
return false;
}
- const auto &tensor_2 = tensors.at(inputs[2]);
+ const auto &tensor_2 = tensors.at(inputs.at(2));
switch (tensor_2->type)
{
case circle::TensorType_INT32:
}
// Only support input shape dimension 3 and 4 only
- const auto &tensor_0 = tensors.at(inputs[0]);
+ const auto &tensor_0 = tensors.at(inputs.at(0));
const auto t_0_s = tensor_0->shape.size();
if (t_0_s != 3 && t_0_s != 4)
return false;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleBatchToSpaceND>();
- node->input(inputs[0]);
- node->block_shape(inputs[1]);
- node->crops(inputs[2]);
+ node->input(inputs.at(0));
+ node->block_shape(inputs.at(1));
+ node->crops(inputs.at(2));
// No options for BatchToSpaceND
const circle::TensorT &output_tensor = *tensors[outputs[0]];
auto name = tensor_name(output_tensor);
- const auto &tensor_in = tensors.at(inputs[0]);
+ const auto &tensor_in = tensors.at(inputs.at(0));
if (tensor_in->type != options->in_data_type)
{
if (settings->get(luci::UserSettings::Key::DisableValidation))
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleCast>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
const auto *options = op.builtin_options.AsCastOptions();
if (options != nullptr)
}
else
{
- node->in_data_type(inputs[0]->dtype());
+ node->in_data_type(inputs.at(0)->dtype());
node->out_data_type(loco::DataType::Unknown);
// type inference should use node->dtype() for Unknown
// export should use BuiltinOptions_NONE for Unknown
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleCeil>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleConv2D>();
- node->input(inputs[0]);
- node->filter(inputs[1]);
+ node->input(inputs.at(0));
+ node->filter(inputs.at(1));
// For now, bias is required (checked in `verify` method).
assert(inputs.size() == 3);
- node->bias(inputs[2]);
+ node->bias(inputs.at(2));
const auto *options = op.builtin_options.AsConv2DOptions();
node->padding(luci_padding(options->padding));
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleCos>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
// No options for Cos
const auto &tensors = args.reader.tensors();
- if (tensors[outputs[0]]->type != tensors[inputs[0]]->type)
+ if (tensors[outputs[0]]->type != tensors[inputs.at(0)]->type)
{
return false;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleDepthToSpace>();
- node->input(inputs[0]);
+ node->input(inputs.at(0));
const auto *options = op.builtin_options.AsDepthToSpaceOptions();
node->block_size(options->block_size);
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleDepthwiseConv2D>();
- node->input(inputs[0]);
- node->filter(inputs[1]);
+ node->input(inputs.at(0));
+ node->filter(inputs.at(1));
if (inputs.size() != 3)
throw oops::UserExn("DepthwiseConv2d without bias is unsupported");
- node->bias(inputs[2]);
+ node->bias(inputs.at(2));
const auto *options = op.builtin_options.AsDepthwiseConv2DOptions();
node->padding(luci_padding(options->padding));
loco::Graph *graph) const
{
auto node = graph->nodes()->create<CircleDiv>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
const auto *options = op.builtin_options.AsDivOptions();
node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleElu>();
- node->features(inputs[0]);
+ node->features(inputs.at(0));
return node;
}
const auto &tensors = args.reader.tensors();
- return tensors[inputs[0]]->type == tensors[inputs[1]]->type;
+ return tensors[inputs.at(0)]->type == tensors[inputs.at(1)]->type;
}
CircleNode *CircleEqualGraphBuilder::build_node(const circle::OperatorT &,
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleEqual>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
// input type check
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
case circle::TensorType_FLOAT16:
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleExp>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
const auto &tensors = args.reader.tensors();
- return tensors[inputs[1]]->type == circle::TensorType_INT32;
+ return tensors[inputs.at(1)]->type == circle::TensorType_INT32;
}
CircleNode *CircleExpandDimsGraphBuilder::build_node(const circle::OperatorT &,
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleExpandDims>();
- node->input(inputs[0]);
- node->axis(inputs[1]);
+ node->input(inputs.at(0));
+ node->axis(inputs.at(1));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleFill>();
- node->dims(inputs[0]);
- node->value(inputs[1]);
+ node->dims(inputs.at(0));
+ node->value(inputs.at(1));
const auto *options = op.builtin_options.AsFillOptions();
(void)options;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleFloor>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
}
const auto &tensors = args.reader.tensors();
- const auto &tensor_in_0 = tensors.at(inputs[0]);
- const auto &tensor_in_1 = tensors.at(inputs[1]);
+ const auto &tensor_in_0 = tensors.at(inputs.at(0));
+ const auto &tensor_in_1 = tensors.at(inputs.at(1));
const auto &tensor_out = tensors.at(outputs[0]);
if (tensor_in_0->type != tensor_in_1->type)
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleFloorDiv>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor_in_0 = tensors.at(inputs[0]);
- const auto &tensor_in_1 = tensors.at(inputs[1]);
+ const auto &tensor_in_0 = tensors.at(inputs.at(0));
+ const auto &tensor_in_1 = tensors.at(inputs.at(1));
if (tensor_in_0->type != tensor_in_1->type)
return false;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleFloorMod>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleFullyConnected>();
- node->input(inputs[0]);
- node->weights(inputs[1]);
- node->bias(inputs[2]); // bias is optional
+ node->input(inputs.at(0));
+ node->weights(inputs.at(1));
+ node->bias(inputs.at(2)); // bias is optional
// TODO Find and move to appropriate place for setting optional input
if (auto bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias()))
{
auto *node = graph->nodes()->create<CircleGather>();
- node->params(inputs[0]);
- node->indices(inputs[1]);
+ node->params(inputs.at(0));
+ node->indices(inputs.at(1));
const auto *options = op.builtin_options.AsGatherOptions();
node->axis(options->axis);
if (outputs.size() != 1)
return false;
- auto &indices_tensor = args.reader.tensors()[inputs[1]];
+ auto &indices_tensor = args.reader.tensors()[inputs.at(1)];
if (!(indices_tensor->type == circle::TensorType::TensorType_INT32 ||
indices_tensor->type == circle::TensorType::TensorType_INT64))
{
auto *node = graph->nodes()->create<CircleGatherNd>();
- node->params(inputs[0]);
- node->indices(inputs[1]);
+ node->params(inputs.at(0));
+ node->indices(inputs.at(1));
// GatherNd options empty
const auto &tensors = args.reader.tensors();
- if (tensors[inputs[0]]->type != tensors[inputs[1]]->type)
+ if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
return false;
// NOTE: real models do have output dtype NOT BOOL
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleGreater>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
const auto &tensors = args.reader.tensors();
- if (tensors[inputs[0]]->type != tensors[inputs[1]]->type)
+ if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
{
return false;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleGreaterEqual>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
// input 0 should be BOOL type
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
if (tensor->type != circle::TensorType_BOOL)
return false;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleInstanceNorm>();
- node->input(inputs[0]);
- node->gamma(inputs[1]);
- node->beta(inputs[2]);
+ node->input(inputs.at(0));
+ node->gamma(inputs.at(1));
+ node->beta(inputs.at(2));
const auto *options = op.builtin_options.AsInstanceNormOptions();
node->epsilon(options->epsilon);
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleL2Normalize>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
const auto *options = op.builtin_options.AsL2NormOptions();
node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleL2Pool2D>();
- node->value(inputs[0]);
+ node->value(inputs.at(0));
const auto *options = op.builtin_options.AsPool2DOptions();
node->padding(luci_padding(options->padding));
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLeakyRelu>();
- node->features(inputs[0]);
+ node->features(inputs.at(0));
const auto *options = op.builtin_options.AsLeakyReluOptions();
node->alpha(options->alpha);
}
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
return false;
}
- if (tensors[inputs[1]]->type != tensor->type)
+ if (tensors[inputs.at(1)]->type != tensor->type)
{
return false;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLess>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
const auto &tensors = args.reader.tensors();
- if (tensors[inputs[0]]->type != tensors[inputs[1]]->type)
+ if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
{
return false;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLessEqual>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLocalResponseNormalization>();
- node->input(inputs[0]);
+ node->input(inputs.at(0));
const auto *options = op.builtin_options.AsLocalResponseNormalizationOptions();
node->radius(options->radius);
// Must be one of bfloat16, half, float32, float64, complex64, complex128.
// Currently circle supports half(float16), float32, float64, complex64.
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
case circle::TensorType_FLOAT16:
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLog>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
// No options for Log
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLogSoftmax>();
- node->logits(inputs[0]);
+ node->logits(inputs.at(0));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLogicalAnd>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
// Only BOOL type is allowed for the input
const auto &inputs = args.op.inputs;
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
if (tensor->type != circle::TensorType::TensorType_BOOL)
return false;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLogicalNot>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLogicalOr>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
if (outputs.size() != 1)
return false;
- // Must be one of the following types
- // float16, float32, float64, complex64, or complex128
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
- switch (tensor->type)
- {
- case circle::TensorType_FLOAT16:
- case circle::TensorType_FLOAT32:
- case circle::TensorType_FLOAT64:
- case circle::TensorType_COMPLEX64:
- break;
- default:
- return false;
- }
-
- if (tensors.at(inputs[0])->type != tensors.at(outputs[0])->type)
+ if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
return false;
return true;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleLogistic>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
if (tensors[outputs[0]]->type != tensor->type)
return false;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleMatrixDiag>();
- node->diagonal(inputs[0]);
+ node->diagonal(inputs.at(0));
return node;
}
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
if (tensors[outputs[0]]->type != tensor->type)
return false;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleMatrixSetDiag>();
- node->input(inputs[0]);
- node->diagonal(inputs[1]);
+ node->input(inputs.at(0));
+ node->diagonal(inputs.at(1));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleMaxPool2D>();
- node->value(inputs[0]);
+ node->value(inputs.at(0));
const auto *options = op.builtin_options.AsPool2DOptions();
node->padding(luci_padding(options->padding));
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
return false;
}
- if (tensors[inputs[1]]->type != tensor->type)
+ if (tensors[inputs.at(1)]->type != tensor->type)
return false;
if (tensors[outputs[0]]->type != tensor->type)
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleMaximum>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleMean>();
- node->input(inputs[0]);
- node->reduction_indices(inputs[1]);
+ node->input(inputs.at(0));
+ node->reduction_indices(inputs.at(1));
const auto *options = op.builtin_options.AsReducerOptions();
node->keep_dims(options->keep_dims);
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
return false;
}
- if (tensors[inputs[1]]->type != tensor->type)
+ if (tensors[inputs.at(1)]->type != tensor->type)
return false;
if (tensors[outputs[0]]->type != tensor->type)
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleMinimum>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleMirrorPad>();
- node->input(inputs[0]);
- node->paddings(inputs[1]);
+ node->input(inputs.at(0));
+ node->paddings(inputs.at(1));
const auto *options = op.builtin_options.AsMirrorPadOptions();
node->mode(luci_mirrorpad_mode(options->mode));
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleMul>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
const auto *options = op.builtin_options.AsMulOptions();
node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleNeg>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleNonMaxSuppressionV4.h"
+
+#include <luci/IR/Nodes/CircleNonMaxSuppressionV4.h>
+#include <luci/IR/Nodes/CircleNonMaxSuppressionV4Out.h>
+
+#include <loco.h>
+#include <oops/UserExn.h>
+
+namespace luci
+{
+
+bool CircleNonMaxSuppressionV4GraphBuilder::validate(const ValidateArgs &args) const
+{
+ const auto &inputs = args.op.inputs;
+ const auto &outputs = args.op.outputs;
+
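+  // NonMaxSuppressionV4 takes 5 inputs (boxes, scores, max_output_size, iou_threshold,
+  // score_threshold) and produces 2 outputs (selected_indices, valid_outputs)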
+ if (inputs.size() != 5)
+ return false;
+ if (outputs.size() != 2)
+ return false;
+
+ const auto &tensors = args.reader.tensors();
+ const auto &boxes_tensor = tensors.at(inputs[0]);
+ if (boxes_tensor->shape.size() != 2)
+ return false;
+ if (boxes_tensor->shape.at(1) != 4)
+ return false;
+ if (boxes_tensor->shape.at(0) != tensors.at(inputs[1])->shape.at(0))
+ return false;
+
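+  // max_output_size must be INT32; iou_threshold and score_threshold must be FLOAT32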
+ if (tensors.at(inputs[2])->type != circle::TensorType_INT32)
+ return false;
+ if (tensors.at(inputs[3])->type != circle::TensorType_FLOAT32)
+ return false;
+ if (tensors.at(inputs[4])->type != circle::TensorType_FLOAT32)
+ return false;
+
+ return true;
+}
+
+/**
+ * @brief NonMaxSuppressionV4 Node builder
+ *
+ * @note Current loco does not provide multiple outputs.
+ *       We create multiple CircleNonMaxSuppressionV4Out nodes to emulate this.
+ */
+
+void CircleNonMaxSuppressionV4GraphBuilder::build(const circle::OperatorT &op,
+ GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ auto graph = context->graph();
+
+ const std::vector<int32_t> &inputs = op.inputs;
+ const std::vector<int32_t> &outputs = op.outputs;
+ const auto &tensors = context->reader()->tensors();
+ const auto &opcodes = context->reader()->opcodes();
+ auto tensors_ptr = context->reader()->tensors_ptr();
+ assert(tensors_ptr != nullptr);
+
+ std::vector<CircleNode *> input_nodes;
+ for (const int32_t input_tensor_index : inputs)
+ {
+ input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
+ }
+
+ // Create CircleNonMaxSuppressionV4
+ auto node = graph->nodes()->create<CircleNonMaxSuppressionV4>();
+ node->boxes(input_nodes[0]);
+ node->scores(input_nodes[1]);
+ node->max_output_size(input_nodes[2]);
+ node->iou_threshold(input_nodes[3]);
+ node->score_threshold(input_nodes[4]);
+
+ assert(outputs.size() == 2);
+ {
+    // Use the name of output 0 as the NonMaxSuppressionV4 name
+ const circle::TensorT &output_tensor = *tensors[outputs[0]];
+ node->name(tensor_name(output_tensor));
+ node->op_version(opcodes[op.opcode_index].get()->version);
+
+    // NOTE Quantization is not set on NonMaxSuppressionV4 itself but on its virtual outputs
+ }
+
+ // Create virtual outputs of NonMaxSuppressionV4
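+  // output 0 holds selected_indices, output 1 holds valid_outputs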
+ for (size_t n = 0; n < outputs.size(); ++n)
+ {
+ const circle::TensorT &output_tensor = *tensors[outputs[n]];
+
+ auto *nodeout = graph->nodes()->create<CircleNonMaxSuppressionV4Out>();
+ copy_tensor_attributes(output_tensor, nodeout);
+
+ // mark shape_status
+ if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
+ nodeout->shape_status(ShapeStatus::NOSHAPE);
+ else
+ nodeout->shape_status(ShapeStatus::VALID);
+
+ nodeout->input(node);
+ nodeout->index(n);
+
+ context->nodefinder()->enroll(outputs[n], nodeout);
+ }
+}
+
+} // namespace luci
const auto &tensors = args.reader.tensors();
- if (tensors[inputs[0]]->type != tensors[inputs[1]]->type)
+ if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
{
return false;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleNotEqual>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
return false;
const auto &tensors = args.reader.tensors();
- const auto &indices = tensors.at(inputs[0]);
- const auto &depth = tensors.at(inputs[1]);
- const auto &on_value = tensors.at(inputs[2]);
- const auto &off_value = tensors.at(inputs[3]);
+ const auto &indices = tensors.at(inputs.at(0));
+ const auto &depth = tensors.at(inputs.at(1));
+ const auto &on_value = tensors.at(inputs.at(2));
+ const auto &off_value = tensors.at(inputs.at(3));
if (options->axis < -1 || options->axis > static_cast<int32_t>(indices->shape.size()))
return false;
{
auto *node = graph->nodes()->create<CircleOneHot>();
- node->indices(inputs[0]);
- node->depth(inputs[1]);
- node->on_value(inputs[2]);
- node->off_value(inputs[3]);
+ node->indices(inputs.at(0));
+ node->depth(inputs.at(1));
+ node->on_value(inputs.at(2));
+ node->off_value(inputs.at(3));
const auto *options = op.builtin_options.AsOneHotOptions();
node->axis(options->axis);
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CirclePRelu>();
- node->input(inputs[0]);
- node->alpha(inputs[1]);
+ node->input(inputs.at(0));
+ node->alpha(inputs.at(1));
// PRelu options are empty
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CirclePad>();
- node->input(inputs[0]);
- node->paddings(inputs[1]);
+ node->input(inputs.at(0));
+ node->paddings(inputs.at(1));
const auto *options = op.builtin_options.AsPadOptions();
(void)options; // There are no options.
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CirclePow>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
// Pow options are empty
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleRange>();
- node->start(inputs[0]);
- node->limit(inputs[1]);
- node->delta(inputs[2]);
+ node->start(inputs.at(0));
+ node->limit(inputs.at(1));
+ node->delta(inputs.at(2));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleRank>();
- node->input(inputs[0]);
+ node->input(inputs.at(0));
return node;
}
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor_0 = tensors.at(inputs[0]);
- const auto &tensor_1 = tensors.at(inputs[1]);
+ const auto &tensor_0 = tensors.at(inputs.at(0));
+ const auto &tensor_1 = tensors.at(inputs.at(1));
const auto &tensor_o = tensors.at(outputs[0]);
if (tensor_0->type != circle::TensorType_BOOL)
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleReduceAny>();
- node->input(inputs[0]);
- node->reduction_indices(inputs[1]);
+ node->input(inputs.at(0));
+ node->reduction_indices(inputs.at(1));
const auto *options = op.builtin_options.AsReducerOptions();
node->keep_dims(options->keep_dims);
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor_axis = tensors.at(inputs[1]);
+ const auto &tensor_axis = tensors.at(inputs.at(1));
switch (tensor_axis->type)
{
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleReduceMax>();
- node->input(inputs[0]);
- node->reduction_indices(inputs[1]);
+ node->input(inputs.at(0));
+ node->reduction_indices(inputs.at(1));
const auto *options = op.builtin_options.AsReducerOptions();
node->keep_dims(options->keep_dims);
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor_axis = tensors.at(inputs[1]);
+ const auto &tensor_axis = tensors.at(inputs.at(1));
switch (tensor_axis->type)
{
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleReduceMin>();
- node->input(inputs[0]);
- node->reduction_indices(inputs[1]);
+ node->input(inputs.at(0));
+ node->reduction_indices(inputs.at(1));
const auto *options = op.builtin_options.AsReducerOptions();
node->keep_dims(options->keep_dims);
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor_1 = tensors.at(inputs[1]);
+ const auto &tensor_1 = tensors.at(inputs.at(1));
// TODO check input types
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleReduceProd>();
- node->input(inputs[0]);
- node->reduction_indices(inputs[1]);
+ node->input(inputs.at(0));
+ node->reduction_indices(inputs.at(1));
const auto *options = op.builtin_options.AsReducerOptions();
node->keep_dims(options->keep_dims);
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleRelu>();
- node->features(inputs[0]);
+ node->features(inputs.at(0));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleRelu6>();
- node->features(inputs[0]);
+ node->features(inputs.at(0));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleReluN1To1>();
- node->features(inputs[0]);
+ node->features(inputs.at(0));
return node;
}
{
// If the second input is not provided, generate it based on the value of the attribute.
// TODO Presence of the second input is the current requirement of the IR.
- auto *shape_node = (inputs.size() == 2) ? inputs[1] : nullptr;
+ auto *shape_node = (inputs.size() == 2) ? inputs.at(1) : nullptr;
if (shape_node == nullptr)
{
const auto *options = op.builtin_options.AsReshapeOptions();
}
auto *node = graph->nodes()->create<CircleReshape>();
- node->tensor(inputs[0]);
+ node->tensor(inputs.at(0));
node->shape(shape_node);
const auto *options = op.builtin_options.AsReshapeOptions();
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleResizeBilinear>();
- node->input(inputs[0]);
- node->size(inputs[1]);
+ node->input(inputs.at(0));
+ node->size(inputs.at(1));
const auto *options = op.builtin_options.AsResizeBilinearOptions();
node->align_corners(options->align_corners);
const circle::OperatorT &op, const std::vector<CircleNode *> &inputs, loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleResizeNearestNeighbor>();
- node->input(inputs[0]);
- node->size(inputs[1]);
+ node->input(inputs.at(0));
+ node->size(inputs.at(1));
const auto *options = op.builtin_options.AsResizeNearestNeighborOptions();
node->align_corners(options->align_corners);
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor_in = tensors.at(inputs[0]);
- const auto &tensor_lengths = tensors.at(inputs[1]);
+ const auto &tensor_in = tensors.at(inputs.at(0));
+ const auto &tensor_lengths = tensors.at(inputs.at(1));
const auto &tensor_out = tensors.at(outputs[0]);
switch (tensor_lengths->type)
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleReverseSequence>();
- node->input(inputs[0]);
- node->seq_lengths(inputs[1]);
+ node->input(inputs.at(0));
+ node->seq_lengths(inputs.at(1));
const auto *options = op.builtin_options.AsReverseSequenceOptions();
node->seq_axis(options->seq_dim);
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor_in = tensors.at(inputs[0]);
- const auto &tensor_axis = tensors.at(inputs[1]);
+ const auto &tensor_in = tensors.at(inputs.at(0));
+ const auto &tensor_axis = tensors.at(inputs.at(1));
const auto &tensor_out = tensors.at(outputs[0]);
switch (tensor_axis->type)
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleReverseV2>();
- node->tensor(inputs[0]);
- node->axis(inputs[1]);
+ node->tensor(inputs.at(0));
+ node->axis(inputs.at(1));
return node;
}
// bfloat16, half (float16), float32, float64, complex64, complex128
// Currently, circle supports float16, float32, complex64
const auto &tensors = args.reader.tensors();
- const auto &tensor_in = tensors.at(inputs[0]);
+ const auto &tensor_in = tensors.at(inputs.at(0));
const auto &tensor_out = tensors.at(outputs[0]);
switch (tensor_in->type)
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleRound>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
// bfloat16, half (float16), float32, float64, complex64, complex128
// Currently, circle supports float16, float32, complex64
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
case circle::TensorType_FLOAT16:
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleRsqrt>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
// indices must have the same type as shape
const auto &tensors = args.reader.tensors();
- if (tensors[inputs[0]]->type != tensors[inputs[2]]->type)
+ if (tensors[inputs.at(0)]->type != tensors[inputs.at(2)]->type)
return false;
// indices must be either int32 or int64
- if (tensors[inputs[0]]->type != circle::TensorType_INT32 &&
- tensors[inputs[0]]->type != circle::TensorType_INT64)
+ if (tensors[inputs.at(0)]->type != circle::TensorType_INT32 &&
+ tensors[inputs.at(0)]->type != circle::TensorType_INT64)
return false;
return true;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleScatterNd>();
- node->indices(inputs[0]);
- node->updates(inputs[1]);
- node->shape(inputs[2]);
+ node->indices(inputs.at(0));
+ node->updates(inputs.at(1));
+ node->shape(inputs.at(2));
return node;
}
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor_in = tensors.at(inputs[0]);
+ const auto &tensor_in = tensors.at(inputs.at(0));
const auto &tensor_out = tensors.at(outputs[0]);
- const auto &tensor_ids = tensors.at(inputs[1]);
+ const auto &tensor_ids = tensors.at(inputs.at(1));
switch (tensor_ids->type)
{
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSegmentSum>();
- node->input(inputs[0]);
- node->segment_ids(inputs[1]);
+ node->input(inputs.at(0));
+ node->segment_ids(inputs.at(1));
return node;
}
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
if (tensor->type != circle::TensorType_BOOL)
return false;
// TODO check dtypes for input 1, 2
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSelect>();
- node->condition(inputs[0]);
- node->t(inputs[1]);
- node->e(inputs[2]);
+ node->condition(inputs.at(0));
+ node->t(inputs.at(1));
+ node->e(inputs.at(2));
return node;
}
return false;
const auto &tensors = args.reader.tensors();
- const auto &condition = tensors.at(inputs[0]);
+ const auto &condition = tensors.at(inputs.at(0));
if (condition->type != circle::TensorType_BOOL)
return false;
- const auto &t = tensors.at(inputs[1]);
- const auto &e = tensors.at(inputs[2]);
+ const auto &t = tensors.at(inputs.at(1));
+ const auto &e = tensors.at(inputs.at(2));
if (t->type != e->type)
return false;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSelectV2>();
- node->condition(inputs[0]);
- node->t(inputs[1]);
- node->e(inputs[2]);
+ node->condition(inputs.at(0));
+ node->t(inputs.at(1));
+ node->e(inputs.at(2));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleShape>();
- node->input(inputs[0]);
+ node->input(inputs.at(0));
const auto *options = op.builtin_options.AsShapeOptions();
node->out_type(luci_datatype(options->out_type));
// input type check
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
case circle::TensorType_FLOAT16:
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSin>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
// No options for Sin
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSlice>();
- node->input(inputs[0]);
- node->begin(inputs[1]);
- node->size(inputs[2]);
+ node->input(inputs.at(0));
+ node->begin(inputs.at(1));
+ node->size(inputs.at(2));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSoftmax>();
- node->logits(inputs[0]);
+ node->logits(inputs.at(0));
const auto *options = op.builtin_options.AsSoftmaxOptions();
node->beta(options->beta);
// input 1 and 2 should have INT32/INT64 type
const auto &tensors = args.reader.tensors();
- const auto &tensor_1 = tensors.at(inputs[1]);
+ const auto &tensor_1 = tensors.at(inputs.at(1));
switch (tensor_1->type)
{
case circle::TensorType_INT32:
default:
return false;
}
- const auto &tensor_2 = tensors.at(inputs[2]);
+ const auto &tensor_2 = tensors.at(inputs.at(2));
switch (tensor_2->type)
{
case circle::TensorType_INT32:
}
// Only support input shape dimension 3 and 4 only
- const auto &tensor_0 = tensors.at(inputs[0]);
+ const auto &tensor_0 = tensors.at(inputs.at(0));
const auto t_0_s = tensor_0->shape.size();
if (t_0_s != 3 && t_0_s != 4)
return false;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSpaceToBatchND>();
- node->input(inputs[0]);
- node->block_shape(inputs[1]);
- node->paddings(inputs[2]);
+ node->input(inputs.at(0));
+ node->block_shape(inputs.at(1));
+ node->paddings(inputs.at(2));
// No options for SpaceToBatchND
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSpaceToDepth>();
- node->input(inputs[0]);
+ node->input(inputs.at(0));
const auto *options = op.builtin_options.AsSpaceToDepthOptions();
node->block_size(options->block_size);
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSparseToDense>();
- node->indices(inputs[0]);
- node->output_shape(inputs[1]);
- node->values(inputs[2]);
- node->default_value(inputs[3]);
+ node->indices(inputs.at(0));
+ node->output_shape(inputs.at(1));
+ node->values(inputs.at(2));
+ node->default_value(inputs.at(3));
const auto *options = op.builtin_options.AsSparseToDenseOptions();
node->validate_indices(options->validate_indices);
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSqrt>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
// bfloat16, half (float16), float32, float64, complex64, complex128
// Currently, circle supports float16, float32, complex64
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
case circle::TensorType_INT32:
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSquare>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
// Inputs must be one of the following types
// bfloat16, half(float16), float32, float64, int32, int64, complex64, complex128
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
switch (tensor->type)
{
case circle::TensorType_FLOAT16:
}
// Input types must match
- if (tensors.at(inputs[0])->type != tensors.at(inputs[1])->type)
+ if (tensors.at(inputs.at(0))->type != tensors.at(inputs.at(1))->type)
return false;
// Input and output types must match
- if (tensors.at(inputs[0])->type != tensors.at(outputs[0])->type)
+ if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
return false;
return true;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSquaredDifference>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSqueeze>();
- node->input(inputs[0]);
+ node->input(inputs.at(0));
const auto *options = op.builtin_options.AsSqueezeOptions();
assert(options);
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleStridedSlice>();
- node->input(inputs[0]);
- node->begin(inputs[1]);
- node->end(inputs[2]);
- node->strides(inputs[3]);
+ node->input(inputs.at(0));
+ node->begin(inputs.at(1));
+ node->end(inputs.at(2));
+ node->strides(inputs.at(3));
const auto *options = op.builtin_options.AsStridedSliceOptions();
node->begin_mask(options->begin_mask);
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSub>();
- node->x(inputs[0]);
- node->y(inputs[1]);
+ node->x(inputs.at(0));
+ node->y(inputs.at(1));
const auto *options = op.builtin_options.AsSubOptions();
node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleSum>();
- node->input(inputs[0]);
- node->reduction_indices(inputs[1]);
+ node->input(inputs.at(0));
+ node->reduction_indices(inputs.at(1));
const auto *options = op.builtin_options.AsReducerOptions();
node->keep_dims(options->keep_dims);
const auto &inputs = args.op.inputs;
if (inputs.size() != 1)
return false;
+ const auto &outputs = args.op.outputs;
+ if (outputs.size() != 1)
+ return false;
- // Must be one of the following types
- // bfloat16, half (float16), float32, float64, complex64, complex128
- // Currently, circle supports float16, float32, complex64
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
- switch (tensor->type)
- {
- case circle::TensorType_FLOAT16:
- case circle::TensorType_FLOAT32:
- case circle::TensorType_COMPLEX64:
- break;
- default:
- return false;
- }
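+  // Input and output types must match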
+ if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
+ return false;
return true;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleTanh>();
- node->x(inputs[0]);
+ node->x(inputs.at(0));
return node;
}
if (outputs.size() != 1)
return false;
- // Multiples (inputs[1]) must be one of the following types
+ // Multiples (inputs.at(1)) must be one of the following types
// int32, int64
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[1]);
+ const auto &tensor = tensors.at(inputs.at(1));
switch (tensor->type)
{
case circle::TensorType_INT32:
}
// Type of input and output must be the same
- if (tensors.at(inputs[0])->type != tensors.at(outputs[0])->type)
+ if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
return false;
return true;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleTile>();
- node->input(inputs[0]);
- node->multiples(inputs[1]);
+ node->input(inputs.at(0));
+ node->multiples(inputs.at(1));
return node;
}
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[1]);
+ const auto &tensor = tensors.at(inputs.at(1));
if (tensor->type != circle::TensorType_INT32)
return false;
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleTranspose>();
- node->a(inputs[0]);
- node->perm(inputs[1]);
+ node->a(inputs.at(0));
+ node->perm(inputs.at(1));
const auto *options = op.builtin_options.AsTransposeOptions();
(void)options;
if (args.op.inputs.size() != 3)
return false;
+ const auto &inputs = args.op.inputs;
+ const auto &tensors = args.reader.tensors();
+ const auto &filter_tensor = tensors.at(inputs.at(1));
+ const auto &filter_shape = filter_tensor.get()->shape;
+ const auto &ifm_tensor = tensors.at(inputs.at(2));
+ const auto &ifm_shape = ifm_tensor.get()->shape;
+
+  // ifm and filters must be 4-D tensors
+ if (ifm_shape.size() != 4)
+ return false;
+ if (filter_shape.size() != 4)
+ return false;
+
+ // input shape : [batch, height, width, in_channels]
+  // filters shape : [output_channels, height, width, in_channels]
+ if (ifm_tensor.get()->shape.at(3) != filter_tensor.get()->shape.at(3))
+ return false;
+
return true;
}
{
auto *node = graph->nodes()->create<CircleTransposeConv>();
- node->inputSizes(inputs[0]);
- node->filter(inputs[1]);
- node->outBackprop(inputs[2]);
+ node->inputSizes(inputs.at(0));
+ node->filter(inputs.at(1));
+ node->outBackprop(inputs.at(2));
const auto *options = op.builtin_options.AsTransposeConvOptions();
node->padding(luci_padding(options->padding));
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleUnique.h"
+
+#include <luci/IR/Nodes/CircleUnique.h>
+#include <luci/IR/Nodes/CircleUniqueOut.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleUniqueGraphBuilder::validate(const ValidateArgs &args) const
+{
+ if (args.op.inputs.size() != 1)
+ return false;
+
+ if (args.op.outputs.size() != 2)
+ return false;
+
+ return true;
+}
+
+void CircleUniqueGraphBuilder::build(const circle::OperatorT &op,
+ GraphBuilderContext *context) const
+{
+ assert(context != nullptr);
+
+ auto graph = context->graph();
+
+ const std::vector<int32_t> &inputs = op.inputs;
+ const std::vector<int32_t> &outputs = op.outputs;
+ const auto &tensors = context->reader()->tensors();
+ auto tensors_ptr = context->reader()->tensors_ptr();
+ assert(tensors_ptr != nullptr);
+
+ std::vector<CircleNode *> input_nodes;
+ for (const int32_t input_tensor_index : inputs)
+ {
+ input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
+ }
+
+ // Create CircleUnique
+ auto node = graph->nodes()->create<CircleUnique>();
+ node->input(input_nodes[0]);
+
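+  // idx_out_type selects the dtype of the indices output (INT32 or INT64)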
+ const auto *options = op.builtin_options.AsUniqueOptions();
+ node->output_type(luci_datatype(options->idx_out_type));
+
+ assert(int32_t(outputs.size()) == 2);
+  // Use the name of output 0 as the Unique name
+ const circle::TensorT &output_tensor = *tensors[outputs[0]];
+ node->name(tensor_name(output_tensor));
+
+ // Create virtual outputs of Unique
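+  // output 0 holds the unique elements, output 1 holds the indices into the input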
+ for (int32_t n = 0; n < 2; ++n)
+ {
+ const circle::TensorT &output_tensor = *tensors[outputs[n]];
+
+ auto *nodeout = graph->nodes()->create<CircleUniqueOut>();
+ copy_tensor_attributes(output_tensor, nodeout);
+ // mark shape_status
+ if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
+ nodeout->shape_status(ShapeStatus::NOSHAPE);
+ else
+ nodeout->shape_status(ShapeStatus::VALID);
+
+ nodeout->input(node);
+ nodeout->index(n);
+
+ context->nodefinder()->enroll(outputs[n], nodeout);
+ }
+}
+
+} // namespace luci
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor = tensors.at(inputs[0]);
+ const auto &tensor = tensors.at(inputs.at(0));
const auto &shape = tensor->shape;
auto shape_size = static_cast<int32_t>(shape.size());
if (shape_size > 0)
return false;
const auto &tensors = args.reader.tensors();
- const auto &tensor_condition = tensors.at(inputs[0]);
+ const auto &tensor_condition = tensors.at(inputs.at(0));
const auto &tensor_out = tensors.at(outputs[0]);
if (tensor_condition->type != circle::TensorType_BOOL)
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleWhere>();
- node->condition(inputs[0]);
+ node->condition(inputs.at(0));
return node;
}
loco::Graph *graph) const
{
auto *node = graph->nodes()->create<CircleZerosLike>();
- node->input(inputs[0]);
+ node->input(inputs.at(0));
// ZerosLikeOptinos are empty
#include "Nodes/CircleMirrorPad.h"
#include "Nodes/CircleMul.h"
#include "Nodes/CircleNeg.h"
+#include "Nodes/CircleNonMaxSuppressionV4.h"
#include "Nodes/CircleNotEqual.h"
#include "Nodes/CircleOneHot.h"
#include "Nodes/CirclePack.h"
#include "Nodes/CirclePad.h"
+#include "Nodes/CirclePadV2.h"
#include "Nodes/CirclePow.h"
#include "Nodes/CirclePRelu.h"
#include "Nodes/CircleRange.h"
#include "Nodes/CircleTopKV2.h"
#include "Nodes/CircleTranspose.h"
#include "Nodes/CircleTransposeConv.h"
+#include "Nodes/CircleUnique.h"
#include "Nodes/CircleUnpack.h"
#include "Nodes/CircleWhere.h"
#include "Nodes/CircleWhile.h"
#include "Nodes/CircleOutput.h"
#include "Nodes/CircleCustomOut.h"
#include "Nodes/CircleIfOut.h"
+#include "Nodes/CircleNonMaxSuppressionV4Out.h"
#include "Nodes/CircleUnpackOut.h"
+#include "Nodes/CircleUniqueOut.h"
#include "Nodes/CircleSplitOut.h"
#include "Nodes/CircleSplitVOut.h"
#include "Nodes/CircleTopKV2Out.h"
CIRCLE_NODE(CAST, luci::CircleCast)
CIRCLE_NODE(CEIL, luci::CircleCeil)
CIRCLE_NODE(CONCATENATION, luci::CircleConcatenation)
-CIRCLE_NODE(CONST, luci::CircleConst)
CIRCLE_NODE(CONV_2D, luci::CircleConv2D)
CIRCLE_NODE(COS, luci::CircleCos)
CIRCLE_NODE(CUSTOM, luci::CircleCustom)
CIRCLE_NODE(MIRROR_PAD, luci::CircleMirrorPad)
CIRCLE_NODE(MUL, luci::CircleMul)
CIRCLE_NODE(NEG, luci::CircleNeg)
+CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, luci::CircleNonMaxSuppressionV4)
CIRCLE_NODE(NOT_EQUAL, luci::CircleNotEqual)
CIRCLE_NODE(ONE_HOT, luci::CircleOneHot)
CIRCLE_NODE(PACK, luci::CirclePack)
CIRCLE_NODE(PAD, luci::CirclePad)
+CIRCLE_NODE(PADV2, luci::CirclePadV2)
CIRCLE_NODE(POW, luci::CirclePow)
CIRCLE_NODE(PRELU, luci::CirclePRelu)
CIRCLE_NODE(RANGE, luci::CircleRange)
CIRCLE_NODE(TOPK_V2, luci::CircleTopKV2)
CIRCLE_NODE(TRANSPOSE, luci::CircleTranspose)
CIRCLE_NODE(TRANSPOSE_CONV, luci::CircleTransposeConv)
+CIRCLE_NODE(UNIQUE, luci::CircleUnique)
CIRCLE_NODE(UNPACK, luci::CircleUnpack)
CIRCLE_NODE(WHERE, luci::CircleWhere)
CIRCLE_NODE(WHILE, luci::CircleWhile)
CIRCLE_NODE(BCQ_GATHER, luci::CircleBCQGather)
CIRCLE_NODE(INSTANCE_NORM, luci::CircleInstanceNorm)
// Virtual node(s)
+CIRCLE_NODE(CIRCLECONST, luci::CircleConst)
CIRCLE_NODE(CIRCLEINPUT, luci::CircleInput)
CIRCLE_NODE(CIRCLEOUTPUT, luci::CircleOutput)
CIRCLE_NODE(CIRCLEOUTPUTDUMMY, luci::CircleOutputDummy)
CIRCLE_NODE(CIRCLEOUTPUTEXCLUDE, luci::CircleOutputExclude)
CIRCLE_NODE(CIRCLECUSTOMOUT, luci::CircleCustomOut)
CIRCLE_NODE(CIRCLEIFOUT, luci::CircleIfOut)
+CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV4OUT, luci::CircleNonMaxSuppressionV4Out)
CIRCLE_NODE(CIRCLESPLITOUT, luci::CircleSplitOut)
CIRCLE_NODE(CIRCLESPLITVOUT, luci::CircleSplitVOut)
CIRCLE_NODE(CIRCLETOPKV2OUT, luci::CircleTopKV2Out)
+CIRCLE_NODE(CIRCLEUNIQUEOUT, luci::CircleUniqueOut)
CIRCLE_NODE(CIRCLEUNPACKOUT, luci::CircleUnpackOut)
CIRCLE_NODE(CIRCLEWHILEOUT, luci::CircleWhileOut)
std::vector<float> max;
std::vector<float> scale;
std::vector<int64_t> zerop;
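+  // axis along which per-channel (per-axis) quantization parameters apply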
+ int32_t quantized_dimension{0};
};
} // namespace luci
* @brief Class to build tensor data
* @note This will not be exported as a specific op
*/
-class CircleConst final : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CONST>>
+class CircleConst final : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLECONST>>
{
public:
CircleConst() = default;
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V4_H__
+#define __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V4_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief NON_MAX_SUPPRESSION_V4 in Circle
+ */
+class CircleNonMaxSuppressionV4 final
+ : public FixedArityNode<5, CircleNodeImpl<CircleOpcode::NON_MAX_SUPPRESSION_V4>>
+{
+public:
+ loco::Node *boxes(void) const { return at(0)->node(); }
+ void boxes(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *scores(void) const { return at(1)->node(); }
+ void scores(loco::Node *node) { at(1)->node(node); }
+
+ loco::Node *max_output_size(void) const { return at(2)->node(); }
+ void max_output_size(loco::Node *node) { at(2)->node(node); }
+
+ loco::Node *iou_threshold(void) const { return at(3)->node(); }
+ void iou_threshold(loco::Node *node) { at(3)->node(node); }
+
+ loco::Node *score_threshold(void) const { return at(4)->node(); }
+ void score_threshold(loco::Node *node) { at(4)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V4_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV4OUT_H__
+#define __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV4OUT_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief Virtual NONMAXSUPPRESSIONV4OUT in Circle
+ */
+class CircleNonMaxSuppressionV4Out final
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT>>
+{
+public:
+ CircleNonMaxSuppressionV4Out() = default;
+
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+public:
+ int32_t index(void) const { return _index; }
+ void index(int32_t index) { _index = index; }
+
+private:
+ int32_t _index{-1};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV4OUT_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEPADV2_H__
+#define __LUCI_IR_CIRCLEPADV2_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief PADV2 in Circle
+ */
+class CirclePadV2 final : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::PADV2>>
+{
+public:
+ CirclePadV2() = default;
+
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+ loco::Node *paddings(void) const { return at(1)->node(); }
+ void paddings(loco::Node *node) { at(1)->node(node); }
+
+ loco::Node *constant_values(void) const { return at(2)->node(); }
+ void constant_values(loco::Node *node) { at(2)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEPADV2_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLEUNIQUE_H__
+#define __LUCI_IR_CIRCLEUNIQUE_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief Unique in Circle
+ */
+class CircleUnique final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::UNIQUE>>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+public:
+ loco::DataType idx_out_type(void) const { return _idx_out_type; }
+ void output_type(loco::DataType ot) { _idx_out_type = ot; }
+
+private:
+ loco::DataType _idx_out_type{loco::DataType::S32};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLEUNIQUE_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_UNIQUEOUT_H__
+#define __LUCI_IR_CIRCLE_UNIQUEOUT_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief Virtual CIRCLEUNIQUEOUT in Circle
+ */
+class CircleUniqueOut final
+ : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLEUNIQUEOUT>>
+{
+public:
+ CircleUniqueOut() = default;
+
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+
+public:
+ int32_t index(void) const { return _index; }
+ void index(int32_t index) { _index = index; }
+
+private:
+ int32_t _index{-1};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_UNIQUEOUT_H__
{
auto gs = luci::make_module();
- GTEST_SUCCEED();
+ SUCCEED();
}
TEST(ModuleTest, add)
ASSERT_EQ(0, custom_node.custom_code().size());
}
-TEST(CircleCustomTest, constructor_NEG) { ASSERT_DEBUG_DEATH(luci::CircleCustom{0}, ""); }
+TEST(CircleCustomTest, constructor_NEG)
+{
+ ASSERT_DEBUG_DEATH(luci::CircleCustom{0}, "");
+
+ SUCCEED();
+}
TEST(CircleCustomTest, invalidIndex_NEG)
{
TEST(CircleIfTestDeath, invalid_arity_NEG)
{
ASSERT_DEBUG_DEATH(luci::CircleIf very_long_name_if_node(0, 1), "");
+
+ SUCCEED();
}
TEST(CircleIfTestDeath, invalid_output_count_NEG)
{
ASSERT_DEBUG_DEATH(luci::CircleIf if_node(2, 0), "");
+
+ SUCCEED();
}
TEST(CircleIfTestDeath, invalid_input_get_index_NEG)
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleNonMaxSuppressionV4.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleNonMaxSuppressionV4Test, constructor)
+{
+ luci::CircleNonMaxSuppressionV4 nmsv4_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), nmsv4_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::NON_MAX_SUPPRESSION_V4, nmsv4_node.opcode());
+
+ ASSERT_EQ(nullptr, nmsv4_node.boxes());
+ ASSERT_EQ(nullptr, nmsv4_node.scores());
+ ASSERT_EQ(nullptr, nmsv4_node.max_output_size());
+ ASSERT_EQ(nullptr, nmsv4_node.iou_threshold());
+ ASSERT_EQ(nullptr, nmsv4_node.score_threshold());
+}
+
+TEST(CircleNonMaxSuppressionV4Test, input_NEG)
+{
+ luci::CircleNonMaxSuppressionV4 nmsv4_node;
+ luci::CircleNonMaxSuppressionV4 node;
+
+ nmsv4_node.boxes(&node);
+ nmsv4_node.scores(&node);
+ nmsv4_node.max_output_size(&node);
+ nmsv4_node.iou_threshold(&node);
+ nmsv4_node.score_threshold(&node);
+ ASSERT_NE(nullptr, nmsv4_node.boxes());
+ ASSERT_NE(nullptr, nmsv4_node.scores());
+ ASSERT_NE(nullptr, nmsv4_node.max_output_size());
+ ASSERT_NE(nullptr, nmsv4_node.iou_threshold());
+ ASSERT_NE(nullptr, nmsv4_node.score_threshold());
+
+ nmsv4_node.boxes(nullptr);
+ nmsv4_node.scores(nullptr);
+ nmsv4_node.max_output_size(nullptr);
+ nmsv4_node.iou_threshold(nullptr);
+ nmsv4_node.score_threshold(nullptr);
+ ASSERT_EQ(nullptr, nmsv4_node.boxes());
+ ASSERT_EQ(nullptr, nmsv4_node.scores());
+ ASSERT_EQ(nullptr, nmsv4_node.max_output_size());
+ ASSERT_EQ(nullptr, nmsv4_node.iou_threshold());
+ ASSERT_EQ(nullptr, nmsv4_node.score_threshold());
+}
+
+TEST(CircleNonMaxSuppressionV4Test, arity_NEG)
+{
+ luci::CircleNonMaxSuppressionV4 nmsv4_node;
+
+ ASSERT_NO_THROW(nmsv4_node.arg(4));
+ ASSERT_THROW(nmsv4_node.arg(5), std::out_of_range);
+}
+
+TEST(CircleNonMaxSuppressionV4Test, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleNonMaxSuppressionV4 nmsv4_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(nmsv4_node.accept(&tv), std::exception);
+}
+
+TEST(CircleNonMaxSuppressionV4Test, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleNonMaxSuppressionV4 nmsv4_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(nmsv4_node.accept(&tv), std::exception);
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleNonMaxSuppressionV4Out.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleNonMaxSuppressionV4OutTest, constructor)
+{
+ luci::CircleNonMaxSuppressionV4Out vout_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), vout_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT, vout_node.opcode());
+
+ ASSERT_EQ(nullptr, vout_node.input());
+ ASSERT_EQ(-1, vout_node.index());
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CirclePadV2.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CirclePadV2Test, constructor_P)
+{
+ luci::CirclePadV2 node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::PADV2, node.opcode());
+
+ ASSERT_EQ(nullptr, node.input());
+ ASSERT_EQ(nullptr, node.paddings());
+ ASSERT_EQ(nullptr, node.constant_values());
+}
+
+TEST(CirclePadV2Test, input_NEG)
+{
+ luci::CirclePadV2 pad_node;
+ luci::CirclePadV2 node;
+
+ pad_node.input(&node);
+ pad_node.paddings(&node);
+ pad_node.constant_values(&node);
+ ASSERT_NE(nullptr, pad_node.input());
+ ASSERT_NE(nullptr, pad_node.paddings());
+ ASSERT_NE(nullptr, pad_node.constant_values());
+
+ pad_node.input(nullptr);
+ pad_node.paddings(nullptr);
+ pad_node.constant_values(nullptr);
+ ASSERT_EQ(nullptr, pad_node.input());
+ ASSERT_EQ(nullptr, pad_node.paddings());
+ ASSERT_EQ(nullptr, pad_node.constant_values());
+}
+
+TEST(CirclePadV2Test, arity_NEG)
+{
+ luci::CirclePadV2 pad_node;
+
+ ASSERT_NO_THROW(pad_node.arg(2));
+ ASSERT_THROW(pad_node.arg(3), std::out_of_range);
+}
+
+TEST(CirclePadV2Test, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CirclePadV2 pad_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(pad_node.accept(&tv), std::exception);
+}
+
+TEST(CirclePadV2Test, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CirclePadV2 pad_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(pad_node.accept(&tv), std::exception);
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleUnique.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleUniqueTest, constructor)
+{
+ luci::CircleUnique unique_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), unique_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::UNIQUE, unique_node.opcode());
+
+ ASSERT_EQ(nullptr, unique_node.input());
+}
+
+TEST(CircleUniqueTest, input_NEG)
+{
+ luci::CircleUnique unique_node;
+ luci::CircleUnique node;
+
+ unique_node.input(&node);
+ ASSERT_NE(nullptr, unique_node.input());
+
+ unique_node.input(nullptr);
+ ASSERT_EQ(nullptr, unique_node.input());
+}
+
+TEST(CircleUniqueTest, arity_NEG)
+{
+ luci::CircleUnique unique_node;
+
+ ASSERT_NO_THROW(unique_node.arg(0));
+ ASSERT_THROW(unique_node.arg(1), std::out_of_range);
+}
+
+TEST(CircleUniqueTest, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleUnique unique_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(unique_node.accept(&tv), std::exception);
+}
+
+TEST(CircleUniqueTest, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleUnique unique_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(unique_node.accept(&tv), std::exception);
+}
TEST(CircleWhileTestDeath, invalid_arity_NEG)
{
ASSERT_DEBUG_DEATH(luci::CircleWhile very_long_name_while_node(0, 1), "");
+
+ SUCCEED();
}
TEST(CircleWhileTestDeath, invalid_output_count_NEG)
{
ASSERT_DEBUG_DEATH(luci::CircleWhile while_node(2, 0), "");
+
+ SUCCEED();
}
TEST(CircleWhileTestDeath, invalid_input_get_index_NEG)
IMPLEMENT(luci::CircleMirrorPad)
IMPLEMENT(luci::CircleMul)
IMPLEMENT(luci::CircleNeg)
+ IMPLEMENT(luci::CircleNonMaxSuppressionV4)
IMPLEMENT(luci::CircleNotEqual)
IMPLEMENT(luci::CircleOneHot)
IMPLEMENT(luci::CirclePack)
IMPLEMENT(luci::CircleTopKV2)
IMPLEMENT(luci::CircleTranspose)
IMPLEMENT(luci::CircleTransposeConv)
+ IMPLEMENT(luci::CircleUnique)
IMPLEMENT(luci::CircleUnpack)
IMPLEMENT(luci::CircleWhere)
IMPLEMENT(luci::CircleWhile)
IMPLEMENT(luci::CircleInput)
IMPLEMENT(luci::CircleOutput)
IMPLEMENT(luci::CircleIfOut)
+ IMPLEMENT(luci::CircleNonMaxSuppressionV4Out)
IMPLEMENT(luci::CircleSplitOut)
IMPLEMENT(luci::CircleSplitVOut)
IMPLEMENT(luci::CircleTopKV2Out)
+ IMPLEMENT(luci::CircleUniqueOut)
IMPLEMENT(luci::CircleUnpackOut)
IMPLEMENT(luci::CircleWhileOut)
#undef IMPLEMENT
return use_x(tbl(), node, s);
}
+bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV4 *node,
+ locop::NodeSummary &s) const
+{
+ s.args().append("boxes", pepper::str(node->boxes()));
+ s.args().append("scores", pepper::str(node->scores()));
+ s.args().append("max_output_size", pepper::str(node->max_output_size()));
+ s.args().append("iou_threshold", pepper::str(node->iou_threshold()));
+ s.args().append("score_threshold", pepper::str(node->score_threshold()));
+
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
bool CircleNodeSummaryBuilder::summary(const luci::CircleNotEqual *node,
locop::NodeSummary &s) const
{
return true;
}
+bool CircleNodeSummaryBuilder::summary(const luci::CircleUnique *node, locop::NodeSummary &s) const
+{
+ s.args().append("input", tbl()->lookup(node->input()));
+ s.args().append("idx_out_type", to_str(node->idx_out_type()));
+ s.state(locop::NodeSummary::State::Complete);
+ return true;
+}
+
bool CircleNodeSummaryBuilder::summary(const luci::CircleUnpack *node, locop::NodeSummary &s) const
{
s.args().append("value", tbl()->lookup(node->value()));
return true;
}
+bool CircleNodeSummaryBuilder::summary(const luci::CircleUniqueOut *node,
+ locop::NodeSummary &s) const
+{
+ s.args().append("unique", tbl()->lookup(node->input()));
+
+ s.state(locop::NodeSummary::State::Complete);
+
+ return true;
+}
+
bool CircleNodeSummaryBuilder::summary(const luci::CircleUnpackOut *node,
locop::NodeSummary &s) const
{
return use_input(tbl(), node, s);
}
+bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV4Out *node,
+ locop::NodeSummary &s) const
+{
+ return use_input(tbl(), node, s);
+}
+
bool CircleNodeSummaryBuilder::summary(const luci::CircleWhileOut *node,
locop::NodeSummary &s) const
{
{
static const std::vector<std::string> fakeq_supported_input_dtype{"float32"};
static const std::vector<std::string> fakeq_supported_output_dtype{"uint8"};
- static const std::vector<std::string> fakeq_supported_granularity{"layer"};
+ static const std::vector<std::string> fakeq_supported_granularity{"layer", "channel"};
auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype);
auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype);
{
static const std::vector<std::string> qwmm_supported_input_dtype{"float32"};
static const std::vector<std::string> qwmm_supported_output_dtype{"uint8"};
- static const std::vector<std::string> qwmm_supported_granularity{"layer"};
+ static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"};
auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype);
auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype);
const auto index = prefix.find("Tensordot/");
prefix = prefix.substr(0, index - 1);
}
+ else if (prefix.find("/MatMul") != std::string::npos)
+ {
+ const auto index = prefix.find("/MatMul");
+ prefix = prefix.substr(0, index);
+ }
else if (prefix.find("kernel/") != std::string::npos)
{
const auto index = prefix.find("kernel/");
return prefix;
}
+/**
+ * @brief Create a CircleOutputExclude operation that has the same shape and dtype as
+ * the original circle_node.
+ */
+luci::CircleOutputExclude *createNoOp(luci::CircleNode *circle_node)
+{
+ auto graph = circle_node->graph();
+ auto noOp = graph->nodes()->create<luci::CircleOutputExclude>();
+
+ if (circle_node->shape_status() == luci::ShapeStatus::VALID)
+ {
+ noOp->dtype(circle_node->dtype());
+ noOp->rank(circle_node->rank());
+ for (uint32_t i = 0; i < circle_node->rank(); ++i)
+ noOp->dim(i) = circle_node->dim(i);
+ }
+ else
+ {
+ // For type inference
+ noOp->dtype(loco::DataType::FLOAT32);
+ }
+
+ return noOp;
+}
+
} // namespace
namespace
{
-class BCQConverter final
+// V means the version of BCQ.
+template <int32_t V> class BCQFuser;
+
+template <> class BCQFuser<1>
{
public:
+ bool fuseBCQ(loco::Graph *g)
+ {
+ bool changed = false;
+
+ for (auto node : loco::all_nodes(g))
+ {
+ if (auto circle_const = dynamic_cast<luci::CircleConst *>(node))
+ {
+ add_BCQ_info_node(circle_const);
+ }
+ }
+
+ if (!is_bcqinfo_valid())
+ return false;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto gather = dynamic_cast<luci::CircleGather *>(node))
+ {
+ auto params = dynamic_cast<luci::CircleConst *>(gather->params());
+ if (params != nullptr && has_BCQ_info(params))
+ {
+ auto bcq_gather = g->nodes()->create<luci::CircleBCQGather>();
+
+ bcq_gather->op_version(1);
+ bcq_gather->input_scales(get_alpha(params));
+ bcq_gather->input_binary(get_packed_binary_code(params));
+ bcq_gather->indices(gather->indices());
+ bcq_gather->input_clusters(packed_clusters(params));
+
+ // input_binary shape : [output_size, hidden_size]
+ const auto binary_hidden_size =
+ loco::must_cast<luci::CircleConst *>(bcq_gather->input_binary())->dim(1).value() * 32;
+ bcq_gather->input_hidden_size(binary_hidden_size);
+
+ if (do_w_x(params))
+ {
+ bcq_gather->axis(gather->axis());
+ }
+ else
+ {
+ const auto axis_transpose = (gather->axis() == 0) ? 1 : 0;
+ bcq_gather->axis(axis_transpose);
+ }
+
+ loco::replace(gather).with(bcq_gather);
+
+ changed = true;
+ }
+ }
+ else if (auto fully_connected = dynamic_cast<luci::CircleFullyConnected *>(node))
+ {
+ auto weights = dynamic_cast<luci::CircleConst *>(fully_connected->weights());
+ if (weights != nullptr && has_BCQ_info(weights))
+ {
+ auto bcq_fc = g->nodes()->create<luci::CircleBCQFullyConnected>();
+
+ bcq_fc->op_version(1);
+ bcq_fc->weights_scales(get_alpha(weights));
+ bcq_fc->weights_binary(get_packed_binary_code(weights));
+ bcq_fc->bias(fully_connected->bias());
+ bcq_fc->weights_clusters(packed_clusters(weights));
+ bcq_fc->fusedActivationFunction(fully_connected->fusedActivationFunction());
+
+ loco::Node *bcq_input = fully_connected->input();
+ int32_t batch_rank = 0;
+
+ // If the input of BCQFullyConnected has rank greater than 2, reshape it to rank 2
+ const auto original_input = loco::must_cast<luci::CircleNode *>(fully_connected->input());
+ if (original_input->shape_status() == luci::ShapeStatus::VALID &&
+ original_input->rank() > 2)
+ {
+ auto new_shape = g->nodes()->create<luci::CircleConst>();
+ new_shape->dtype(loco::DataType::S32);
+ new_shape->size<loco::DataType::S32>(2);
+ new_shape->rank(1);
+ new_shape->dim(0) = 2;
+
+ auto batch_size = 1;
+ for (uint32_t i = 0; i < original_input->rank() - 1; ++i)
+ batch_size *= original_input->dim(i).value();
+
+ new_shape->at<loco::DataType::S32>(0) = batch_size;
+ new_shape->at<loco::DataType::S32>(1) =
+ original_input->dim(original_input->rank() - 1).value();
+ new_shape->shape_status(luci::ShapeStatus::VALID);
+
+ auto reshape = g->nodes()->create<luci::CircleReshape>();
+ reshape->tensor(original_input);
+ reshape->shape(new_shape);
+
+ bcq_input = reshape;
+ batch_rank = original_input->rank() - 2;
+ }
+
+ // For the x_w formation, Transpose nodes should be inserted before and after BCQFullyConnected
+ if (do_w_x(weights))
+ {
+ const auto binary_hidden_size =
+ loco::must_cast<luci::CircleNode *>(fully_connected->input())
+ ->dim(batch_rank)
+ .value();
+ bcq_fc->weights_hidden_size(binary_hidden_size);
+ bcq_fc->input(bcq_input);
+ loco::replace(fully_connected).with(bcq_fc);
+ }
+ else
+ {
+ const auto binary_hidden_size =
+ loco::must_cast<luci::CircleNode *>(fully_connected->input())
+ ->dim(1 + batch_rank)
+ .value();
+ bcq_fc->weights_hidden_size(binary_hidden_size);
+
+ auto perm = g->nodes()->create<luci::CircleConst>();
+ perm->dtype(loco::DataType::S32);
+ perm->size<loco::DataType::S32>(2);
+ perm->rank(1);
+ perm->dim(0) = 2;
+ perm->at<loco::DataType::S32>(0) = 1;
+ perm->at<loco::DataType::S32>(1) = 0;
+ perm->shape_status(luci::ShapeStatus::VALID);
+
+ auto input_transpose = g->nodes()->create<luci::CircleTranspose>();
+ input_transpose->a(bcq_input);
+ input_transpose->perm(perm);
+
+ bcq_fc->input(input_transpose);
+
+ auto output_transpose = g->nodes()->create<luci::CircleTranspose>();
+ output_transpose->a(bcq_fc);
+ output_transpose->perm(perm);
+
+ loco::replace(fully_connected).with(output_transpose);
+ }
+
+ changed = true;
+ }
+ }
+ }
+
+ if (changed)
+ clear_BCQ_nodes();
+
+ return changed;
+ }
+
+private:
void add_BCQ_info_node(luci::CircleConst *node)
{
const auto node_name = node->name();
return has_info;
}
+ /**
+ * @brief Exclude the BCQ information nodes, which are only used for fusing BCQ operations,
+ * from the graph outputs by replacing them with CircleOutputExclude
+ */
+ void clear_BCQ_nodes()
+ {
+ auto clear_nodes = [](std::map<std::string, luci::CircleConst *> &nodes) {
+ for (auto &n : nodes)
+ {
+ auto node = n.second;
+
+ for (auto s : loco::succs(node))
+ {
+ if (auto outnode = dynamic_cast<luci::CircleOutput *>(s))
+ {
+ outnode->from(createNoOp(node));
+ }
+ else if (auto reshape_node = dynamic_cast<luci::CircleReshape *>(s))
+ {
+ for (auto o : loco::succs(reshape_node))
+ {
+ auto circle_output = loco::must_cast<luci::CircleOutput *>(o);
+ circle_output->from(createNoOp(reshape_node));
+ }
+ }
+ }
+ }
+ };
+
+ clear_nodes(_do_w_x);
+ clear_nodes(_alpha);
+ clear_nodes(_packed_binary_code);
+ clear_nodes(_number_of_clusters);
+ clear_nodes(_size_of_clusters);
+ clear_nodes(_qbits_of_clusters);
+ clear_nodes(_dequant_weight);
+ }
+
+ bool is_bcqinfo_valid()
+ {
+ // do_w_x should be int32 or bool type
+ for (auto n : _do_w_x)
+ {
+ if (n.second->dtype() != loco::DataType::BOOL && n.second->dtype() != loco::DataType::S32)
+ return false;
+ }
+
+ return true;
+ }
+
+private:
bool do_w_x(luci::CircleConst *node)
{
const auto prefix = node_name_prefix(node->name());
if (_do_w_x[prefix]->dtype() == loco::DataType::S32)
return _do_w_x[prefix]->at<loco::DataType::S32>(0) == 1;
- else if (_do_w_x[prefix]->dtype() == loco::DataType::BOOL)
- return _do_w_x[prefix]->at<loco::DataType::BOOL>(0);
else
- throw std::runtime_error("do_w_x should be int or bool");
+ return _do_w_x[prefix]->at<loco::DataType::BOOL>(0);
}
luci::CircleConst *get_alpha(luci::CircleConst *node)
return packed_clusters;
}
- /**
- * @brief Exclude BCQ information nodes which are used for fusing BCQ operations
- * from graph output by using CircleOutputExclude
- */
- void clear_BCQ_nodes()
- {
- auto createNoOp = [](luci::CircleNode *circle_node) {
- auto graph = circle_node->graph();
- auto noOp = graph->nodes()->create<luci::CircleOutputExclude>();
-
- if (circle_node->shape_status() == luci::ShapeStatus::VALID)
- {
- noOp->dtype(circle_node->dtype());
- noOp->rank(circle_node->rank());
- for (uint32_t i = 0; i < circle_node->rank(); ++i)
- noOp->dim(i) = circle_node->dim(i);
- }
- else
- {
- // For type inference
- noOp->dtype(loco::DataType::FLOAT32);
- }
-
- return noOp;
- };
-
- auto clear_nodes = [createNoOp](std::map<std::string, luci::CircleConst *> &nodes) {
- for (auto &n : nodes)
- {
- auto node = n.second;
-
- for (auto s : loco::succs(node))
- {
- if (auto outnode = dynamic_cast<luci::CircleOutput *>(s))
- {
- outnode->from(createNoOp(node));
- }
- else if (auto reshape_node = dynamic_cast<luci::CircleReshape *>(s))
- {
- for (auto o : loco::succs(reshape_node))
- {
- auto circle_output = loco::must_cast<luci::CircleOutput *>(o);
- circle_output->from(createNoOp(reshape_node));
- }
- }
- }
- }
- };
-
- clear_nodes(_do_w_x);
- clear_nodes(_alpha);
- clear_nodes(_packed_binary_code);
- clear_nodes(_number_of_clusters);
- clear_nodes(_size_of_clusters);
- clear_nodes(_qbits_of_clusters);
- clear_nodes(_dequant_weight);
- }
-
private:
std::map<std::string, luci::CircleConst *> _do_w_x;
std::map<std::string, luci::CircleConst *> _alpha;
bool FuseBCQPass::run(loco::Graph *g)
{
- BCQConverter converter;
-
bool changed = false;
+ // Find BCQ version information and check validity.
+ luci::CircleConst *version_node = nullptr;
for (auto node : loco::all_nodes(g))
{
if (auto circle_const = dynamic_cast<luci::CircleConst *>(node))
{
- converter.add_BCQ_info_node(circle_const);
- }
- }
-
- for (auto node : loco::active_nodes(loco::output_nodes(g)))
- {
- if (auto gather = dynamic_cast<luci::CircleGather *>(node))
- {
- auto params = dynamic_cast<luci::CircleConst *>(gather->params());
- if (params != nullptr && converter.has_BCQ_info(params))
+ if (circle_const->name().find("/bcqinfo_version") != std::string::npos)
{
- auto bcq_gather = g->nodes()->create<luci::CircleBCQGather>();
-
- bcq_gather->input_scales(converter.get_alpha(params));
- bcq_gather->input_binary(converter.get_packed_binary_code(params));
- bcq_gather->indices(gather->indices());
- bcq_gather->input_clusters(converter.packed_clusters(params));
-
- const auto binary_hidden_size =
- loco::must_cast<luci::CircleConst *>(bcq_gather->input_binary())->dim(1).value() * 32;
- bcq_gather->input_hidden_size(binary_hidden_size);
-
- if (converter.do_w_x(params))
- {
- bcq_gather->axis(gather->axis());
- }
- else
+ // There should be only one bcqinfo_version in the model
+ if (version_node != nullptr)
{
- const auto axis_transpose = (gather->axis() == 0) ? 1 : 0;
- bcq_gather->axis(axis_transpose);
+ assert(false && "Multiple version information found");
+ return false;
}
- loco::replace(gather).with(bcq_gather);
-
- changed = true;
+ version_node = circle_const;
}
}
- else if (auto fully_connected = dynamic_cast<luci::CircleFullyConnected *>(node))
- {
- auto weights = dynamic_cast<luci::CircleConst *>(fully_connected->weights());
- if (weights != nullptr && converter.has_BCQ_info(weights))
- {
- auto bcq_fc = g->nodes()->create<luci::CircleBCQFullyConnected>();
-
- bcq_fc->weights_scales(converter.get_alpha(weights));
- bcq_fc->weights_binary(converter.get_packed_binary_code(weights));
- bcq_fc->bias(fully_connected->bias());
- bcq_fc->weights_clusters(converter.packed_clusters(weights));
- bcq_fc->fusedActivationFunction(fully_connected->fusedActivationFunction());
-
- loco::Node *bcq_input = fully_connected->input();
- int32_t batch_rank = 0;
+ }
- // If input of BCQFullyConnected has more than rank 2, we should reshape it as rank 2
- const auto original_input = loco::must_cast<luci::CircleNode *>(fully_connected->input());
- if (original_input->shape_status() == ShapeStatus::VALID && original_input->rank() > 2)
- {
- auto new_shape = g->nodes()->create<luci::CircleConst>();
- new_shape->dtype(loco::DataType::S32);
- new_shape->size<loco::DataType::S32>(2);
- new_shape->rank(1);
- new_shape->dim(0) = 2;
-
- auto batch_size = 1;
- for (uint32_t i = 0; i < original_input->rank() - 1; ++i)
- batch_size *= original_input->dim(i).value();
-
- new_shape->at<loco::DataType::S32>(0) = batch_size;
- new_shape->at<loco::DataType::S32>(1) =
- original_input->dim(original_input->rank() - 1).value();
- new_shape->shape_status(ShapeStatus::VALID);
-
- auto reshape = g->nodes()->create<luci::CircleReshape>();
- reshape->tensor(original_input);
- reshape->shape(new_shape);
-
- bcq_input = reshape;
- batch_rank = original_input->rank() - 2;
- }
+ // If version node is not found, regard it as version 1.
+ int32_t bcq_version = (version_node != nullptr) ? version_node->at<loco::DataType::S32>(0) : 1;
- // If x_w formation, we should insert Transpose in front and back of BCQFullyConnected
- if (converter.do_w_x(weights))
- {
- const auto binary_hidden_size =
- loco::must_cast<luci::CircleNode *>(fully_connected->input())
- ->dim(batch_rank)
- .value();
- bcq_fc->weights_hidden_size(binary_hidden_size);
- bcq_fc->input(bcq_input);
- loco::replace(fully_connected).with(bcq_fc);
- }
- else
- {
- const auto binary_hidden_size =
- loco::must_cast<luci::CircleNode *>(fully_connected->input())
- ->dim(1 + batch_rank)
- .value();
- bcq_fc->weights_hidden_size(binary_hidden_size);
-
- auto perm = g->nodes()->create<luci::CircleConst>();
- perm->dtype(loco::DataType::S32);
- perm->size<loco::DataType::S32>(2);
- perm->rank(1);
- perm->dim(0) = 2;
- perm->at<loco::DataType::S32>(0) = 1;
- perm->at<loco::DataType::S32>(1) = 0;
- perm->shape_status(ShapeStatus::VALID);
-
- auto input_transpose = g->nodes()->create<luci::CircleTranspose>();
- input_transpose->a(bcq_input);
- input_transpose->perm(perm);
-
- bcq_fc->input(input_transpose);
-
- auto output_transpose = g->nodes()->create<luci::CircleTranspose>();
- output_transpose->a(bcq_fc);
- output_transpose->perm(perm);
-
- loco::replace(fully_connected).with(output_transpose);
- }
+ if (bcq_version == 1)
+ changed = BCQFuser<1>().fuseBCQ(g);
+ else
+ assert(false && "Not supported BCQ version");
- changed = true;
- }
- }
+ if (changed && version_node != nullptr)
+ {
+ // If BCQ was applied and a version node was found, exclude that node from the graph output.
+ loco::replace(version_node).with(createNoOp(version_node));
}
- if (changed)
- converter.clear_BCQ_nodes();
-
return changed;
}
namespace luci
{
+uint8_t fp32_to_uint8_cast(float f)
+{
+ assert(std::numeric_limits<uint8_t>::min() <= f);
+ assert(f <= std::numeric_limits<uint8_t>::max());
+ return static_cast<uint8_t>(f);
+}
+
void compute_sym_scale_zp(float min, float max, float &scaling_factor, int64_t &zp,
float &nudged_min, float &nudged_max)
{
}
else
zero_point_double = qmin_double - rmin / scale;
- if (zero_point_double <= qmin_double)
+ if (min >= 0)
{
assert(min >= 0 && max >= 0);
nudged_zero_point = kMinScale;
if (min > 0 && max > 0)
WARN(l) << "The minimum and maximum values are all positive." << std::endl;
}
- else if (zero_point_double >= qmax_double)
+ else if (max < 0)
{
assert(min < 0 && max < 0);
nudged_zero_point = kMaxScale;
else
{
assert(min < 0 && max >= 0);
- nudged_zero_point = static_cast<uint8_t>(std::round(zero_point_double));
+ nudged_zero_point = fp32_to_uint8_cast(std::round(zero_point_double));
+ }
+
+ // Protect against a very small scale, which can cause the zero point to overflow
+ if (scale < 1e-5)
+ {
+ scale = 1e-5;
+ nudged_zero_point = fp32_to_uint8_cast(std::round(qmin_double - rmin / scale));
}
nudged_min = static_cast<float>((qmin_double - nudged_zero_point) * scale);
node->dtype() == loco::DataType::S32; // bias
}
-void sym_wquant_per_channel(CircleConst *node, std::vector<float> &scaling_factor)
+void sym_wquant_per_channel(CircleConst *node, std::vector<float> &scaling_factor,
+ int32_t &channel_dim_index)
{
assert(node->dtype() == loco::DataType::FLOAT32);
uint32_t indices[4] = {
0,
};
- int channel_dim_index{0};
if (!get_channel_dim_index(node, dimension, channel_dim_index))
{
}
void asym_wquant_per_channel(CircleConst *node, std::vector<float> &min,
- std::vector<float> &scaling_factor)
+ std::vector<float> &scaling_factor, int32_t &channel_dim_index)
{
assert(node->dtype() == loco::DataType::FLOAT32);
uint32_t indices[4] = {
0,
};
- int channel_dim_index{0};
if (!get_channel_dim_index(node, dimension, channel_dim_index))
{
if (dw_conv != nullptr && dw_conv->filter() == circle_const)
return true;
+ auto t_conv = dynamic_cast<CircleTransposeConv *>(out);
+ if (t_conv != nullptr && t_conv->filter() == circle_const && circle_const->rank() == 4)
+ return true;
+
auto fc = dynamic_cast<CircleFullyConnected *>(out);
if (fc != nullptr && fc->weights() == circle_const)
return true;
circle_node->dtype(loco::DataType::S16);
}
- circle_node->quantparam()->max[0] = nudged_max;
- circle_node->quantparam()->min[0] = nudged_min;
+ circle_node->quantparam()->min.clear();
+ circle_node->quantparam()->max.clear();
circle_node->quantparam()->scale.push_back(scaling_factor);
circle_node->quantparam()->zerop.push_back(zp);
}
assert(quantparam != nullptr);
auto min = quantparam->min;
auto scaling_factor = quantparam->scale;
+ int32_t channel_dim_index = 0;
if (output_type == loco::DataType::U8)
{
- asym_wquant_per_channel(circle_const, min, scaling_factor);
+ asym_wquant_per_channel(circle_const, min, scaling_factor, channel_dim_index);
}
else
{
- sym_wquant_per_channel(circle_const, scaling_factor);
+ sym_wquant_per_channel(circle_const, scaling_factor, channel_dim_index);
}
+ quantparam->min.clear();
+ quantparam->max.clear();
+ quantparam->quantized_dimension = channel_dim_index;
}
// Find min/max per layer-wise
else
auto min = quantparam->min[0];
auto scaling_factor = quantparam->scale[0];
asym_wquant_per_layer(circle_const, min, scaling_factor);
+ quantparam->min.clear();
+ quantparam->max.clear();
}
}
}
loco::NodeShape visit(const luci::CircleNeg *node) final { return use_x(node); }
+ loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4 *node) final
+ {
+ const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
+ return loco::NodeShape{boxes_shape};
+ }
+
loco::NodeShape visit(const luci::CircleNotEqual *node) final { return broadcast_xy(node); }
loco::NodeShape visit(const luci::CircleOneHot *node) final
return output_shape;
}
+ loco::NodeShape visit(const luci::CircleUnique *node) final
+ {
+ auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+
+ assert(input_shape.rank() == 1);
+
+ loco::TensorShape shape_output;
+ shape_output = own_shape(node);
+
+ return loco::NodeShape{shape_output};
+ }
+
loco::NodeShape visit(const luci::CircleTransposeConv *node) final
{
// TransposeConv's output shape is written in its 'inputSizes' argument
return loco::NodeShape{*then_graph_output->shape()};
}
+ loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4Out *node) final
+ {
+ const loco::DataType S32 = loco::DataType::S32;
+
+ auto nmsv4 = dynamic_cast<const luci::CircleNonMaxSuppressionV4 *>(node->input());
+ if (nmsv4 == nullptr)
+ INTERNAL_EXN("CircleNonMaxSuppressionV4 IR is not configured correctly");
+
+ auto index = node->index();
+ if (index == 1)
+ return loco::TensorShape({0});
+
+ assert(index == 0);
+
+ auto unknown = loco::TensorShape{loco::Dimension()};
+ auto max_output_size = dynamic_cast<const luci::CircleConst *>(nmsv4->max_output_size());
+ if (max_output_size == nullptr)
+ return unknown; // we need CircleConst for max output size
+
+ LUCI_ASSERT(max_output_size->dtype() == S32, "Only support int32 for max_output_size");
+
+ if (max_output_size->size<S32>() < 1)
+ return unknown;
+
+ auto max_output_size_value = uint32_t(max_output_size->at<S32>(0));
+ return loco::TensorShape{max_output_size_value};
+ }
+
loco::NodeShape visit(const luci::CircleSplitOut *node) final
{
const loco::DataType S32 = loco::DataType::S32;
return loco::NodeShape{output_shape};
}
+ loco::NodeShape visit(const luci::CircleUniqueOut *node) final
+ {
+ auto unique = dynamic_cast<const luci::CircleUnique *>(node->input());
+ if (unique == nullptr)
+ {
+ INTERNAL_EXN("CircleUnique IR is not configured correctly");
+ }
+
+ auto unique_shape = loco::shape_get(unique).as<loco::TensorShape>();
+
+ return loco::NodeShape{unique_shape};
+ }
+
loco::NodeShape visit(const luci::CircleUnpackOut *node) final
{
auto unpack = dynamic_cast<const luci::CircleUnpack *>(node->input());
loco::DataType visit(const luci::CircleNeg *node) final { return loco::dtype_get(node->x()); }
+ loco::DataType visit(const luci::CircleNonMaxSuppressionV4 *node) final
+ {
+ return loco::dtype_get(node->boxes());
+ }
+
loco::DataType visit(const luci::CircleNotEqual *) final { return loco::DataType::BOOL; }
loco::DataType visit(const luci::CirclePack *node) final
return loco::dtype_get(node->tensor());
}
- loco::DataType visit(const luci::CircleResizeBilinear *) final { return loco::DataType::FLOAT32; }
+ loco::DataType visit(const luci::CircleResizeBilinear *node) final
+ {
+ return loco::dtype_get(node->input());
+ }
loco::DataType visit(const luci::CircleResizeNearestNeighbor *node) final
{
return loco::dtype_get(node->outBackprop());
}
+ loco::DataType visit(const luci::CircleUnique *node) final
+ {
+ return loco::dtype_get(node->input());
+ }
+
loco::DataType visit(const luci::CircleUnpack *node) final
{
return loco::dtype_get(node->value());
return then_graph_output->dtype();
}
+ loco::DataType visit(const luci::CircleNonMaxSuppressionV4Out *node) final
+ {
+ (void)node;
+ assert(node->index() == 0 || node->index() == 1);
+ return loco::DataType::S32;
+ }
+
loco::DataType visit(const luci::CircleSplitOut *node) final
{
return loco::dtype_get(node->input());
return loco::DataType::S32;
}
+ loco::DataType visit(const luci::CircleUniqueOut *node) final
+ {
+ if (node->index() == 0)
+ {
+ return loco::dtype_get(node->input());
+ }
+ assert(node->index() == 1);
+ auto unique = loco::must_cast<luci::CircleUnique *>(node->input());
+ return unique->idx_out_type();
+ }
+
loco::DataType visit(const luci::CircleUnpackOut *node) final
{
return loco::dtype_get(node->input());
addread(ArgMin_U8_002)
addread(ArgMin_U8_003)
addread(AveragePool2D_000)
+addread(AveragePool2D_U8_000)
addread(BatchMatMul_000)
addread(BatchMatMulV2_000)
addread(BatchMatMulV2_001)
addread(Concatenation_000)
addread(Concatenation_U8_000)
addread(Conv2D_000)
+addread(Conv2D_001)
addread(Conv2D_002)
addread(Conv2D_003)
addread(Conv2D_U8_000)
+addread(Conv2D_U8_001)
addread(Cos_000)
addread(DepthToSpace_000)
addread(DepthwiseConv2D_000)
addread(DepthwiseConv2D_U8_000)
+addread(DepthwiseConv2D_U8_001)
addread(DepthwiseConv2D_001)
addread(Div_000)
addread(ELU_000)
addread(If_000)
addread(If_001)
addread(L2Normalize_000)
+addread(L2Normalize_U8_000)
addread(L2Pool2D_000)
addread(L2Pool2D_U8_000)
addread(LeakyRelu_000)
addread(LogicalNot_000)
addread(LogicalOr_000)
addread(Logistic_000)
+addread(Logistic_U8_000)
addread(LogSoftmax_000)
addread(MatMul_000)
addread(MatrixDiag_000)
addread(MaxPool2D_U8_000)
addread(Mean_000)
addread(Mean_001)
+addread(Mean_U8_000)
addread(Minimum_000)
addread(MirrorPad_000)
addread(Mul_000)
addread(Pack_000)
addread(Pack_U8_000)
addread(Pad_000)
+addread(Pad_U8_000)
addread(Pow_000)
addread(PRelu_000)
addread(Range_000)
addwrite(ArgMin_U8_002)
addwrite(ArgMin_U8_003)
addwrite(AveragePool2D_000)
+addwrite(AveragePool2D_U8_000)
addwrite(BatchMatMul_000)
addwrite(BatchMatMulV2_000)
addwrite(BatchMatMulV2_001)
addwrite(Concatenation_000)
addwrite(Concatenation_U8_000)
addwrite(Conv2D_000)
+addwrite(Conv2D_001)
addwrite(Conv2D_002)
addwrite(Conv2D_003)
addwrite(Conv2D_U8_000)
+addwrite(Conv2D_U8_001)
addwrite(Cos_000)
addwrite(DepthToSpace_000)
addwrite(DepthwiseConv2D_000)
addwrite(DepthwiseConv2D_U8_000)
+addwrite(DepthwiseConv2D_U8_001)
addwrite(DepthwiseConv2D_001)
addwrite(Div_000)
addwrite(ELU_000)
addwrite(If_000)
addwrite(If_001)
addwrite(L2Normalize_000)
+addwrite(L2Normalize_U8_000)
addwrite(L2Pool2D_000)
addwrite(L2Pool2D_U8_000)
addwrite(LeakyRelu_000)
addwrite(LogicalNot_000)
addwrite(LogicalOr_000)
addwrite(Logistic_000)
+addwrite(Logistic_U8_000)
addwrite(LogSoftmax_000)
addwrite(MatMul_000)
addwrite(MatrixDiag_000)
addwrite(MaxPool2D_U8_000)
addwrite(Mean_000)
addwrite(Mean_001)
+addwrite(Mean_U8_000)
addwrite(Minimum_000)
addwrite(MirrorPad_000)
addwrite(Mul_000)
return()
endif(NOT FlatBuffers_FOUND)
-# TODO recover official release version
-# NOTE we cannot use version number like "2.3.0-rc0" for find_package()
-# use TensorFlowSource-2.3.0-rc0 as config itself
-# nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET)
-nnas_find_package(TensorFlowSource-2.3.0-rc0 QUIET)
+nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET)
if(NOT TensorFlowSource_FOUND)
return()
set(ONE_COMMAND_FILES
one-import
+ one-import-bcq
one-import-tf
one-import-tflite
one-optimize
install(FILES ${ONE_COMMAND}
PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
- GROUP_READ GROUP_WRITE GROUP_EXECUTE
+ GROUP_READ GROUP_EXECUTE
WORLD_READ WORLD_EXECUTE
DESTINATION bin)
About
-----
-Last update: 2020-07-14
+Last update: 2020-08-03
This document explains about 'one-prepare-venv' command.
'one-prepare-venv' will prepare python3 virtual environment with tensorflow-cpu
-version 2.3.0rc0, recommanded 2.x version as of now, so that 'one-import-tf'
+version 2.3.0, the recommended 2.x version as of now, so that 'one-import-tf'
command can execute properly.
About
-----
-Last update: 2020-07-14
+Last update: 2020-07-31
This document briefly explains how to use one-* commands.
Detailed options are not explained here. Run the command to see options.
Currently supported frameworks are 'tf', 'tflite' for TensorFlow and TensorFlow
lite.
+one-import-bcq
+--------------
+
+This will convert a TensorFlow model file (.pb) to a circle model file with BCQ applied.
+To execute this command, the original TensorFlow model file must include BCQ information.
+
+This command invokes the following scripts internally.
+- preserve_bcq_info : Prevent the BCQ information from vanishing
+- generate_bcq_output_arrays : Designate BCQ information nodes as model outputs automatically
+- tf2tfliteV2 : Convert the TensorFlow model to a TensorFlow Lite model
+- tflite2circle : Convert the TensorFlow Lite model to a circle model
+
+When this command finishes, the BCQ information nodes will have been removed, provided
+the BCQ information was valid and BCQ was applied correctly without any errors.
+
+As tf2tfliteV2.py runs the TensorFlow Lite converter, you need to have TensorFlow
+installed on your system. We recommend using 2.3.0 for now.
+
+We provide a Python virtual environment, and one-import-bcq will enter and leave
+this environment so that you don't need to explicitly 'activate' the virtual
+environment.
+
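+For example, assuming a frozen TensorFlow model saved as 'bcq_model.pb' (the file
+and array names here are only illustrative), a typical invocation looks like this:
+
+  one-import-bcq --input_path bcq_model.pb --output_path bcq_model.circle \
+    --input_arrays input --output_arrays output
+
+The conversion log is written next to the output file, in this case as
+'bcq_model.circle.log'.
+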
one-import-tf
-------------
converter to convert tflite model to circle model.
As tf2tfliteV2.py runs TensorFlow lite converter, you need to have TensorFlow
-installed in your system. We recommand to use 2.3.0rc0 for now.
+installed on your system. We recommend using 2.3.0 for now.
We provide python virtual environment and one-import-tf will enter and leave
this environment so that you don't need to explictly 'activate' virtual
function Usage()
{
- echo "Usage: $0 [BACKEND] ..."
+ echo "Usage: one-codegen [BACKEND] ..."
echo "Available BACKEND drivers:"
backend_exist=0
for file in `find $DRIVER_PATH -name *-compile -type f`;
if [ $backend_exist == 0 ]; then
echo " (There is no available backend drivers)"
fi
+
+ exit 255
}
-# Get command from command-line
-BACKEND=$1; shift
-BACKEND_DRIVER="$BACKEND-compile"
+function version()
+{
+ $DRIVER_PATH/one-version one-codegen
+ exit 255
+}
-if [[ -z "${BACKEND_DRIVER}" ]]; then
+# Get command from command-line
+BACKEND=$1
+if [[ -z ${BACKEND} ]]; then
Usage
- exit 255
fi
+shift
+
+if [[ "${BACKEND}" == "--version" ]]; then
+ version
+fi
+
+BACKEND_DRIVER="${BACKEND}-compile"
BACKEND_DRIVER_CMD="${DRIVER_PATH}/${BACKEND_DRIVER}"
if [[ ! -f "${BACKEND_DRIVER_CMD}" ]]; then
echo "ERROR: '${BACKEND_DRIVER}' is not supported"
Usage
- exit 255
fi
"${BACKEND_DRIVER_CMD}" "$@"
function Usage()
{
- echo "Usage: $0 [FRAMEWORK] ..."
+ echo "Usage: one-import [FRAMEWORK] ..."
echo "Available FRAMEWORK drivers:"
framework_exist=0
for file in "$DRIVER_PATH"/one-import-*;
if [ $framework_exist == 0 ]; then
echo " (There is no available import drivers)"
fi
+
+ exit 255
}
-# Get command from command-line
-FRAMEWORK=$1; shift
-FRAMEWORK_DRIVER="one-import-$FRAMEWORK"
+function version()
+{
+ $DRIVER_PATH/one-version one-import
+ exit 255
+}
-if [[ -z "${FRAMEWORK_DRIVER}" ]]; then
+# Get command from command-line
+FRAMEWORK=$1
+if [[ -z ${FRAMEWORK} ]]; then
Usage
- exit 255
+fi
+shift
+
+if [ ${FRAMEWORK} = "--version" ]; then
+ version
fi
+FRAMEWORK_DRIVER="one-import-$FRAMEWORK"
+
FRAMEWORK_DRIVER_CMD="${DRIVER_PATH}/${FRAMEWORK_DRIVER}"
if [[ ! -f "${FRAMEWORK_DRIVER_CMD}" ]]; then
echo "ERROR: '${FRAMEWORK_DRIVER}' is not supported"
Usage
- exit 255
fi
"${FRAMEWORK_DRIVER_CMD}" "$@"
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+DRIVER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+usage()
+{
+ echo "Convert TensorFlow model with BCQ to circle."
+ echo "Usage: one-import-bcq"
+ echo " --version Show version information and exit"
+ echo " --input_path <path/to/tfmodel/with/BCQ>"
+ echo " --output_path <path/to/circle>"
+ echo " --input_arrays <names of the input arrays, comma-separated>"
+ echo " --input_shapes <input shapes, colon-separated>"
+ echo " --output_arrays <names of the output arrays, comma-separated>"
+ echo " --v2 Use TensorFlow 2.x interface (default is 1.x interface)"
+ exit 255
+}
+
+version()
+{
+ $DRIVER_PATH/one-version one-import-bcq
+ exit 255
+}
+
+TF_INTERFACE="--v1"
+
+# Parse command-line arguments
+#
+while [ "$#" -ne 0 ]; do
+ CUR="$1"
+
+ case $CUR in
+ '--help')
+ usage
+ ;;
+ '--version')
+ version
+ ;;
+ '--input_path')
+ export INPUT_PATH="$2"
+ shift 2
+ ;;
+ '--output_path')
+ export OUTPUT_PATH="$2"
+ shift 2
+ ;;
+ '--input_arrays')
+ export INPUT_ARRAYS="$2"
+ shift 2
+ ;;
+ '--input_shapes')
+ export INPUT_SHAPES="$2"
+ shift 2
+ ;;
+ '--output_arrays')
+ export OUTPUT_ARRAYS="$2"
+ shift 2
+ ;;
+ '--v2')
+ TF_INTERFACE="--v2"
+ shift
+ ;;
+ *)
+ echo "Unknown parameter: ${CUR}"
+ shift
+ ;;
+ esac
+done
+
+if [ -z ${INPUT_PATH} ] || [ ! -e ${INPUT_PATH} ]; then
+ echo "Error: input model not found"
+ echo ""
+ usage
+fi
+
+FILE_BASE=$(basename ${OUTPUT_PATH})
+MODEL_NAME="${FILE_BASE%.*}"
+
+TMPDIR=$(mktemp -d)
+trap "{ rm -rf $TMPDIR; }" EXIT
+
+# activate python virtual environment
+VIRTUALENV_LINUX="${DRIVER_PATH}/venv/bin/activate"
+VIRTUALENV_WINDOWS="${DRIVER_PATH}/venv/Scripts/activate"
+
+if [ -e ${VIRTUALENV_LINUX} ]; then
+ source ${VIRTUALENV_LINUX}
+elif [ -e ${VIRTUALENV_WINDOWS} ]; then
+ source ${VIRTUALENV_WINDOWS}
+fi
+
+# remove previous log
+rm -rf "${OUTPUT_PATH}.log"
+
+# generate temporary preserved pb file
+echo "${DRIVER_PATH}/preserve_bcq_info" --input_path ${INPUT_PATH} \
+--output_path "${TMPDIR}/${MODEL_NAME}_preserved.pb" > "${OUTPUT_PATH}.log"
+echo " " >> "${OUTPUT_PATH}.log"
+
+"${DRIVER_PATH}/preserve_bcq_info" --input_path ${INPUT_PATH} \
+--output_path "${TMPDIR}/${MODEL_NAME}_preserved.pb" >> "${OUTPUT_PATH}.log" 2>&1
+
+# generate output_arrays automatically
+echo "${DRIVER_PATH}/generate_bcq_output_arrays" \
+--input_path "${TMPDIR}/${MODEL_NAME}_preserved.pb" \
+--output_path "${TMPDIR}/${MODEL_NAME}_output_arrays.txt" > "${OUTPUT_PATH}.log"
+echo " " >> "${OUTPUT_PATH}.log"
+
+"${DRIVER_PATH}/generate_bcq_output_arrays" \
+--input_path "${TMPDIR}/${MODEL_NAME}_preserved.pb" \
+--output_path "${TMPDIR}/${MODEL_NAME}_output_arrays.txt" >> "${OUTPUT_PATH}.log" 2>&1
+
+# generate temporary tflite file
+CONVERT_SCRIPT="python ${DRIVER_PATH}/tf2tfliteV2.py ${TF_INTERFACE} "
+CONVERT_SCRIPT+="--input_path ${TMPDIR}/${MODEL_NAME}_preserved.pb "
+CONVERT_SCRIPT+="--input_arrays ${INPUT_ARRAYS} "
+CONVERT_SCRIPT+="--output_path ${TMPDIR}/${MODEL_NAME}.tflite "
+CONVERT_SCRIPT+="--output_arrays ${OUTPUT_ARRAYS}$(cat ${TMPDIR}/${MODEL_NAME}_output_arrays.txt) "
+if [ ! -z ${INPUT_SHAPES} ]; then
+ CONVERT_SCRIPT+="--input_shapes ${INPUT_SHAPES} "
+fi
+
+echo ${CONVERT_SCRIPT} >> "${OUTPUT_PATH}.log"
+$CONVERT_SCRIPT >> "${OUTPUT_PATH}.log" 2>&1
+
+# convert .tflite to .circle
+echo " " >> "${OUTPUT_PATH}.log"
+echo "${DRIVER_PATH}/tflite2circle" "${TMPDIR}/${MODEL_NAME}.tflite" \
+"${OUTPUT_PATH}" >> "${OUTPUT_PATH}.log"
+echo " " >> "${OUTPUT_PATH}.log"
+
+"${DRIVER_PATH}/tflite2circle" "${TMPDIR}/${MODEL_NAME}.tflite" \
+"${OUTPUT_PATH}" >> "${OUTPUT_PATH}.log" 2>&1
{
echo "Convert TensorFlow model to circle."
echo "Usage: one-import-tf"
+ echo " --version Show version information and exit"
echo " --input_path <path/to/tfmodel>"
echo " --output_path <path/to/circle>"
echo " --input_arrays <names of the input arrays, comma-separated>"
echo " --input_shapes <input shapes, colon-separated>"
echo " --output_arrays <names of the output arrays, comma-separated>"
- exit 0
+ echo " --v2 Use TensorFlow 2.x interface (default is 1.x interface)"
+ exit 255
}
+version()
+{
+ $DRIVER_PATH/one-version one-import-tf
+ exit 255
+}
+
+TF_INTERFACE="--v1"
+
# Parse command-line arguments
#
while [ "$#" -ne 0 ]; do
'--help')
usage
;;
+ '--version')
+ version
+ ;;
'--input_path')
export INPUT_PATH="$2"
shift 2
export OUTPUT_ARRAYS="$2"
shift 2
;;
+ '--v2')
+ TF_INTERFACE="--v2"
+ shift
+ ;;
*)
echo "Unknown parameter: ${CUR}"
shift
# remove previous log
rm -rf "${OUTPUT_PATH}.log"
+show_err_onexit()
+{
+ cat "${OUTPUT_PATH}.log"
+}
+
+trap show_err_onexit ERR
+
# generate temporary tflite file
-echo "python" "${DRIVER_PATH}/tf2tfliteV2.py" --v2 --input_path ${INPUT_PATH} \
+echo "python" "${DRIVER_PATH}/tf2tfliteV2.py" ${TF_INTERFACE} --input_path ${INPUT_PATH} \
--input_arrays ${INPUT_ARRAYS} --input_shapes ${INPUT_SHAPES} \
--output_path "${TMPDIR}/${MODEL_NAME}.tflite" \
--output_arrays ${OUTPUT_ARRAYS} > "${OUTPUT_PATH}.log"
echo " " >> "${OUTPUT_PATH}.log"
-python "${DRIVER_PATH}/tf2tfliteV2.py" --v2 --input_path ${INPUT_PATH} \
+python "${DRIVER_PATH}/tf2tfliteV2.py" ${TF_INTERFACE} --input_path ${INPUT_PATH} \
--input_arrays ${INPUT_ARRAYS} --input_shapes ${INPUT_SHAPES} \
--output_path "${TMPDIR}/${MODEL_NAME}.tflite" \
--output_arrays ${OUTPUT_ARRAYS} >> "${OUTPUT_PATH}.log" 2>&1
{
echo "Convert TensorFlow lite model to circle."
echo "Usage: one-import-tflite"
+ echo " --version Show version information and exit"
echo " --input_path <path/to/tflitemodel>"
echo " --output_path <path/to/circle>"
- exit 0
+ exit 255
+}
+
+version()
+{
+ $DRIVER_PATH/one-version one-import-tflite
+ exit 255
}
# Parse command-line arguments
'--help')
usage
;;
+ '--version')
+ version
+ ;;
'--input_path')
export INPUT_PATH="$2"
shift 2
echo "Error: input model not found"
echo ""
usage
- exit 2
fi
# remove previous log
rm -rf "${OUTPUT_PATH}.log"
+show_err_onexit()
+{
+ cat "${OUTPUT_PATH}.log"
+}
+
+trap show_err_onexit ERR
+
# convert .tflite to .circle
echo "${DRIVER_PATH}/tflite2circle" "${INPUT_PATH}" "${OUTPUT_PATH}" > "${OUTPUT_PATH}.log"
{
echo "Optimize circle model."
echo "Usage: one-optimize"
+ echo " --version Show version information and exit"
echo " --all Enable all optimization algorithms"
echo " --fuse_bcq Enable FuseBCQ Pass"
echo " --fuse_instnorm Enable FuseInstanceNormalization Pass"
echo " Enable ResolveCustomOpMatMulPass Pass"
echo " --input_path <path/to/input/circle>"
echo " --output_path <path/to/output/circle>"
- exit 0
+ exit 255
+}
+
+version()
+{
+ $DRIVER_PATH/one-version one-optimize
+ exit 255
}
OPTIMIZE_all=0
'--help')
usage
;;
+ '--version')
+ version
+ ;;
'--all')
OPTIMIZE_all=1
shift
echo "Error: input model not found"
echo ""
usage
- exit 2
fi
OPTIMIZE_OPTIONS=""
# remove previous log
rm -rf "${OUTPUT_PATH}.log"
+show_err_onexit()
+{
+ cat "${OUTPUT_PATH}.log"
+}
+
+trap show_err_onexit ERR
+
# NOTE do not wrap ${OPTIMIZE_OPTIONS} with ""
# optimize circle
echo "${DRIVER_PATH}/circle2circle" ${OPTIMIZE_OPTIONS} \
{
echo "Package circle to nnpkg"
echo "Usage: one-pack"
+ echo " -v, --version Show version information and exit"
echo " -i <path/to/circle>"
echo " -o <path/to/nnpackage/folder>"
- exit 0
+ exit 255
+}
+
+version()
+{
+ $DRIVER_PATH/one-version one-pack
+ exit 255
}
# Parse command-line arguments
'--help')
usage
;;
+ '-v')
+ version
+ ;;
+ '--version')
+ version
+ ;;
'-i')
export INPUT_PATH="$2"
shift 2
echo "Error: input model not found"
echo ""
usage
- exit 2
fi
+INPUT_FILE=$(basename "${INPUT_PATH}")
+LOG_FILE="${INPUT_FILE%.*}.pack.log"
+
# remove previous log
-rm -rf "${OUTPUT_PATH}.log"
+rm -rf "${LOG_FILE}"
+
+show_err_onexit()
+{
+ cat "${LOG_FILE}"
+}
+
+trap show_err_onexit ERR
# Package circle model file to nnpkg
-echo "${DRIVER_PATH}/model2nnpkg.sh" -o "${OUTPUT_PATH}" "${INPUT_PATH}" > "${OUTPUT_PATH}.log"
+echo "${DRIVER_PATH}/model2nnpkg.sh" -o "${OUTPUT_PATH}" "${INPUT_PATH}" > "${LOG_FILE}"
-"${DRIVER_PATH}/model2nnpkg.sh" -o "${OUTPUT_PATH}" "${INPUT_PATH}" >> "${OUTPUT_PATH}.log" 2>&1
+"${DRIVER_PATH}/model2nnpkg.sh" -o "${OUTPUT_PATH}" "${INPUT_PATH}" >> "${LOG_FILE}" 2>&1
fi
# Install prerequisites
-python3 -m pip install -U virtualenv
+python3 -m pip install --user -U virtualenv
+
+function error_no_ensurepip ()
+{
+ echo "ERROR: python3 'ensurepip' module is not found."
+ echo " On ubuntu, try following command:"
+ echo
+ echo " apt install python$(python3 --version | awk '{print $2}' | awk -F. '{print $1"."$2}')-venv"
+ echo
+ echo " You may need root privilege for this."
+ exit 1
+}
+python3 -m ensurepip --version > /dev/null 2>&1 || error_no_ensurepip
# Create python virtual enviornment
python3 -m venv "${DRIVER_PATH}/venv"
python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
install -U pip setuptools
python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
- install tensorflow-cpu==2.3.0rc0
+ install tensorflow-cpu==2.3.0
{
echo "Quantize circle model."
echo "Usage: one-quantize"
+ echo " --version Show version information and exit"
echo " --input_dtype Input data type (supported: float32, default=float32)"
echo " --quantized_dtype Output quantized data type (supported: uint8, default=uint8)"
- echo " --granularity Quantize granularity (supported: layer, default=layer)"
+ echo " --granularity Quantize granularity (supported: layer, channel, default=layer)"
echo " --min_percentile Minimum percentile (0.0~100.0, default=1.0)"
echo " --max_percentile Maximum percentile (0.0~100.0, default=99.0)"
echo " --mode Record mode (supported: percentile/moving_average, default=percentile)"
echo " --input_path <path/to/input/circle>"
echo " --input_data <path/to/input/data>"
echo " --output_path <path/to/output/circle>"
- exit 0
+ exit 255
+}
+
+version()
+{
+ $DRIVER_PATH/one-version one-quantize
+ exit 255
}
INPUT_DTYPE=float32
'--help')
usage
;;
+ '--version')
+ version
+ ;;
'--input_dtype')
INPUT_DTYPE="$2"
echo "Error: input model not found"
echo ""
usage
- exit 2
fi
if [ -z ${INPUT_DATA} ] || [ ! -e ${INPUT_DATA} ]; then
echo "Error: input data not found"
echo ""
usage
- exit 2
fi
FILE_BASE=$(basename ${OUTPUT_PATH})
# remove previous log
rm -rf "${OUTPUT_PATH}.log"
+show_err_onexit()
+{
+ cat "${OUTPUT_PATH}.log"
+}
+
+trap show_err_onexit ERR
+
# quantize circle
echo "${DRIVER_PATH}/circle-quantizer" \
--quantize_dequantize_weights ${INPUT_DTYPE} ${QUANTIZED_DTYPE} ${GRANULARITY} \
require("circle2circle")
require("circle-quantizer")
require("record-minmax")
+require("vconone")
+require("bcq-tools")
${QUANTIZATION_VALUE_TEST_WITH_PARAM}
)
-#add_test(
-# NAME pota_record_minmax_test
-# COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_record_minmax.sh"
-# "${TEST_CONFIG}"
-# "${ARTIFACTS_BIN_PATH}"
-# ${QUANTIZATION_VALUE_TEST_WITH_PARAM}
-#)
+add_test(
+ NAME pota_record_minmax_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_record_minmax.sh"
+ "${TEST_CONFIG}"
+ "${ARTIFACTS_BIN_PATH}"
+ ${QUANTIZATION_VALUE_TEST_WITH_PARAM}
+)
-#add_test(
-# NAME pota_quantization_test
-# COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_quantization.sh"
-# "${TEST_CONFIG}"
-# "${ARTIFACTS_BIN_PATH}"
-# ${QUANTIZATION_VALUE_TEST_WITH_PARAM}
-#)
+add_test(
+ NAME pota_quantization_test
+ COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_quantization.sh"
+ "${TEST_CONFIG}"
+ "${ARTIFACTS_BIN_PATH}"
+ ${QUANTIZATION_VALUE_TEST_WITH_PARAM}
+)
-#set_tests_properties(pota_record_minmax_test PROPERTIES DEPENDS pota_fake_wquant_test)
-#set_tests_properties(pota_quantization_test PROPERTIES DEPENDS pota_record_minmax_test)
+set_tests_properties(pota_record_minmax_test PROPERTIES DEPENDS pota_fake_wquant_test)
+set_tests_properties(pota_quantization_test PROPERTIES DEPENDS pota_record_minmax_test)
if key == "weights":
expected_weights = np.array(json_load["weights"])
input_weights = tensor["weights"][:]
- if np.allclose(input_weights, expected_weights, rtol=0, atol=0) == False:
+ if np.allclose(input_weights, expected_weights, rtol=0, atol=1) == False:
print("Quantized weights of " + tensor_name + " (" + str(input_weights) +
") do not match with expected value (" + str(expected_weights) +
").")
expected_zero_point = np.array(json_load["zero_point"])
input_zero_point = tensor["zero_point"][:]
if np.allclose(
- input_zero_point, expected_zero_point, rtol=0, atol=0) == False:
+ input_zero_point, expected_zero_point, rtol=0, atol=1) == False:
print("Quantized zero_point of " + tensor_name + " (" +
str(input_zero_point) + ") do not match with expected value (" +
str(expected_zero_point) + ").")
[
[
[
- 1.003921627998352,
- 2.007843255996704
- ],
+ 1.0039215087890625,
+ 2.007843017578125
+ ],
[
- -3.0117647647857666,
+ -3.0117650032043457,
-4.015686511993408
]
- ],
+ ],
[
[
- -5.019608020782471,
- 6.023529529571533
- ],
+ -5.019608497619629,
+ 6.023530006408691
+ ],
[
- -7.027451038360596,
- 7.968627452850342
+ -7.027451515197754,
+ 7.9686279296875
]
]
- ],
+ ],
[
[
[
- 4.015686511993408,
- -2.007843255996704
- ],
+ 4.01568603515625,
+ -2.007843494415283
+ ],
[
- 3.0117647647857666,
- -1.003921627998352
+ 3.0117645263671875,
+ -1.0039215087890625
]
- ],
+ ],
[
[
- -7.968627452850342,
- -6.023529529571533
- ],
+ -7.9686279296875,
+ -6.023530006408691
+ ],
[
- 7.027451038360596,
- 5.019608020782471
+ 7.027451515197754,
+ 5.019608497619629
]
]
]
- {
- "scale": 0.0059054209919261825,
- "weights": [
- 169.0,
- 339.0
- ]
- }
+{
+ "weights": [
+ 4069,
+ 8138
+ ],
+ "scale": 0.0002457468386200985
+}
{
- "scale": 0.09411764705882353,
+ "scale": 0.003916590008884668,
"zero_point": 0.0
}
{
- "max": 7.968627450980392,
- "scale": 0.06274509803921569,
"weights": [
[
[
[
- 144,
- 160
- ],
+ 143,
+ 159
+ ],
[
- 80,
- 64
+ 79,
+ 63
]
- ],
+ ],
[
[
- 48,
- 224
- ],
+ 47,
+ 223
+ ],
[
- 16,
- 255
+ 15,
+ 254
]
]
- ],
+ ],
[
[
[
- 192,
- 96
- ],
+ 191,
+ 95
+ ],
[
- 176,
- 112
+ 175,
+ 111
]
- ],
+ ],
[
[
- 1,
- 32
- ],
+ 0,
+ 31
+ ],
[
- 240,
- 208
+ 239,
+ 207
]
]
]
- ],
- "min": -8.031372549019608,
- "zero_point": 128.0
+ ],
+ "scale": 0.062745101749897,
+ "zero_point": 127.0,
+ "min": -7.9686279296875,
+ "max": 8.031373023986816
}
{
- "scale": 0.17836222929113052,
+ "scale": 0.037479765713214874,
"zero_point": 0.0
}
{
- "max": 24.0,
- "min": 1.0
+ "min": 0.005472412034869194,
+ "max": 0.9987304735183716
}
{
- "max": 45.48236846923828,
- "min": 0.0
+ "min": 0.0,
+ "max": 9.557340850830078
}
[
[
[
- 0.9725490212440491,
- 1.9450980424880981,
- 3.0392158031463623,
+ 0.9725494384765625,
+ 1.945098876953125,
+ 3.039216995239258,
4.0117645263671875
- ],
+ ],
[
- -8.996078491210938,
- 9.968626976013184,
- -10.941176414489746,
- 12.035294532775879
+ -8.996077537536621,
+ 9.9686279296875,
+ -10.94117546081543,
+ 12.035295486450195
]
- ],
+ ],
[
[
- 4.984313488006592,
- 5.956862926483154,
- 7.050980567932129,
- 8.023529052734375
- ],
+ 4.98431396484375,
+ 5.9568634033203125,
+ 7.050981521606445,
+ 8.023530960083008
+ ],
[
- 13.007843017578125,
- -13.980392456054688,
- 14.952940940856934,
+ 13.007843017578125,
+ -13.980391502380371,
+ 14.95294189453125,
-16.04705810546875
]
]
{
- "scale": 0.007627835447904652,
"weights": [
- 131.0,
- 262.0,
- 393.0,
- 524.0
- ]
+ 2156,
+ 4312,
+ 6468,
+ 8624
+ ],
+ "scale": 0.0004638272181067826
}
{
- "scale": 0.06274509803921569,
+ "scale": 0.0038153529167175293,
"zero_point": 0.0
}
{
- "max": 14.952941176470588,
- "scale": 0.12156862745098039,
"weights": [
[
[
[
- 140,
- 148,
- 157,
+ 140,
+ 148,
+ 157,
165
- ],
+ ],
[
- 58,
- 214,
- 42,
+ 58,
+ 214,
+ 42,
231
]
- ],
+ ],
[
[
- 173,
- 181,
- 190,
+ 173,
+ 181,
+ 190,
198
- ],
+ ],
[
- 239,
- 17,
- 255,
+ 239,
+ 17,
+ 255,
0
]
]
]
- ],
- "min": -16.04705882352941,
- "zero_point": 132.0
+ ],
+ "scale": 0.12156862765550613,
+ "zero_point": 132.0,
+ "min": -16.04705810546875,
+ "max": 14.952940940856934
}
{
- "scale": 0.893733185412837,
+ "scale": 0.07362665981054306,
"zero_point": 0.0
}
{
- "max": 16.0,
- "min": 1.0
+ "min": 0.02638142943382263,
+ "max": 0.9729149651527405
}
{
- "max": 227.90196228027344,
- "min": 0.0
+ "min": 0.0,
+ "max": 18.77479721069336
}
--- /dev/null
+{
+ "weights": [
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608497619629,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.9686279296875,
+ 4.01568603515625,
+ -2.007843494415283,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ],
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608497619629,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.9686279296875,
+ 4.01568603515625,
+ -2.007843494415283,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ],
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608497619629,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.9686279296875,
+ 4.01568603515625,
+ -2.007843494415283,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ],
+ [
+ 1.0039215087890625,
+ 2.007843017578125,
+ -3.0117650032043457,
+ -4.015686511993408,
+ -5.019608497619629,
+ 6.023530006408691,
+ -7.027451515197754,
+ 7.9686279296875,
+ 4.01568603515625,
+ -2.007843494415283,
+ 3.0117645263671875,
+ -1.0039215087890625,
+ -7.9686279296875,
+ -6.023530006408691,
+ 7.027451515197754,
+ 5.019608497619629
+ ]
+ ]
+}
--- /dev/null
+{
+ "weights": [
+ 415,
+ -829,
+ -1244,
+ 1658
+ ],
+ "scale": 0.00241205753304663
+}
--- /dev/null
+{
+ "scale": 0.03844216465950012,
+ "zero_point": 126.0
+}
--- /dev/null
+{
+ "scale": 0.741962730884552,
+ "zero_point": 156.0
+}
--- /dev/null
+{
+ "weights": [
+ [
+ 143,
+ 159,
+ 79,
+ 63,
+ 47,
+ 223,
+ 15,
+ 254,
+ 191,
+ 95,
+ 175,
+ 111,
+ 0,
+ 31,
+ 239,
+ 207
+ ],
+ [
+ 143,
+ 159,
+ 79,
+ 63,
+ 47,
+ 223,
+ 15,
+ 254,
+ 191,
+ 95,
+ 175,
+ 111,
+ 0,
+ 31,
+ 239,
+ 207
+ ],
+ [
+ 143,
+ 159,
+ 79,
+ 63,
+ 47,
+ 223,
+ 15,
+ 254,
+ 191,
+ 95,
+ 175,
+ 111,
+ 0,
+ 31,
+ 239,
+ 207
+ ],
+ [
+ 143,
+ 159,
+ 79,
+ 63,
+ 47,
+ 223,
+ 15,
+ 254,
+ 191,
+ 95,
+ 175,
+ 111,
+ 0,
+ 31,
+ 239,
+ 207
+ ]
+ ],
+ "scale": 0.062745101749897,
+ "zero_point": 127.0,
+ "min": -7.9686279296875,
+ "max": 8.031373023986816
+}
--- /dev/null
+{
+ "min": -4.832756385803223,
+ "max": 4.969995346069336
+}
--- /dev/null
+{
+ "min": -115.99438369750976,
+ "max": 73.20612327575684
+}
--- /dev/null
+{
+ "weights": [
+ [
+ [
+ [
+ 0.960784912109375,
+ 2.0588245391845703
+ ],
+ [
+ -3.0196075439453125,
+ -3.980391502380371
+ ],
+ [
+ 4.9411773681640625,
+ -6.039215087890625
+ ]
+ ],
+ [
+ [
+ 7.0,
+ 7.960784912109375
+ ],
+ [
+ -9.058823585510254,
+ -10.019607543945312
+ ],
+ [
+ 10.980392456054688,
+ -11.941176414489746
+ ]
+ ],
+ [
+ [
+ 13.039216995239258,
+ 14.000001907348633
+ ],
+ [
+ -14.960784912109375,
+ -16.05882453918457
+ ],
+ [
+ 17.019607543945312,
+ -17.980392456054688
+ ]
+ ]
+ ]
+ ]
+}
--- /dev/null
+{
+ "scale": 0.03869570419192314,
+ "zero_point": 126.0
+}
--- /dev/null
+{
+ "weights": [
+ [
+ [
+ [
+ 138,
+ 146
+ ],
+ [
+ 109,
+ 102
+ ],
+ [
+ 167,
+ 87
+ ]
+ ],
+ [
+ [
+ 182,
+ 189
+ ],
+ [
+ 65,
+ 58
+ ],
+ [
+ 211,
+ 44
+ ]
+ ],
+ [
+ [
+ 226,
+ 233
+ ],
+ [
+ 22,
+ 14
+ ],
+ [
+ 255,
+ 0
+ ]
+ ]
+ ]
+ ],
+ "scale": 0.13725490868091583,
+ "zero_point": 131.0,
+ "min": -17.980392456054688,
+ "max": 17.019609451293945
+}
--- /dev/null
+{
+ "scale": 1.6333034038543701,
+ "zero_point": 127.0
+}
--- /dev/null
+{
+ "min": -4.890846576690674,
+ "max": 4.976558513641357
+}
--- /dev/null
+{
+ "min": -207.54233032226563,
+ "max": 208.95002136230468
+}
addTest(Conv2D_004 layer uint8)
addTest(DepthwiseConv2D_002 layer uint8)
+addTest(FullyConnected_003 layer uint8)
+addTest(TransposeConv_001 layer uint8)
-1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24
+0.01090685,0.0581577 ,0.637094 ,0.64067715,0.26264507,0.13692169,0.9649414 ,0.5117181 ,0.18012471,0.07855253,0.6358017 ,0.62257963,0.41469443,0.93169045,0.20763828,0.7634293 ,0.75929826,0.72708374,0.23463063,0.58222896,0.6351517 ,0.68781173,0.5558012 ,0.7652179
--- /dev/null
+0.57017624,0.08235867,0.03672464,0.40372616,0.7353964 ,0.59611887,0.7675548 ,0.21004233,0.09803218,0.20009473,0.8821493 ,0.17015271,0.14840214,0.99910176,0.37003204,0.22893582,0.43173164,0.3105084 ,0.41997132,0.43714985,0.08115962,0.71896386,0.7810953 ,0.00524598
--- /dev/null
+0.65292275,0.79842275,0.97853714,0.6711518 ,0.607567 ,0.40971732,0.74838483,0.95853555,0.32158023,0.911524 ,0.66938365,0.8573132 ,0.3047727 ,0.5561248 ,0.914098 ,0.07650814,0.37868017,0.29269257,0.19652605,0.63025194,0.61496884,0.32011527,0.8204132 ,0.21866946
--- /dev/null
+0.4548901 ,0.56957537,0.0252368 ,0.4884317 ,0.7516498 ,0.02631272,0.22107519,0.95249426,0.34902394,0.11520014,0.808911 ,0.4148615 ,0.63615656,0.84020686,0.3633697 ,0.23993976,0.54176176,0.86938345,0.81628686,0.6380988 ,0.91891205,0.0406627 ,0.90289026,0.9429013
--- /dev/null
+0.9309136 ,0.02123719,0.64467335,0.6910113 ,0.47402772,0.54622203,0.31527275,0.81530565,0.98981965,0.36102158,0.03114039,0.1902339 ,0.45183742,0.60178596,0.4683102 ,0.59810966,0.40558222,0.5420302 ,0.72699505,0.9575108 ,0.46746576,0.08518691,0.40302262,0.69213694
-1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12, 13, 14, 15, 16
+0.31365377,0.6127105 ,0.7047126 ,0.2511918 ,0.16652136,0.36075932,0.44332707,0.77615815,0.60456425,0.26207635,0.28714025,0.11579613,0.89698446,0.67223394,0.3757766 ,0.11787009
--- /dev/null
+0.9409595 ,0.3991174 ,0.43546647,0.221152 ,0.7794665 ,0.8619514 ,0.5903087 ,0.24476172,0.5932698 ,0.2727837 ,0.3980262 ,0.13329633,0.4319272 ,0.37872055,0.1721639 ,0.92437047
--- /dev/null
+0.6484028 ,0.09222967,0.76285905,0.02265582,0.2564394 ,0.11219095,0.22529566,0.09101159,0.15937322,0.3540595 ,0.25971088,0.4681136 ,0.4279646 ,0.5386553 ,0.11397707,0.7413688
--- /dev/null
+0.9182678 ,0.8253187 ,0.6572848 ,0.46436486,0.45208713,0.42112917,0.24383743,0.16039051,0.24649048,0.63431305,0.31141657,0.25664324,0.721266 ,0.18996912,0.35422477,0.8826148
--- /dev/null
+0.97424644,0.9360494 ,0.6849295 ,0.21313633,0.23943195,0.32497332,0.5091704 ,0.67543274,0.49667478,0.73460567,0.5866559 ,0.5312464 ,0.8252662 ,0.36093768,0.7143621 ,0.7234413
--- /dev/null
+ 2.7731526 , 2.451602 , 3.7535272 ,-1.2774152 , 1.5482912 , 1.3402948 , 4.4792123 ,-4.4954367 , 3.354679 ,-3.3615496 ,-4.619757 ,-3.3659618 , 4.7626247 ,-1.3596478 ,-4.835548 , 0.78964525
--- /dev/null
+ 0.5400839 ,-3.2621996 ,-3.4817135 , 3.8183312 , 0.48498327, 2.9812584 , 4.111276 , 0.11223658, 4.7201405 , 2.4256718 , 1.4895477 , 4.7596602 ,-0.32709372, 1.3507305 ,-0.30043927,-1.8077502
--- /dev/null
+ 3.8758078 , 4.978636 ,-0.22925885,-2.6760504 ,-1.9160627 ,-4.609644 ,-0.9515802 , 3.558274 , 2.9096057 , 0.3340422 , 0.38608226,-0.32168412, 4.688853 ,-4.583811 ,-2.5113506 ,-4.6688786
--- /dev/null
+-2.9868221 , 2.4237797 , 1.0833962 ,-0.9231426 ,-2.1091506 ,-2.6163697 ,-0.23101932,-1.9252896 , 4.7034135 , 3.1088963 ,-2.345823 ,-2.7866168 ,-3.186763 ,-4.431844 , 3.3113294 , 0.9501982
--- /dev/null
+ 3.9716747 ,-2.254871 , 1.1943274 ,-2.212602 , 3.4311683 , 1.114989 , 4.0739036 , 0.47244295,-3.5793104 ,-3.359908 ,-4.7657595 , 2.0369127 ,-2.5619278 ,-3.4452975 ,-4.5852203 ,-1.137643
--- /dev/null
+-1.4124781 , 0.42694193, 1.1734594 ,-3.5111153 ,-2.9756174 , 1.3682148 ,-2.318465 , 2.198896 ,-4.5043235 , 3.1775594 ,-0.42802384,-1.4872279 , 1.3821319 ,-4.771963 ,-0.12837897, 4.132799 , 3.697655 , 2.0807178 ,-3.621293 , 2.121878 ,-0.25654107, 0.42100102,-1.4009671 ,-2.9733627 ,-0.7058871 ,-2.831215 , 3.5669627 , 2.1420689 ,-1.8789555 , 0.8104939 ,-2.0503597 , 1.7788508
--- /dev/null
+ 3.4726453 , 3.0497985 ,-4.234619 ,-1.0526706 , 1.7278554 ,-3.341614 , 4.54768 , 3.0954597 ,-3.735109 , 2.8810751 ,-2.5381427 ,-3.2360535 ,-1.5378917 , 2.3052745 ,-3.170938 ,-3.327242 , 2.0654576 ,-2.2294598 ,-1.881382 , 0.13216451,-4.2825613 , 0.26616526, 4.6196365 ,-0.88623226, 1.7103885 ,-1.5865034 ,-3.9114466 ,-3.2227128 , 4.909618 , 2.3318915 , 0.84300846, 0.760918
--- /dev/null
+-4.6097918,-4.21991 ,-3.9955974, 3.6492047, 2.9191775, 2.8082933, 1.6189331, 0.2730309,-1.5029653,-1.9471445, 4.8758197, 3.3177438, 3.1338058,-2.1281245,-1.7526287,-2.5518703,-1.7746793, 4.0455256,-0.5839861,-4.408046 ,-4.0034447, 1.5858272,-4.5896654, 4.7211285,-4.677515 ,-2.6027086,-4.7896166,-3.5512326,-1.9068764,-2.9705904,-4.854087 ,-4.892111
--- /dev/null
+ 2.1514777e-02, 2.6526773e+00,-3.0477784e+00, 1.3287724e+00,-4.1414630e-01,-1.7295350e-01, 7.6649576e-01,-1.8028022e+00,-7.0781744e-01,-2.5262204e-01,-3.0970418e+00,-1.3165286e+00,-4.6649928e+00, 2.0809033e+00,-1.5739973e+00,-4.0531826e-01,-2.1718202e+00, 2.0146034e+00, 2.5044403e+00,-1.1256610e+00, 1.3536702e+00, 1.0283234e-03,-1.8823910e+00, 4.7122188e+00, 9.4781297e-01, 3.2012525e+00,-5.5164534e-01,-2.6158772e+00,-1.8771547e+00,-3.1689723e+00, 4.9054880e+00,-3.4560370e+00
--- /dev/null
+-2.0927553 ,-2.107511 ,-1.6963564 , 1.7006218 , 1.4575784 , 0.06095728, 1.2659966 , 4.1905265 , 1.3035946 , 4.9793477 ,-4.3388166 ,-0.23496658, 1.9831208 , 2.6154642 ,-0.2790228 ,-3.1774354 ,-3.178935 ,-1.1564373 ,-0.8199472 ,-2.245698 ,-4.8605046 ,-3.569018 ,-1.4226891 ,-4.1067843 , 2.6078918 ,-3.5830674 , 1.9065963 , 2.435578 ,-3.3216476 , 4.5930347 , 2.9191844 , 1.7885648
# Run record-minmax
"${RECORD_MINMAX_PATH}" \
- "${TEST_RESULT_FILE}.fake_quantized.circle" \
- "${TEST_RESULT_FILE}.input.h5" \
- "${TEST_RESULT_FILE}.minmax_recorded.circle"
+ --input_model "${TEST_RESULT_FILE}.fake_quantized.circle" \
+ --input_data "${TESTCASE_FILE}.input.h5" \
+ --output_model "${TEST_RESULT_FILE}.minmax_recorded.circle"
# Dump min/max values (circle-tensordump)
"${CIRCLE_TENSORDUMP_PATH}" \
target_link_libraries(record-minmax luci_import)
target_link_libraries(record-minmax luci_export)
target_link_libraries(record-minmax luci_interpreter)
+target_link_libraries(record-minmax vconone)
install(TARGETS record-minmax DESTINATION bin)
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_find_package(GTest REQUIRED)
GTest_AddTest(record_minmax_function_test "${CMAKE_CURRENT_SOURCE_DIR}/tests/RecordFunction.test.cpp")
target_include_directories(record_minmax_function_test PRIVATE include)
#include "RecordMinMax.h"
#include <arser/arser.h>
+#include <vconone/vconone.h>
+
+void print_version(void)
+{
+ std::cout << "record-minmax version " << vconone::get_string() << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+}
int entry(const int argc, char **argv)
{
arser::Arser arser(
"Embedding min/max values of activations to the circle model for post-training quantization");
+ arser.add_argument("--version")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(print_version);
+
arser.add_argument("--input_model")
.nargs(1)
.type(arser::DataType::STR)
{
std::cout << err.what() << std::endl;
std::cout << arser;
- return 0;
+ return 255;
}
auto input_model_path = arser.get<std::string>("--input_model");
require("luci")
require("safemain")
require("arser")
+require("vconone")
#include <string>
#include <cassert>
+#include <stdexcept>
using Shape = luci_interpreter::Shape;
using DataType = luci_interpreter::DataType;
assert(node->opcode() != luci::CircleOpcode::UNPACK);
assert(node->opcode() != luci::CircleOpcode::WHILE);
- if (node->opcode() == luci::CircleOpcode::CONST)
+ if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
{
// node is not activation. Do nothing.
return;
auto node = iter->first;
auto minmax = iter->second;
- float min, max;
+ float min{0.0f}, max{0.0f};
if (mode == "percentile")
{
min = getNthPercentile(minmax.min_vector, min_percentile);
EXPECT_FLOAT_NEAR(0, getNthPercentile(input, 0));
EXPECT_FLOAT_NEAR(9, getNthPercentile(input, 100));
+
+ SUCCEED();
}
TEST(GetNthPercentileTest, Simple)
{
EXPECT_FLOAT_NEAR(0.09 * std::floor(i) + 0.045, getNthPercentile(input, i));
}
+
+ SUCCEED();
}
TEST(GetNthPercentileTest, Float)
EXPECT_FLOAT_NEAR(2.799942346802177, getNthPercentile(input, 1));
EXPECT_FLOAT_NEAR(7.768503955476342, getNthPercentile(input, 3.14));
EXPECT_FLOAT_NEAR(99.40456084968194, getNthPercentile(input, 99));
+
+ SUCCEED();
}
TEST(GetNthPercentileTest, FloatWithNegative)
EXPECT_FLOAT_NEAR(-47.20005765319782, getNthPercentile(input, 1));
EXPECT_FLOAT_NEAR(-42.23149604452366, getNthPercentile(input, 3.14));
EXPECT_FLOAT_NEAR(49.40456084968194, getNthPercentile(input, 99));
+
+ SUCCEED();
}
TEST(GetNthPercentileTest, SigleElement)
EXPECT_FLOAT_NEAR(33, getNthPercentile(input, 0));
EXPECT_FLOAT_NEAR(33, getNthPercentile(input, 50));
EXPECT_FLOAT_NEAR(33, getNthPercentile(input, 100));
+
+ SUCCEED();
}
TEST(GetNthPercentileTest, OutOfBoundary_NEG)
EXPECT_THROW(getNthPercentile(input, -1), std::runtime_error);
EXPECT_THROW(getNthPercentile(input, 101), std::runtime_error);
+
+ SUCCEED();
}
TEST(GetNthPercentileTest, EmptyVector_NEG)
std::vector<float> input;
EXPECT_THROW(getNthPercentile(input, 10), std::runtime_error);
+
+ SUCCEED();
}
} // namespace record_minmax
COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_EXPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_export_action>\"' >> ${TEST_CONFIG}
COMMAND ${CMAKE_COMMAND} -E echo 'HDF5_IMPORT_ACTION_PATH=\"$<TARGET_FILE:nnkit_HDF5_import_action>\"' >> ${TEST_CONFIG}
COMMAND ${CMAKE_COMMAND} -E echo 'MODEL2NNPKG_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh\"' >> ${TEST_CONFIG}
- COMMAND ${CMAKE_COMMAND} -E echo 'NNPKG_TEST_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/tests/scripts/nnpkg_test.sh\"' >> ${TEST_CONFIG}
COMMAND ${CMAKE_COMMAND} -E echo 'RUNTIME_LIBRARY_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/Product/out/\"' >> ${TEST_CONFIG}
DEPENDS
nnkit-run
#--------------- Remote Machine Setting ---------------#
set(REMOTE_IP "xxx.xxx.xxx.xxx")
set(REMOTE_USER "remote_username")
-
+
#--------------------- Tests list ---------------------#
add(UNIT_Add_000)
add(UNIT_Add_001)
...
```
- - If any Tensorflow model is added, or if `REMOTE_IP` and `REMOTE_USER` is not given, `tf2circle-value-pbtxt-remote-test` will not be created.
+ - If any TensorFlow model is added, or if `REMOTE_IP` and `REMOTE_USER` are not given, `tf2circle-value-pbtxt-remote-test` will not be created.
1. (Optional) ssh authentication
- This test uses the `ssh` and `scp` commands, and those commands ask for the remote machine's password every time they are called. This means you would have to type the password on every `ssh` and `scp` invocation.
- This test avoids that by using `ssh-copy-id`, which copies the host machine's public key into `authorized_keys` on the remote machine. Because of that, the test asks for the remote machine's password only once, the first time. This is the only user interaction while running this test.
├ Result_latest -> Result_YYMMDD_hhmmss.csv
├ Result_YYMMDD_hhmmss.csv
├ ...
- |
+ |
├ UNIT_Add_000
| ├ metadata
| | ├ MANIFEST
|
├ ...
```
-- `nnpkg_test.sh`, runtime products and each nnpackage are sent to `REMOTE_WORKDIR` in remote machine.
+- Runtime products and each nnpackage are sent to `REMOTE_WORKDIR` on the remote machine.
- (TBD) Modify script not to remove obtained h5 file.
```
REMOTE_WORKDIR
- ├ nnpkg_test.sh
|
├ Product
| └ out
| ├ bin
| ├ lib
+ | ├ test
| ├ ...
|
├ UNIT_Add_000
echo "-- Found TF backend: ${TF_BACKEND_PATH}"
echo "-- Found TF2CIRCLE: ${TF2CIRCLE_PATH}"
echo "-- Found MODEL2NNPKG: ${MODEL2NNPKG_PATH}"
-echo "-- Found nnpkg_test: ${NNPKG_TEST_PATH}"
echo "-- Found Runtime library: ${RUNTIME_LIBRARY_PATH}"
echo "-- Found randomize action: ${RANDOMIZE_ACTION_PATH}"
echo "-- Found HDF5 export action: ${HDF5_EXPORT_ACTION_PATH}"
exit 3
fi
-if [ -z ${NNPKG_TEST_PATH} ] || [ ! -f ${NNPKG_TEST_PATH} ]; then
- echo "nnpkg_test is not found"
- exit 4
-fi
-
# Register remote machine ssh information
cat /dev/zero | ssh-keygen -q -N ""
ssh-copy-id -o ConnectTimeout=5 "${REMOTE_USER}@${REMOTE_IP}"
ssh "${REMOTE_USER}@${REMOTE_IP}" "mkdir -p ${REMOTE_WORKDIR}/Product/"
scp -r "${RUNTIME_LIBRARY_PATH}" "${REMOTE_USER}@${REMOTE_IP}:${REMOTE_WORKDIR}/Product/"
-# Send nnpkg_test.sh
-scp "${NNPKG_TEST_PATH}" "${REMOTE_USER}@${REMOTE_IP}:${REMOTE_WORKDIR}/"
-
TESTED=()
PASSED=()
FAILED=()
# Run test_arm_nnpkg in remote machine
scp -r "${WORKDIR}/${PREFIX}/" "${REMOTE_USER}@${REMOTE_IP}:${REMOTE_WORKDIR}/${PREFIX}/"
- ssh "${REMOTE_USER}@${REMOTE_IP}" "cd ${REMOTE_WORKDIR}; ./nnpkg_test.sh -i . -o ${PREFIX}/metadata/tc ${PREFIX}"
-
+ ssh "${REMOTE_USER}@${REMOTE_IP}" "cd ${REMOTE_WORKDIR}; ./Product/out/test/onert-test nnpkg-test -i . -o ${PREFIX}/metadata/tc ${PREFIX}"
+
if [[ $? -eq 0 ]]; then
touch "${PASSED_TAG}"
fi
get_target_property(ARTIFACTS_SRC_PATH testDataGenerator SOURCE_DIR)
-# In this test, only the runtime test is performed because the test from tf to
-# nnpackage is done in common-artifacts, and for this runtime test, generation of
+# In this test, only the runtime test is performed because the test from tf to
+# nnpackage is done in common-artifacts, and for this runtime test, generation of
# test data is required. And, tcgenerate in ${ARTIFACTS_SRC_PATH}/exclude.lst
# means it won't generate test data, which is why the "tcgenerate" macro below excludes
-# specific opearators from runtime test.
-# Also, since circlize and optimize macro included in `exclude.lst` file is only
+# specific operators from runtime test.
+# Also, since the circlize and optimize macros included in the `exclude.lst` file are only
# needed in common-artifacts, they have no function here.
macro(circlize)
endmacro()
add_custom_command(
OUTPUT ${TEST_CONFIG}
COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG}
- COMMAND ${CMAKE_COMMAND} -E echo 'NNPKG_TEST_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/tests/scripts/nnpkg_test.sh\"' >> ${TEST_CONFIG}
COMMAND ${CMAKE_COMMAND} -E echo 'RUNTIME_LIBRARY_PATH=\"${NNAS_PROJECT_SOURCE_DIR}/Product/out/\"' >> ${TEST_CONFIG}
COMMENT "Generate test configuration"
)
set(REMOTE_IP "xxx.xxx.xxx.xxx")
set(REMOTE_USER "remote_username")
```
- - If any recipe is added, or if `REMOTE_IP` and `REMOTE_USER` is not given, `tf2nnpackage-value-remote-test` will not be created.
+ - If any recipe is added, or if `REMOTE_IP` and `REMOTE_USER` are not given, `tf2nnpackage-value-remote-test` will not be created.
1. (Optional) ssh authentication
- This test uses the `ssh` and `scp` commands, and those commands ask for the remote machine's password every time they are called. This means you would have to type the password on every `ssh` and `scp` invocation.
- This test avoids that by using `ssh-copy-id`, which copies the host machine's public key into `authorized_keys` on the remote machine. Because of that, the test asks for the remote machine's password only once, the first time. This is the only user interaction while running this test.
### Generated Files While Running
- All related files (`pb`, `circle`, `h5`, etc.) are taken from the `build/compiler/common-artifacts` folder.
-- `nnpkg_test.sh`, runtime products and each nnpackage are sent to `REMOTE_WORKDIR` in remote machine.
+- Runtime products and each nnpackage are sent to `REMOTE_WORKDIR` on the remote machine.
- Each test result is generated in `build/compiler/common-artifacts` with the name `${RECIPE}.log`
### Check Test Result
source "${CONFIG_PATH}"
-echo "-- Found nnpkg_test: ${NNPKG_TEST_PATH}"
echo "-- Found Runtime library: ${RUNTIME_LIBRARY_PATH}"
echo "-- Found workdir: ${WORKDIR}"
-if [ -z ${NNPKG_TEST_PATH} ] || [ ! -f ${NNPKG_TEST_PATH} ]; then
- echo "nnpkg_test is not found"
- exit 4
-fi
-
# Register remote machine ssh information
cat /dev/zero | ssh-keygen -q -N ""
ssh-copy-id -o ConnectTimeout=5 "${REMOTE_USER}@${REMOTE_IP}"
ssh "${REMOTE_USER}@${REMOTE_IP}" "mkdir -p ${REMOTE_WORKDIR}/Product/"
scp -r "${RUNTIME_LIBRARY_PATH}" "${REMOTE_USER}@${REMOTE_IP}:${REMOTE_WORKDIR}/Product/"
-# Send nnpkg_test.sh
-scp "${NNPKG_TEST_PATH}" "${REMOTE_USER}@${REMOTE_IP}:${REMOTE_WORKDIR}/"
-
TESTED=()
PASSED=()
FAILED=()
PREFIX=${PREFIX}.opt ;
fi
scp -r "${PREFIX}/" "${REMOTE_USER}@${REMOTE_IP}:${REMOTE_WORKDIR}/${PREFIX}/"
- ssh "${REMOTE_USER}@${REMOTE_IP}" "cd ${REMOTE_WORKDIR}; ./nnpkg_test.sh ${PREFIX}"
-
+ ssh "${REMOTE_USER}@${REMOTE_IP}" "cd ${REMOTE_WORKDIR}; ./Product/out/test/onert-test nnpkg-test ${PREFIX}"
+
if [[ $? -eq 0 ]]; then
touch "${BINDIR}/${PASSED_TAG}"
fi
-h, --help show this help message and exit
--v1 Use TensorFlow Lite Converter 1.x
--v2 Use TensorFlow Lite Converter 2.x
+ --graph_def Use graph def file(default)
+ --saved_model Use saved model
+ --keras_model Use keras model
-i INPUT_PATH, --input_path INPUT_PATH
Full filepath of the input file.
-o OUTPUT_PATH, --output_path OUTPUT_PATH
Names of the input arrays, comma-separated.
-s INPUT_SHAPES, --input_shapes INPUT_SHAPES
Shapes corresponding to --input_arrays, colon-
- separated.
+ separated.(ex:"1,4,4,3:1,20,20,3")
-O OUTPUT_ARRAYS, --output_arrays OUTPUT_ARRAYS
Names of the output arrays, comma-separated.
+
```
-#!/usr/bin/env python
+#!/usr/bin/env python3
# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
# Copyright (C) 2018 The TensorFlow Authors
converter_version.add_argument(
"--v2", action="store_true", help="Use TensorFlow Lite Converter 2.x")
+ # Input model format
+ model_format_arg = parser.add_mutually_exclusive_group()
+ model_format_arg.add_argument(
+ "--graph_def",
+ action="store_const",
+ dest="model_format",
+ const="graph_def",
+ help="Use graph def file(default)")
+ model_format_arg.add_argument(
+ "--saved_model",
+ action="store_const",
+ dest="model_format",
+ const="saved_model",
+ help="Use saved model")
+ model_format_arg.add_argument(
+ "--keras_model",
+ action="store_const",
+ dest="model_format",
+ const="keras_model",
+ help="Use keras model")
+
# Input and output path.
parser.add_argument(
"-i",
help="Names of the output arrays, comma-separated.",
required=True)
+ # Set default value
+ parser.set_defaults(model_format="graph_def")
return parser
def _v1_convert(flags):
- input_shapes = None
- if flags.input_shapes:
- input_arrays = _parse_array(flags.input_arrays)
- input_shapes_list = [
- _parse_array(shape, type_fn=int) for shape in flags.input_shapes.split(":")
- ]
- input_shapes = dict(list(zip(input_arrays, input_shapes_list)))
-
- converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(
- flags.input_path, _parse_array(flags.input_arrays),
- _parse_array(flags.output_arrays), input_shapes)
+ if flags.model_format == "graph_def":
+ input_shapes = None
+ if flags.input_shapes:
+ input_arrays = _parse_array(flags.input_arrays)
+ input_shapes_list = [
+ _parse_array(shape, type_fn=int)
+ for shape in flags.input_shapes.split(":")
+ ]
+ input_shapes = dict(list(zip(input_arrays, input_shapes_list)))
+
+ converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(
+ flags.input_path, _parse_array(flags.input_arrays),
+ _parse_array(flags.output_arrays), input_shapes)
+
+ if flags.model_format == "saved_model":
+ converter = tf.compat.v1.lite.TFLiteConverter.from_saved_model(flags.input_path)
+
+ if flags.model_format == "keras_model":
+ converter = tf.compat.v1.lite.TFLiteConverter.from_keras_model_file(
+ flags.input_path)
converter.allow_custom_ops = True
def _v2_convert(flags):
- file_content = open(flags.input_path, 'rb').read()
- try:
- graph_def = tf.compat.v1.GraphDef()
- graph_def.ParseFromString(file_content)
- except (_text_format.ParseError, DecodeError):
+ if flags.model_format == "graph_def":
+ file_content = open(flags.input_path, 'rb').read()
try:
- _text_format.Merge(file_content, graph_def)
+ graph_def = tf.compat.v1.GraphDef()
+ graph_def.ParseFromString(file_content)
except (_text_format.ParseError, DecodeError):
- raise IOError("Unable to parse input file '{}'.".format(flags.input_path))
-
- wrap_func = wrap_frozen_graph(
- graph_def,
- inputs=[
- _str + ":0" if len(_str.split(":")) == 1 else _str
- for _str in _parse_array(flags.input_arrays)
- ],
- outputs=[
- _str + ":0" if len(_str.split(":")) == 1 else _str
- for _str in _parse_array(flags.output_arrays)
- ])
- converter = tf.lite.TFLiteConverter.from_concrete_functions([wrap_func])
+ try:
+ _text_format.Merge(file_content, graph_def)
+ except (_text_format.ParseError, DecodeError):
+ raise IOError("Unable to parse input file '{}'.".format(flags.input_path))
+
+ wrap_func = wrap_frozen_graph(
+ graph_def,
+ inputs=[
+ _str + ":0" if len(_str.split(":")) == 1 else _str
+ for _str in _parse_array(flags.input_arrays)
+ ],
+ outputs=[
+ _str + ":0" if len(_str.split(":")) == 1 else _str
+ for _str in _parse_array(flags.output_arrays)
+ ])
+ converter = tf.lite.TFLiteConverter.from_concrete_functions([wrap_func])
+
+ if flags.model_format == "saved_model":
+ converter = tf.lite.TFLiteConverter.from_saved_model(flags.input_path)
+
+ if flags.model_format == "keras_model":
+ keras_model = tf.keras.models.load_model(flags.input_path)
+ converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
converter.allow_custom_ops = True
converter.experimental_new_converter = True
add_executable(tfl-verify ${SOURCES})
target_include_directories(tfl-verify PRIVATE src)
+target_link_libraries(tfl-verify arser)
target_link_libraries(tfl-verify foder)
target_link_libraries(tfl-verify mio_tflite)
target_link_libraries(tfl-verify safemain)
+require("arser")
require("foder")
require("mio-tflite")
require("safemain")
#include "VerifyFlatBuffers.h"
+#include <arser/arser.h>
+
#include <iostream>
#include <memory>
#include <string>
int entry(int argc, char **argv)
{
- if (argc != 2)
+ arser::Arser arser;
+ arser.add_argument("tflite").type(arser::DataType::STR).help("TFLite file path to verify");
+
+ try
{
- std::cerr << "ERROR: Failed to parse arguments" << std::endl;
- std::cerr << std::endl;
- std::cerr << "USAGE: " << argv[0] << " [tflite]" << std::endl;
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cout << err.what() << std::endl;
+ std::cout << arser;
return 255;
}
+
auto verifier = std::make_unique<VerifyFlatbuffers>();
- std::string model_file = argv[argc - 1];
+ std::string model_file = arser.get<std::string>("tflite");
std::cout << "[ RUN ] Check " << model_file << std::endl;
quant_builder.add_min(quant_min);
quant_builder.add_scale(quant_scale);
quant_builder.add_zero_point(quant_zero_point);
+ quant_builder.add_quantized_dimension(quant.quantized_dimension());
// Update QuantizationParameters Index
quant_index = quant_builder.Finish();
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NonMaxSuppressionV4.h"
+
+flatbuffers::Offset<void> NonMaxSuppressionV4Chef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ tflite::NonMaxSuppressionV4OptionsBuilder options_builder{fbb};
+
+ return options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef>
+NonMaxSuppressionV4ChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new NonMaxSuppressionV4Chef{operation}};
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_NON_MAX_SUPPRESSION_V4_H__
+#define __OP_NON_MAX_SUPPRESSION_V4_H__
+
+#include "OpChef.h"
+
+class NonMaxSuppressionV4Chef final : public OpChef
+{
+public:
+ explicit NonMaxSuppressionV4Chef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override
+ {
+ return tflite::BuiltinOperator_NON_MAX_SUPPRESSION_V4;
+ }
+
+ tflite::BuiltinOptions type(void) const override
+ {
+ return tflite::BuiltinOptions_NonMaxSuppressionV4Options;
+ }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct NonMaxSuppressionV4ChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_NON_MAX_SUPPRESSION_V4_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PadV2.h"
+
+flatbuffers::Offset<void> PadV2Chef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ tflite::PadV2OptionsBuilder padv2_options_builder{fbb};
+ return padv2_options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> PadV2ChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new PadV2Chef{operation}};
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_PADV2_H__
+#define __OP_PADV2_H__
+
+#include "OpChef.h"
+
+class PadV2Chef final : public OpChef
+{
+public:
+ explicit PadV2Chef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_PADV2; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_PadV2Options; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct PadV2ChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_PADV2_H__
OP_CHEF(MirrorPad, MirrorPadChefFactory)
OP_CHEF(Mul, MulChefFactory)
OP_CHEF(Neg, NegChefFactory)
+OP_CHEF(NonMaxSuppressionV4, NonMaxSuppressionV4ChefFactory)
OP_CHEF(NotEqual, NotEqualChefFactory)
OP_CHEF(OneHot, OneHotChefFactory)
OP_CHEF(Pack, PackChefFactory)
OP_CHEF(Pad, PadChefFactory)
+OP_CHEF(PadV2, PadV2ChefFactory)
OP_CHEF(Pow, PowChefFactory)
OP_CHEF(PRelu, PReluChefFactory)
OP_CHEF(Range, RangeChefFactory)
#include "Op/MirrorPad.h"
#include "Op/Mul.h"
#include "Op/Neg.h"
+#include "Op/NonMaxSuppressionV4.h"
#include "Op/NotEqual.h"
#include "Op/OneHot.h"
#include "Op/Pack.h"
#include "Op/Pad.h"
+#include "Op/PadV2.h"
#include "Op/Pow.h"
#include "Op/PRelu.h"
#include "Op/Range.h"
repeated float max = 2;
repeated float scale = 3;
repeated int64 zero_point = 4;
+ optional int32 quantized_dimension = 5 [default = 0];
}
message Operand {
// None
}
+message PadV2Options {
+ // None
+}
+
message MirrorPadOptions {
optional MirrorPadMode mode = 1 [default = REFLECT];
}
// None
}
+message NonMaxSuppressionV4Options {
+ // None
+}
+
message NotEqualOptions {
// None
}
optional LogSoftmaxOptions log_softmax_options = 168;
// DequantizeOptions 169
optional NegOptions neg_options = 170;
- // PadV2Options 171
+ optional PadV2Options padv2_options = 171;
optional LessEqualOptions lessequal_options = 172;
optional SliceOptions slice_options = 173;
optional TransposeConvOptions transpose_conv_options = 174;
optional MatrixSetDiagOptions matrix_set_diag_options = 195;
// HardSwishOptions 196
optional DepthToSpaceOptions depth_to_space_options = 197;
- // NonMaxSuppressionV4Options 198
+ optional NonMaxSuppressionV4Options non_max_suppression_v4_options = 198;
// NonMaxSuppressionV5Options 199
optional ScatterNdOptions scatter_nd_options = 200;
optional NotEqualOptions notequal_options = 201;
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NonMaxSuppressionV4.h"
+
+#include "Convert.h"
+#include "FillerHelper.h"
+
+namespace tflchef
+{
+
+void TFliteOpNonMaxSuppressionV4::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ const auto &inputs = *op->inputs();
+
+ const tflite::Tensor *max_output_size_tensor = import->tensors()->Get(inputs[2]);
+ assert(max_output_size_tensor->type() == tflite::TensorType::TensorType_INT32);
+
+ const tflite::Tensor *iou_threshold_tensor = import->tensors()->Get(inputs[3]);
+ assert(iou_threshold_tensor->type() == tflite::TensorType::TensorType_FLOAT32);
+
+ const tflite::Tensor *score_threshold_tensor = import->tensors()->Get(inputs[4]);
+ assert(score_threshold_tensor->type() == tflite::TensorType::TensorType_FLOAT32);
+
+ for (int32_t index = 2; index < 5; ++index)
+ {
+ fill_tensor_to_import(index, import);
+ }
+}
+
+tflchef::Operation *TFliteOpNonMaxSuppressionV4::build(const tflite::Operator *op,
+ TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("NonMaxSuppressionV4");
+
+ return operation;
+}
+
+} // namespace tflchef
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_NON_MAX_SUPPRESSION_V4_H__
+#define __TFLITE_OP_NON_MAX_SUPPRESSION_V4_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for NON_MAX_SUPPRESSION_V4
+ */
+class TFliteOpNonMaxSuppressionV4 : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_NON_MAX_SUPPRESSION_V4_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PadV2.h"
+
+#include "FillerHelper.h"
+
+namespace tflchef
+{
+
+void TFliteOpPadV2::filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ // Filler for paddings and constant_values
+ fill_tensor_to_import(1, import);
+ fill_tensor_to_import(2, import);
+}
+
+tflchef::Operation *TFliteOpPadV2::build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const
+{
+ auto operation = model_recipe->add_operation();
+
+ operation->set_type("PadV2");
+
+ return operation;
+}
+
+} // namespace tflchef
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_PADV2_H__
+#define __TFLITE_OP_PADV2_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for PADV2
+ */
+class TFliteOpPadV2 : public TFliteOpChef
+{
+public:
+ void filler(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+ tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+ tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_PADV2_H__
auto vec = extract_buffer<int32_t>(buffer);
import->set_tensor_filler(inputs[0], vec);
}
+
+ // filter
+ const tflite::Tensor *filter_tensor = import->tensors()->Get(inputs[1]);
+ import->set_tensor_filler(inputs[1]);
}
tflchef::Operation *TFliteOpTransposeConv::build(const tflite::Operator *op, TFliteImport *import,
for (uint32_t idx = 0; idx < quant->zero_point()->size(); ++idx)
chef_quant->add_zero_point(quant->zero_point()->Get(idx));
}
+ tflchef::TensorQuantization *chef_quant = operand->mutable_quant();
+ chef_quant->set_quantized_dimension(quant->quantized_dimension());
}
}
#include "Op/MirrorPad.h"
#include "Op/Mul.h"
#include "Op/Neg.h"
+#include "Op/NonMaxSuppressionV4.h"
#include "Op/NotEqual.h"
#include "Op/OneHot.h"
#include "Op/Pack.h"
#include "Op/Pad.h"
+#include "Op/PadV2.h"
#include "Op/Pow.h"
#include "Op/PRelu.h"
#include "Op/Range.h"
REG_TFL_OP(MIRROR_PAD, TFliteOpMirrorPad);
REG_TFL_OP(MUL, TFliteOpMul);
REG_TFL_OP(NEG, TFliteOpNeg);
+ REG_TFL_OP(NON_MAX_SUPPRESSION_V4, TFliteOpNonMaxSuppressionV4);
REG_TFL_OP(NOT_EQUAL, TFliteOpNotEqual);
REG_TFL_OP(ONE_HOT, TFliteOpOneHot);
REG_TFL_OP(PACK, TFliteOpPack);
REG_TFL_OP(PAD, TFliteOpPad);
+ REG_TFL_OP(PADV2, TFliteOpPadV2);
REG_TFL_OP(POW, TFliteOpPow);
REG_TFL_OP(PRELU, TFliteOpPRelu);
REG_TFL_OP(RANGE, TFliteOpRange);
{
std::cout << err.what() << std::endl;
std::cout << arser;
- return 0;
+ return 255;
}
int32_t model_version = 1;
{
std::cout << err.what() << std::endl;
std::cout << arser;
- return 0;
+ return 255;
}
std::string tflite_path = arser.get<std::string>("tflite");
{
std::cout << err.what() << '\n';
std::cout << arser;
- return 0;
+ return 255;
}
std::string tflite_path = arser.get<std::string>("tflite");
_op_map[tflite::BuiltinOperator_MAX_POOL_2D] = make_unique<Pool2DPrinter>();
_op_map[tflite::BuiltinOperator_MIRROR_PAD] = make_unique<MirrorPadPrinter>();
_op_map[tflite::BuiltinOperator_MUL] = make_unique<MulPrinter>();
+ // There is no Option for NON_MAX_SUPPRESSION_V4
_op_map[tflite::BuiltinOperator_ONE_HOT] = make_unique<OneHotPrinter>();
_op_map[tflite::BuiltinOperator_PACK] = make_unique<PackPrinter>();
// There is no Option for PAD
target_link_libraries(tflite2circle safemain)
target_link_libraries(tflite2circle mio_tflite)
target_link_libraries(tflite2circle mio_circle)
+target_link_libraries(tflite2circle vconone)
install(TARGETS tflite2circle DESTINATION bin)
#include "CircleModel.h"
#include "TFLModel.h"
+#include <vconone/vconone.h>
+
+void print_version(void)
+{
+ std::cout << "tflite2circle version " << vconone::get_string() << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+}
+
int entry(int argc, char **argv)
{
arser::Arser arser{"tflite2circle is a Tensorflow lite to circle model converter"};
+ arser.add_argument("--version")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(print_version);
+
arser.add_argument("tflite")
.nargs(1)
.type(arser::DataType::STR)
{
std::cout << err.what() << std::endl;
std::cout << arser;
- return 0;
+ return 255;
}
std::string tfl_path = arser.get<std::string>("tflite");
require("mio-tflite")
require("mio-circle")
require("safemain")
+require("vconone")
#include "BuildBuiltinOptions/MirrorPadOptions.h"
#include "BuildBuiltinOptions/MulOptions.h"
#include "BuildBuiltinOptions/NegOptions.h"
+#include "BuildBuiltinOptions/NonMaxSuppressionV4Options.h"
#include "BuildBuiltinOptions/NotEqualOptions.h"
#include "BuildBuiltinOptions/OneHotOptions.h"
#include "BuildBuiltinOptions/PackOptions.h"
#include "BuildBuiltinOptions/PadOptions.h"
+#include "BuildBuiltinOptions/PadV2Options.h"
#include "BuildBuiltinOptions/RangeOptions.h"
#include "BuildBuiltinOptions/Pool2DOptions.h"
#include "BuildBuiltinOptions/PowOptions.h"
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NonMaxSuppressionV4Options.h"
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::NonMaxSuppressionV4Options>
+build_circle_NonMaxSuppressionV4Options(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *)
+{
+ circle::NonMaxSuppressionV4OptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_NON_MAX_SUPPRESSION_V4_OPTIONS_H__
+#define __BBO_NON_MAX_SUPPRESSION_V4_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::NonMaxSuppressionV4Options>
+build_circle_NonMaxSuppressionV4Options(flatbuffers::FlatBufferBuilder &fb,
+ const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_NON_MAX_SUPPRESSION_V4_OPTIONS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PadV2Options.h"
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::PadV2Options>
+build_circle_PadV2Options(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ circle::PadV2OptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_PADV2_OPTIONS_H__
+#define __BBO_PADV2_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::PadV2Options>
+build_circle_PadV2Options(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_PADV2_OPTIONS_H__
//TFL_BUILTIN_OPTIONS(EmbeddingLookupSparseOptions)
TFL_BUILTIN_OPTIONS(MulOptions)
TFL_BUILTIN_OPTIONS(PadOptions)
+TFL_BUILTIN_OPTIONS(PadV2Options)
TFL_BUILTIN_OPTIONS(GatherOptions)
TFL_BUILTIN_OPTIONS(BatchToSpaceNDOptions)
TFL_BUILTIN_OPTIONS(SpaceToBatchNDOptions)
TFL_BUILTIN_OPTIONS(IfOptions)
TFL_BUILTIN_OPTIONS(WhileOptions)
TFL_BUILTIN_OPTIONS(DepthToSpaceOptions)
-//TFL_BUILTIN_OPTIONS(NonMaxSuppressionV4Options)
+TFL_BUILTIN_OPTIONS(NonMaxSuppressionV4Options)
//TFL_BUILTIN_OPTIONS(NonMaxSuppressionV5Options)
TFL_BUILTIN_OPTIONS(RankOptions)
TFL_BUILTIN_OPTIONS(ScatterNdOptions)
--- /dev/null
+if (NOT VCONONE_VERSION)
+ set(VCONONE_VERSION 0x0000000000080001)
+ # NOTE order is [build patch minor major]
+ # if VCONONE_VERSION is set with -D option, it will be cached
+ # you may have to remove cache file if you remove -D option
+endif()
+
+configure_file(version_cfg.h.in version_cfg.h @ONLY)
+
+set(DRIVER "driver/driver.cpp")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(vconone STATIC ${SOURCES})
+target_include_directories(vconone PUBLIC include)
+target_include_directories(vconone PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
+
+add_executable(one-version ${DRIVER})
+target_link_libraries(one-version vconone)
+install(TARGETS one-version DESTINATION bin)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(vconone_test ${TESTS})
+target_link_libraries(vconone_test vconone)
--- /dev/null
+# vconone
+
+_vconone_ provides the version number and version strings for the one-* commands
+and command line tools.
+
+# Revise version number
+
+To revise the version number, update `VCONONE_VERSION` in `CMakeLists.txt`
+or give `-DVCONONE_VERSION=0x0000000100080001` at cmake configure step.
+
+The value packs four 16-bit integers; read from the most significant end they
+are `build`, `patch`, `minor` and `major`. `build` is not used for now.
+
+Version `0x0000000100080001` is therefore interpreted as `1.8.1`.
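+
+For illustration only, here is a minimal Python sketch (not part of the build;
+the `decode` helper is made up for this example) of how the packed value maps
+to the `major.minor.patch` string that `vconone::get_string()` reports:
+
+```python
+# Decode a VCONONE_VERSION value into "major.minor.patch".
+# Read from the most significant end, the 16-bit fields are
+# [build][patch][minor][major]; `build` is currently unused.
+def decode(version):
+    major = version & 0xFFFF
+    minor = (version >> 16) & 0xFFFF
+    patch = (version >> 32) & 0xFFFF
+    return "{}.{}.{}".format(major, minor, patch)
+
+assert decode(0x0000000100080001) == "1.8.1"
+```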
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vconone/vconone.h>
+
+#include <string>
+#include <iostream>
+
+int main(int argc, char *argv[])
+{
+ auto str = vconone::get_string();
+ if (argc >= 2)
+ {
+ for (int c = 1; c < argc; ++c)
+ std::cout << argv[c] << " ";
+ std::cout << "version " << str << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+ }
+ else
+ std::cout << str;
+
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __VCON_ONE_H__
+#define __VCON_ONE_H__
+
+#include <cstdint>
+#include <string>
+
+namespace vconone
+{
+
+struct four
+{
+ uint16_t major;
+ uint16_t minor;
+ uint16_t patch;
+ uint16_t build; // build is not used for now
+};
+
+union version {
+ uint64_t v;
+ four f;
+};
+
+/**
+ * @brief get_number will return version union structure
+ */
+version get_number(void);
+
+/**
+ * @brief get_string will return string of major.minor.patch (without build)
+ */
+std::string get_string(void);
+
+/**
+ * @brief get_string4 will return string of major.minor.patch.build
+ */
+std::string get_string4(void);
+
+/**
+ * @brief get_copyright will return copyright string
+ */
+std::string get_copyright(void);
+
+} // namespace vconone
+
+#endif // __VCON_ONE_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "vconone/vconone.h"
+
+#include "version_cfg.h"
+
+#include <sstream>
+
+namespace vconone
+{
+
+version get_number(void)
+{
+ version v;
+ v.v = VCONONE_VERSION;
+ return v;
+}
+
+std::string get_string4(void)
+{
+ std::ostringstream ss;
+
+ auto v = get_number();
+ ss << unsigned(v.f.major) << "." << unsigned(v.f.minor) << "." << unsigned(v.f.patch) << "."
+ << unsigned(v.f.build);
+
+ return ss.str();
+}
+
+std::string get_string(void)
+{
+ std::ostringstream ss;
+
+ auto v = get_number();
+ ss << unsigned(v.f.major) << "." << unsigned(v.f.minor) << "." << unsigned(v.f.patch);
+
+ return ss.str();
+}
+
+std::string get_copyright(void)
+{
+ std::string str;
+ str = "Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved\r\n";
+ str += "Licensed under the Apache License, Version 2.0\r\n";
+ str += "https://github.com/Samsung/ONE";
+ return str;
+}
+
+} // namespace vconone
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vconone/vconone.h>
+
+#include <gtest/gtest.h>
+
+TEST(vconone, version_number)
+{
+ auto v = vconone::get_number();
+
+ ASSERT_NE(0x0000000000000000ULL, v.v);
+}
+
+TEST(vconone, version_string)
+{
+ auto str = vconone::get_string();
+
+ ASSERT_NE("..", str);
+ ASSERT_NE("", str);
+}
+
+TEST(vconone, version_string4)
+{
+ auto str = vconone::get_string4();
+
+ ASSERT_NE("...", str);
+ ASSERT_NE("", str);
+}
+
+TEST(vconone, copyright)
+{
+ auto str = vconone::get_copyright();
+
+ ASSERT_NE("", str);
+}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __VCON_ONE_VERSION_CFG_H__
+#define __VCON_ONE_VERSION_CFG_H__
+
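+// @VCONONE_VERSION@ is substituted with the packed 64-bit version value at
+// CMake configure time (VCONONE_VERSION in CMakeLists.txt, or
+// -DVCONONE_VERSION=... on the command line); the ULL suffix keeps the
+// substituted literal an unsigned 64-bit constant.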
+#define VCONONE_VERSION @VCONONE_VERSION@ULL
+
+#endif // __VCON_ONE_VERSION_CFG_H__
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/**
- * @file CLArgOperationKernel.h
- * @brief This file defines CLArgOperationKernel
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __ARM_COMPUTE_CLARGOPERATIONKERNEL_H__
-#define __ARM_COMPUTE_CLARGOPERATIONKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to define interface for the argop kernel.
- */
-class CLArgOperationKernel : public ICLKernel
-{
-public:
- /**
- * @brief Default constructor.
- */
- CLArgOperationKernel();
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLArgOperationKernel to be copied
- */
- CLArgOperationKernel(const CLArgOperationKernel &) = delete;
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLArgOperationKernel to be copied
- * @return Reference of this instance
- */
- CLArgOperationKernel &operator=(const CLArgOperationKernel &) = delete;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLArgOperationKernel to be moved
- */
- CLArgOperationKernel(CLArgOperationKernel &&) = default;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLArgOperationKernel to be moved
- * @return Reference of this instance
- */
- CLArgOperationKernel &operator=(CLArgOperationKernel &&) = default;
- /**
- * @brief Initialise the kernel's input, output and border mode.
- * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32.
- * @param[out] output The output tensor, Data types supported: S32.
- * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates.
- * @param[in] op Arg operation to perform.
- * return N/A
- */
- void configure(const ICLTensor *input, ICLTensor *output, const uint32_t axis, ArgOperation op);
- /**
- * @brief Static function to check if given info will lead to a valid configuration of @ref
- * CLArgOperationKernel
- * @param[in] input An input tensor info. Data types supported: U8/QASYMM8/S32/F32.
- * @param[in] output The output tensor info, Data types supported: S32.
- * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates.
- * @param[in] op Arg operation to perform.
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis,
- ArgOperation op);
-
- /*
- * @brief Run CLArgOperationKernel op
- * @param[in] window Window to be used for in_slice
- * @param[in] queue cl::CommandQueue
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- uint32_t _axis;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLARGOPERATIONKERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/**
- * @file CLCastKernel.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file defines CLCastKernel class
- */
-
-#ifndef __ARM_COMPUTE_CLCASTKERNEL_H__
-#define __ARM_COMPUTE_CLCASTKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to define OpenCL kernel for cast operation
- */
-class CLCastKernel : public ICLKernel
-{
-public:
- /**
- * @brief Construct CLCastKernel object
- */
- CLCastKernel();
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLCastKernel(const CLCastKernel &) = delete;
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLCastKernel &operator=(const CLCastKernel &) = delete;
-
- /**
- * @brief Construct CLCastKernel object using default move constructor
- * @param[in] CLCastKernel object to move
- */
- CLCastKernel(CLCastKernel &&) = default;
-
- /**
- * @brief Allow instances of this class to be moved
- * @param[in] CLCastKernel object to move
- */
- CLCastKernel &operator=(CLCastKernel &&) = default;
-
- /**
- * @brief Destruct this CLCastKernel object
- */
- ~CLCastKernel() = default;
-
- /**
- * @brief Initialise the kernel's input and output.
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] input_subtype Sub data type of input.
- * @return N/A
- */
- void configure(const ICLTensor *input, ICLTensor *output, SubDataType input_subtype);
-
- /**
- * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command
- * queue.
- * @note The queue is *not* flushed by this method, and therefore the kernel will not have
- * been executed by the time this method returns.
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of
- * the window returned by window()).
- * @param[in,out] queue Command queue on which to enqueue the kernel.@return N/A
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLCASTKERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__
-#define __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform depthTospace operation */
-class CLDepthToSpaceKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDepthToSpaceKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthToSpaceKernel(const CLDepthToSpaceKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthToSpaceKernel &operator=(const CLDepthToSpaceKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDepthToSpaceKernel(CLDepthToSpaceKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDepthToSpaceKernel &operator=(CLDepthToSpaceKernel &&) = default;
- /** Default destructor */
- ~CLDepthToSpaceKernel() = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const int32_t block_size);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLDEPTHTOSPACEKERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNELEX_H__
-#define __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNELEX_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to multiply matrices
- *
- * @note This kernel should be used ONLY for Midgard architectures
- *
- * This kernel performs the following computation:
- *
- * -# Convert a values from int8 to int32
- * -# Convert b values from int8 to int32
- * -# Compute the int32 matrix product of the resulting a * b and store the result as int32
- *
- */
-class CLGEMMLowpMatrixMultiplyKernelEx : public ICLKernel
-{
-public:
- /** Default Constructor */
- CLGEMMLowpMatrixMultiplyKernelEx();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyKernelEx(const CLGEMMLowpMatrixMultiplyKernelEx &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyKernelEx &operator=(const CLGEMMLowpMatrixMultiplyKernelEx &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpMatrixMultiplyKernelEx(CLGEMMLowpMatrixMultiplyKernelEx &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpMatrixMultiplyKernelEx &operator=(CLGEMMLowpMatrixMultiplyKernelEx &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @note This kernel should be used ONLY for Midgard architectures
- *
- * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8
- * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p
- * input0
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type
- * supported: S32
- * @param[in] gemm_info (Optional) GEMM information used to retrieve the original dimensions of
- * the input matrices
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output,
- const GEMMReshapeInfo &gemm_info = GEMMReshapeInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLGEMMLowpMatrixMultiplyKernelEx
- *
- * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8
- * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p
- * input0
- * @param[in] output Output tensor to store the result of matrix multiplication. Data type
- * supported: S32
- * @param[in] gemm_info (Optional) GEMM information used to retrieve the original dimensions of
- * the input matrices
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1,
- const ITensorInfo *output,
- const GEMMReshapeInfo &gemm_info = GEMMReshapeInfo());
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- ICLTensor *_output;
- bool _slide_matrix_b;
- bool _reinterpret_input_as_3d;
- bool _reinterpret_output_as_3d;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYKERNELEX_H__*/
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLPRELU_KERNEL_H__
-#define __ARM_COMPUTE_CLPRELU_KERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to calculate PReLU*/
-class CLPReLUKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLPReLUKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLPReLUKernel(const CLPReLUKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLPReLUKernel &operator=(const CLPReLUKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLPReLUKernel(CLPReLUKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLPReLUKernel &operator=(CLPReLUKernel &&) = default;
- /** Initialize the kernel's input, output.
- *
- * @param[in] input Source tensor1.
- * @param[in] alpha Source tensor2.
- * @param[out] output Output tensor.
- */
- void configure(const ICLTensor *input, const ICLTensor *alpha, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_alpha;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLPRELU_KERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__
-#define __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform spaceTodepth operation */
-class CLSpaceToDepthKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLSpaceToDepthKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSpaceToDepthKernel(const CLSpaceToDepthKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSpaceToDepthKernel &operator=(const CLSpaceToDepthKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSpaceToDepthKernel(CLSpaceToDepthKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSpaceToDepthKernel &operator=(CLSpaceToDepthKernel &&) = default;
- /** Default destructor */
- ~CLSpaceToDepthKernel() = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const int32_t block_size);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLSPACETODEPTHKERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__
-#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the Upsampling layer kernel for transpose convolution on OpenCL.
- */
-class CLTransposeConvLayerUpsampleKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLTransposeConvLayerUpsampleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLTransposeConvLayerUpsampleKernel(const CLTransposeConvLayerUpsampleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLTransposeConvLayerUpsampleKernel &
- operator=(const CLTransposeConvLayerUpsampleKernel &) = delete;
- /** Default Move Constructor. */
- CLTransposeConvLayerUpsampleKernel(CLTransposeConvLayerUpsampleKernel &&) = default;
- /** Default move assignment operator */
- CLTransposeConvLayerUpsampleKernel &operator=(CLTransposeConvLayerUpsampleKernel &&) = default;
- /** Default destructor */
- ~CLTransposeConvLayerUpsampleKernel() = default;
-
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32.
- * @param[out] output Destination tensor. Data types supported: same as @p input. All but
- * the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only
- * performed within the XY-plane.
- * @param[in] inner_border Top and right inner border sizes. These rows and columns will be
- * filled with zero.
- * @param[in] info Contains padding and stride information described in @ref
- * PadStrideInfo.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const BorderSize &inner_border,
- const PadStrideInfo &info);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLTransposeConvLayerUpsample
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32.
- * @param[in] output Destination tensor info. Data types supported: same as @p input. All
- * but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is
- * only performed within the XY-plane.
- * @param[in] inner_border Top and right inner border sizes. These rows and columns will be filled
- * with zero.
- * @param[in] info Contains padding and stride information described in @ref
- * PadStrideInfo.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const BorderSize &inner_border, const PadStrideInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- BorderSize _inner_border;
- PadStrideInfo _info;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLEKERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__
-#define __ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__
-
-#include "arm_compute/core/CPP/ICPPKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** CPP kernel to perform tensor upsample.
- *
- */
-class CPPUpsampleKernelEx : public ICPPKernel
-{
-public:
- const char *name() const override { return "CPPUpsampleKernelEx"; }
- /** Default constructor */
- CPPUpsampleKernelEx();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CPPUpsampleKernelEx(const CPPUpsampleKernelEx &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CPPUpsampleKernelEx &operator=(const CPPUpsampleKernelEx &) = delete;
- /** Allow instances of this class to be moved */
- CPPUpsampleKernelEx(CPPUpsampleKernelEx &&) = default;
- /** Allow instances of this class to be moved */
- CPPUpsampleKernelEx &operator=(CPPUpsampleKernelEx &&) = default;
- /** Default destructor */
- ~CPPUpsampleKernelEx() = default;
-
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to upsample. Data types supported: F32/F16/QASYMM8
- * @param[out] output The output tensor. Data types supported: Same as @p input
- * @param[in] info Padding info.
- */
- void configure(const ITensor *input, ITensor *output, const PadStrideInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- bool is_parallelisable() const override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- PadStrideInfo _info;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NECASTKERNEL_H__
-#define __ARM_COMPUTE_NECASTKERNEL_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the cast layer kernel. */
-class NECastKernel : public INEKernel
-{
-public:
- const char *name() const override { return "NECastKernel"; }
- /** Default constructor */
- NECastKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NECastKernel(const NECastKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NECastKernel &operator=(const NECastKernel &) = delete;
- /** Default Move Constructor. */
- NECastKernel(NECastKernel &&) = default;
- /** Default move assignment operator */
- NECastKernel &operator=(NECastKernel &&) = default;
- /** Default destructor */
- ~NECastKernel() = default;
- /** Set input, output tensors.
- *
- * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U32/S32/F32.
- * @param[out] output Destination tensor with the same dimensions of input. Data type supported:
- * U8/S8/QASYMM8/U32/S32/F32.
- * @param[in] input_subtype Sub data type of input.
- */
- void configure(const ITensor *input, ITensor *output, SubDataType input_subtype);
- /** Static function to check if given info will lead to a valid configuration of @ref NECastKernel
- *
- * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U32/S32/F32.
- * @param[in] output Output tensor info. Data types supported: U8/S8/QASYMM8/U32/S32/F32.
- * @param[in] input_subtype Sub data type of input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- SubDataType input_subtype);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
- SubDataType _input_subtype;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_NECASTKERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNELEX_H__
-#define __ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNELEX_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the depth to space kernel */
-class NEDepthToSpaceLayerKernelEx : public INEKernel
-{
-public:
- const char *name() const override { return "NEDepthToSpaceLayerKernelEx"; }
- /** Default constructor */
- NEDepthToSpaceLayerKernelEx();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthToSpaceLayerKernelEx(const NEDepthToSpaceLayerKernelEx &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthToSpaceLayerKernelEx &operator=(const NEDepthToSpaceLayerKernelEx &) = delete;
- /** Allow instances of this class to be moved */
- NEDepthToSpaceLayerKernelEx(NEDepthToSpaceLayerKernelEx &&) = default;
- /** Allow instances of this class to be moved */
- NEDepthToSpaceLayerKernelEx &operator=(NEDepthToSpaceLayerKernelEx &&) = default;
- /** Default destructor */
- ~NEDepthToSpaceLayerKernelEx() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape x value.
- */
- void configure(const ITensor *input, ITensor *output, int32_t block_shape);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEDepthToSpaceLayerKernelEx.
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] output Tensor output info. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input; /**< Source tensor */
- ITensor *_output; /**< Destination tensor */
- int32_t _block_shape; /**< Block shape */
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNELEX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEELEMENTWISEUNARYKERNELEX_H__
-#define __ARM_COMPUTE_NEELEMENTWISEUNARYKERNELEX_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for an element-wise unary operation kernel
- *
- * Element-wise operation is computed by:
- * @f[ output(x) = OP(input(x))@f]
- *
- */
-class NEElementwiseUnaryKernelEx : public INEKernel
-{
-public:
- const char *name() const override { return "NEElementwiseUnaryKernelEx"; }
- /** Default constructor */
- NEElementwiseUnaryKernelEx();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEElementwiseUnaryKernelEx(const NEElementwiseUnaryKernelEx &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEElementwiseUnaryKernelEx &operator=(const NEElementwiseUnaryKernelEx &) = delete;
- /** Allow instances of this class to be moved */
- NEElementwiseUnaryKernelEx(NEElementwiseUnaryKernelEx &&) = default;
- /** Allow instances of this class to be moved */
- NEElementwiseUnaryKernelEx &operator=(NEElementwiseUnaryKernelEx &&) = default;
- /** Default destructor */
- ~NEElementwiseUnaryKernelEx() = default;
-
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEElementwiseUnaryKernelEx
- *
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input First tensor input. Data types supported: F16/F32/S32.
- * @param[in] output Output tensor. Data types supported: Same as @p input.
- */
- void configure(ElementWiseUnaryEx op, const ITensor *input, ITensor *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEElementwiseUnaryKernelEx
- *
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input First tensor input info. Data types supported: F16/F32/S32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a Status
- */
- static Status validate(ElementWiseUnaryEx op, const ITensorInfo *input,
- const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
- /** Common signature for all the specialised arithmetic functions
- *
- * @param[in] input An input tensor. Data types supported: F16/F32/S32.
- * @param[out] output The output tensor. Data types supported: Same as @p input.
- * @param[in] window Region on which to execute the kernel.
- */
- using ElementwiseUnaryFunction = void(const ITensor *input, ITensor *output,
- const Window &window);
-
-protected:
- // Inherited methods overridden:
- static Status validate_arguments(const ITensorInfo &input, const ITensorInfo &output);
-
- /** Function to use for the particular tensor types passed to configure() */
- std::function<void(const ITensor *input, ITensor *output, const Window &window)> _function;
-
- const ITensor *_input;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEELEMENTWISEUNARYKERNELEX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEPRELUKERNEL_H__
-#define __ARM_COMPUTE_NEPRELUKERNEL_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform Parametric Rectified Linear Unit
- *
- * Result is computed by:
- * @f[ output(x) = alpha * x for x < 0, output(x) = x for x >= 0 @f]
- */
-class NEPReLUKernel : public INEKernel
-{
-public:
- const char *name() const override { return "NEPReLUKernel"; }
- /** Default constructor */
- NEPReLUKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPReLUKernel(const NEPReLUKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEPReLUKernel &operator=(const NEPReLUKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEPReLUKernel(NEPReLUKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEPReLUKernel &operator=(NEPReLUKernel &&) = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input Input tensor. Data type supported: QASYMM8/F32
- * @param[in] alpha Alpha tensor. Data types supported: Same as @p input
- * @param[out] output Output tensor. Data types supported: Same as @p input
- */
- void configure(const ITensor *input, const ITensor *alpha, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEPReLUKernel.h
- *
- * @param[in] input Input tensor input info. Data types supported: QASYMM8/F32.
- * @param[in] alpha Alpha tensor input info. Data types supported: Same as @p input.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *alpha,
- const ITensorInfo *output);
- static Status validate_arguments(const ITensorInfo &input, const ITensorInfo &alpha,
- const ITensorInfo &output);
-
-private:
- const ITensor *_input; /**< Source tensor */
- const ITensor *_alpha; /**< Alpha tensor */
- ITensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEPRELUKERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NESPACETODEPTHLAYERKERNELEX_H__
-#define __ARM_COMPUTE_NESPACETODEPTHLAYERKERNELEX_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the space to depth kernel */
-class NESpaceToDepthLayerKernelEx : public INEKernel
-{
-public:
- const char *name() const override { return "NESpaceToDepthLayerKernelEx"; }
- /** Default constructor */
- NESpaceToDepthLayerKernelEx();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESpaceToDepthLayerKernelEx(const NESpaceToDepthLayerKernelEx &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESpaceToDepthLayerKernelEx &operator=(const NESpaceToDepthLayerKernelEx &) = delete;
- /** Allow instances of this class to be moved */
- NESpaceToDepthLayerKernelEx(NESpaceToDepthLayerKernelEx &&) = default;
- /** Allow instances of this class to be moved */
- NESpaceToDepthLayerKernelEx &operator=(NESpaceToDepthLayerKernelEx &&) = default;
- /** Default destructor */
- ~NESpaceToDepthLayerKernelEx() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape value
- */
- void configure(const ITensor *input, ITensor *output, int32_t block_shape);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NESpaceToDepthLayerKernelEx
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] output Tensor output info. Data types supported: same as @p input
- * @param[in] block_shape Block shape value
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input; /**< Source tensor */
- ITensor *_output; /**< Destination tensor */
- int32_t _block_shape; /**< Block shape */
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NESPACETODEPTHLAYERKERNELEX_H__ */
#ifndef __ARM_COMPUTE_CLFUNCTIONSEX_H__
#define __ARM_COMPUTE_CLFUNCTIONSEX_H__
-#include <arm_compute/runtime/CL/functions/CLArgOperation.h>
-#include <arm_compute/runtime/CL/functions/CLBatchToSpaceND.h>
#include <arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h>
-#include <arm_compute/runtime/CL/functions/CLCast.h>
-#include <arm_compute/runtime/CL/functions/CLDepthToSpace.h>
#include <arm_compute/runtime/CL/functions/CLEmbeddingLookup.h>
#include <arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h>
#include <arm_compute/runtime/CL/functions/CLGatherEx.h>
#include <arm_compute/runtime/CL/functions/CLHashtableLookup.h>
#include <arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h>
-#include <arm_compute/runtime/CL/functions/CLLogicalNot.h>
#include <arm_compute/runtime/CL/functions/CLNeg.h>
-#include <arm_compute/runtime/CL/functions/CLPixelWiseDivision.h>
-#include <arm_compute/runtime/CL/functions/CLPReLU.h>
#include <arm_compute/runtime/CL/functions/CLReduceOperation.h>
-#include <arm_compute/runtime/CL/functions/CLRNNLayerEx.h>
-#include <arm_compute/runtime/CL/functions/CLSpaceToDepth.h>
-#include <arm_compute/runtime/CL/functions/CLSplit.h>
-#include <arm_compute/runtime/CL/functions/CLStridedSliceEx.h>
#include <arm_compute/runtime/CL/functions/CLTopKV2.h>
#include <arm_compute/runtime/CL/functions/CLTransposeConvLayer.h>
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/**
- * @file CLArgOperation.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLArgOperation class
- */
-
-#ifndef __ARM_COMPUTE_CLARGOPERATION_H__
-#define __ARM_COMPUTE_CLARGOPERATION_H__
-
-#include "arm_compute/core/CL/kernels/CLArgOperationKernel.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to execute CLArgOperation operation
- */
-class CLArgOperation : public IFunction
-{
-public:
- /**
- * @brief Construct a new CLArgOperation object
- */
- CLArgOperation();
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLArgOperation(const CLArgOperation &) = delete;
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLArgOperation &operator=(const CLArgOperation &) = delete;
-
- /**
- * @brief Construct a new CLArgOperation object by using copy constructor
- * @param[in] CLArgOperation object to move
- */
- CLArgOperation(CLArgOperation &&) = default;
-
- /**
- * @brief Assign a CLArgOperation object.
- * @param[in] CLArgOperation object to assign. This object will be moved.
- */
- CLArgOperation &operator=(CLArgOperation &&) = default;
-
- /**
- * @brief Initialise the kernel's inputs and outputs.
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32.
- * @param[out] output The result of arg operation. Data types supported: S32.
- * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates.
- * @param[in] op Arg operation to perform.
- * @return N/A
- */
- void configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis, ArgOperation op);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32.
- * @param[in] axis Axis along which to reduce. It must be sorted and no duplicates.
- * @param[out] output The result of arg operation. Data types supported: S32.
- * @param[in] op Arg operation to perform.
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const std::vector<uint32_t> &axis,
- const ITensorInfo *output, ArgOperation op);
- /**
- * @brief Run the OpenCL kernel for this operation
- * @return N/A
- */
- void run() override;
-
-private:
- ICLTensor *_input{nullptr};
- ICLTensor *_output{nullptr};
- std::vector<uint32_t> _axis{};
- ArgOperation _arg_op{ArgOperation::MAX};
-
- std::unique_ptr<CLTensor[]> _interm_tensors{nullptr};
- std::unique_ptr<CLArgOperationKernel[]> _argop_kernels{nullptr};
- size_t _num_of_kernels{0};
-};
-}
-#endif /*__ARM_COMPUTE_CLARGOPERATION_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__
-#define __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLBatchToSpaceNDKernel
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
- * @note The function converts the input tensor to the tensor of the output tensor's type.
- */
-class CLBatchToSpaceND : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] block_size A pointer to an array of integer values specifying block sizes
- * for spatial dimension.
- */
- void configure(ICLTensor *input, ICLTensor *output, const int32_t *block_size);
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/**
- * @file CLCast.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLCast class
- */
-
-#ifndef __ARM_COMPUTE_CLCAST_H__
-#define __ARM_COMPUTE_CLCAST_H__
-
-#include "arm_compute/core/TypesEx.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to run @ref CLCastKernel.
- * This converts the input tensor to the tensor of the output tensor's type.
- */
-class CLCast : public ICLSimpleFunction
-{
-public:
- /**
- * @brief Initialise the kernel's input and output
- * @param[in, out] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * The input tensor is [in, out] because its TensorInfo might be
- * modified inside the kernel.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] input_subtype Sub data type of input.
- */
- void configure(ICLTensor *input, ICLTensor *output, SubDataType input_subtype);
-};
-}
-#endif /* __ARM_COMPUTE_CLCAST_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLDEPTHTOSPACE_H__
-#define __ARM_COMPUTE_CLDEPTHTOSPACE_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLDepthToSpaceKernel
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
- * @note The function converts the input tensor to the tensor of the output tensor's type.
- */
-class CLDepthToSpace : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[block_size] block size integer only
- */
- void configure(ICLTensor *input, ICLTensor *output, const int32_t block_size);
-};
-} // namesace arm_compute
-
-#endif /* __ARM_COMPUTE_CLDEPTHTOSPACE_H__ */
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__
+#define __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__
+
+#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
+#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h"
+#include "arm_compute/runtime/CL/functions/CLReverse.h"
+#include "arm_compute/runtime/CL/functions/CLTranspose.h"
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+/** Function to run the deconvolution layer.
+ *
+ * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input
+ * depending on the stride and pad info and then perform a 1x1
+ * convolution pass. Input stride defines how many zeroes we should put between each element of the
+ * input and pad is the amount of padding.
+ *
+ * The relation between input to output is as follows:
+ * \f[
+ * width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x
+ * \f]
+ * \f[
+ * height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y
+ * \f]
+ *
+ * where:
+ * width_input is the size of the first input dimension.
+ * height_input is the size of the second input dimension.
+ * width_output is the size of the first output dimension.
+ * height_output is the size of the second output dimension.
+ * kernel_x and kernel_y are the convolution sizes in x and y.
+ * stride_x and stride_y is the input stride of the first and second dimension.
+ *
+ * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution.
+ * Therefore, it will be necessary to use the weights in the
+ * reverse order to perform an actual convolution. This is achieved by using @ref CLReverse.
+ *
+ * This function calls the following OpenCL kernels/functions:
+ *
+ * -# @ref CLDeconvolutionLayerUpsample
+ * -# @ref CLConvolutionLayer
+ *
+ * And the following CPP kernels:
+ * -# @ref CLReverse
+ *
+ */
+class CLDirectTransposeConvLayer : public IFunction
+{
+public:
+ /** Constructor */
+ CLDirectTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDirectTransposeConvLayer(const CLDirectTransposeConvLayer &) = delete;
+ /** Default move constructor */
+ CLDirectTransposeConvLayer(CLDirectTransposeConvLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDirectTransposeConvLayer &operator=(const CLDirectTransposeConvLayer &) = delete;
+ /** Default move assignment operator */
+ CLDirectTransposeConvLayer &operator=(CLDirectTransposeConvLayer &&) = default;
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
+ * optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
+ * supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Should match @p input data type, except for
+ * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+ * @param[out] output Output tensor. The output has the same number of dimensions as the
+ * @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this
+ * is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ *
+ */
+ void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
+ const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom,
+ const WeightsInfo &weights_info = WeightsInfo());
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and
+ * an optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data
+ * type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Should match @p input data type, except for
+ * input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+ * @param[out] output Output tensor. The output has the same number of dimensions as
+ * the @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref
+ * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
+ * CLWeightsReshapeKernel.
+ *
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights,
+ const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info,
+ unsigned int invalid_right, unsigned int invalid_bottom,
+ const WeightsInfo &weights_info = WeightsInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref
+ * CLDirectTransposeConvLayer
+ *
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
+ * optional 4th dimension for batch of inputs.
+ * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data
+ * type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension.
+ * Data type supported: Should match @p input data type, except for input
+ * of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+ * @param[in] output Output tensor info. The output has the same number of dimensions as the
+ * @p input.
+ * @param[in] info Contains padding and policies to be used in the deconvolution, this is
+ * described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info,
+ unsigned int invalid_right, unsigned int invalid_bottom,
+ const WeightsInfo &weights_info = WeightsInfo());
+
+ // Inherited methods overridden:
+ void run() override;
+ void prepare() override;
+
+private:
+ MemoryGroup _memory_group;
+ CLDeconvolutionLayerUpsample _scale_f;
+ CLConvolutionLayer _conv_f;
+ CLReverse _flip_weights;
+
+ CLTensor _scaled_output;
+ ICLTensor *_original_weights;
+ CLTensor _weights_flipped;
+ CLTensor _flip_axis;
+
+ bool _is_prepared;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLDIRECTTRANSPOSECONVLAYER_H__ */
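As a quick sanity check of the output-size relation quoted in the header comment above, here is a minimal standalone sketch. It is independent of this patch and of arm_compute; the helper name transpose_conv_output_dim is hypothetical and only restates the documented formula.

#include <cstdint>
#include <iostream>

// Output extent of a transpose convolution along one axis, per the relation
// quoted in the CLDirectTransposeConvLayer comment:
//   out = (in - 1) * stride - 2 * padding + kernel
static int32_t transpose_conv_output_dim(int32_t in, int32_t stride, int32_t padding,
                                         int32_t kernel)
{
  return (in - 1) * stride - 2 * padding + kernel;
}

int main()
{
  // Example: 4x4 input, 3x3 kernel, stride 2, no padding -> 9x9 output.
  std::cout << transpose_conv_output_dim(4, 2, 0, 3) << '\n'; // prints 9
  return 0;
}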
#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
namespace arm_compute
{
CLFullyConnectedHybridLayerReshapeWeights _reshape_weights_kernel;
CLScaleFactorSymm8Kernel _scale_factor_kernel;
CLQuantizationSymmetricKernel _quant_input_kernel;
- CLGEMMLowpMatrixMultiplyCoreEx _mm_gemmlowp;
+ CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
CLMultiplyScaleFactorKernel _multiply_scale_kernel;
CLGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; // TODO(COMPMID-1889): Use CLGEMM to
// add bias in
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCOREEX_H__
-#define __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCOREEX_H__
-
-#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-
-namespace arm_compute
-{
-class IMemoryManager;
-class ICLTensor;
-
-/** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. This function calls the
- * following OpenCL kernels:
- *
- * -# @ref CLGEMMLowpMatrixMultiplyKernel (if the parameter "reshape_b_only_on_first_run" of
- * GEMMInfo is FALSE)
- * -# @ref CLGEMMLowpMatrixAReductionKernel (if the offset of matrix B is not 0)
- * -# @ref CLGEMMLowpMatrixBReductionKernel (if the offset of matrix A is not 0)
- *
-*/
-class CLGEMMLowpMatrixMultiplyCoreEx : public IFunction
-{
-public:
- /** Constructor */
- CLGEMMLowpMatrixMultiplyCoreEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyCoreEx(const CLGEMMLowpMatrixMultiplyCoreEx &) = delete;
- /** Default move constructor */
- CLGEMMLowpMatrixMultiplyCoreEx(CLGEMMLowpMatrixMultiplyCoreEx &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyCoreEx &operator=(const CLGEMMLowpMatrixMultiplyCoreEx &) = delete;
- /** Default move assignment operator */
- CLGEMMLowpMatrixMultiplyCoreEx &operator=(CLGEMMLowpMatrixMultiplyCoreEx &&) = default;
- /** Initialise the kernel's inputs, output
- *
- * @note GEMMLowp: low precision GEMM kernel. [A * B + C]
- * This kernel performs the following computations:
- *
- * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
- * -# Convert b values from QASYMM8 to int32 and add b_offset to each of them.
- * -# Compute the matrix product of the resulting a * b in int32.
- * -# Quantize to uint8 if gemm_info.gemmlowp_output_stage != NONE
- *
- * @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8.
- * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
- * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported:
- * S32
- * @param[out] output Output tensor. Data type supported: S32 or QASYMM8 if
- * gemm_info.gemmlowp_output_stage != NONE
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped
- * and
- * if the reshape of matrix B should be executed only for the first run
- */
- void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output,
- const GEMMInfo &gemm_info = GEMMInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLGEMMLowpMatrixMultiplyCoreEx
- *
- * @param[in] a First input tensor info (Matrix A). Data type supported: QASYMM8.
- * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a
- * @param[in] c Third input tensor info (Matrix C). It can be a nullptr. Data type
- * supported: S32
- * @param[in] output Output tensor info. Data type supported: S32 or QASYMM8 if
- * gemm_info.gemmlowp_output_stage != NONE
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped
- * and
- * if the reshape of matrix B should be executed only for the first run
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c,
- const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo());
-
- // Inherited methods overridden:
- void run() override;
- void prepare() override;
-
-private:
- MemoryGroup _memory_group;
-
- // Kernels used
- CLGEMMLowpMatrixMultiplyKernelEx _mm_midgard_kernel;
- CLGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel;
- CLGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
-
- // Temporary tensors
- CLTensor _vector_sum_col;
- CLTensor _vector_sum_row;
-
- int32_t _a_offset;
- int32_t _b_offset;
- bool _reshape_b_only_on_first_run;
- bool _is_prepared;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCOREEX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLLOGICALNOT_H__
-#define __ARM_COMPUTE_CLLOGICALNOT_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-class CLLogicalNot : public ICLSimpleFunction
-{
-public:
- /** Initialise the function's source and destination.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8.
- * @param[out] output Output tensor. Data types supported: QASYMM8.
- */
- void configure(ICLTensor *input, ICLTensor *output);
-};
-
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLLOGICALNOT_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLPRELU_H__
-#define __ARM_COMPUTE_CLPRELU_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-class CLPReLU : public ICLSimpleFunction
-{
-public:
- /** Initialise the function's source and destination.
- *
- * @param[in] input. Data types supported:
- * QASYMM8/F16/F32.
- * @param[in] alpha. Data types supported:
- * QASYMM8/F16/F32.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- */
- void configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output);
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLPRELU_H__*/
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/**
- * @file CLPixelWiseDivision.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLPixelWiseDivision class
- */
-#ifndef __ARM_COMPUTE_CLPIXELWISEDIVISION_H__
-#define __ARM_COMPUTE_CLPIXELWISEDIVISION_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to run @ref CLPixelWiseDivisionKernel.
- */
-class CLPixelWiseDivision : public ICLSimpleFunction
-{
-public:
- /**
- * @brief Initialise the kernel's inputs, output and convertion policy.
- * @param[in, out] input1 An input tensor. Data types supported: U8/S16/F16/F32
- * The input tensor is [in, out] because its TensorInfo might be
- * modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 An input tensor. Data types supported: same as @p input1.
- * The input tensor is [in, out] because its TensorInfo might be
- * modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output The output tensor, Data types supported: same as @p input1.
- * Note: U8 requires both inputs to be U8.
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or
- * 1/2^n where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
- * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest
- * even.
- * @return N/A
- */
- void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale = 1.f,
- ConvertPolicy overflow_policy = ConvertPolicy::WRAP,
- RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration of @ref
- * CLPixelWiseDivision
- * @param[in] input1 An input tensor info. Data types supported: U8/S16/F16/F32
- * @param[in] input2 An input tensor info. Data types supported: same as @p input1.
- * @param[in] output The output tensor info, Data types supported: same as @p input1.
- * Note: U8 requires both inputs to be U8.
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n
- * where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
- * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output, float scale = 1.f,
- ConvertPolicy overflow_policy = ConvertPolicy::WRAP,
- RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO);
-};
-}
-#endif /*__ARM_COMPUTE_CLPIXELWISEDIVISION_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLRNN_LAYER_EX_H__
-#define __ARM_COMPUTE_CLRNN_LAYER_EX_H__
-
-#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
-#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
-#include "arm_compute/runtime/CL/functions/CLGEMM.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLRNNLayerEx */
-class CLRNNLayerEx : public IFunction
-{
-public:
- /** Default constructor */
- CLRNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Initialize the function
- *
- * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data
- * types supported: F16/F32
- * @param[in] weights Weights tensor of shape [input_size, num_units] that
- * multiplies the input. Data types supported: Same as @p input
- * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies
- * the current 'state'. Data types supported: Same as @p input
- * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same
- * as @p input
- * @param[out] output Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in,out] hidden_state Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in] info Activation layer parameter.
- */
- void configure(const ICLTensor *input, const ICLTensor *weights,
- const ICLTensor *recurrent_weights, const ICLTensor *bias, ICLTensor *hidden_state,
- ICLTensor *output, ActivationLayerInfo &info);
- /** Initialize the function
- *
- * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data
- * types supported: F16/F32
- * @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies
- * the input. Data types supported: Same as @p input
- * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the
- * current 'state'. Data types supported: Same as @p input
- * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same as @p
- * input
- * @param[in] output Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in] hidden_state Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in] info Activation layer parameter.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
- const ITensorInfo *recurrent_weights, const ITensorInfo *bias,
- const ITensorInfo *hidden_state, const ITensorInfo *output,
- const ActivationLayerInfo &info);
-
- // Inherited methods overridden:
- void run() override;
- void prepare() override;
-
-private:
- MemoryGroup _memory_group;
- CLGEMM _gemm_state_f;
- CLSaturatedArithmeticOperationKernel _add_kernel;
- CLActivationLayerKernel _activation_kernel;
- CLFullyConnectedLayer _fully_connected_kernel;
- CLCopyKernel _copy_kernel;
- CLTensor _fully_connected_out;
- CLTensor _gemm_output;
- CLTensor _add_output;
- bool _is_prepared;
-};
-}
-#endif /* __ARM_COMPUTE_CLRNN_LAYER_EX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLSPACETODEPTH_H__
-#define __ARM_COMPUTE_CLSPACETODEPTH_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLSpaceToDepthKernel
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
- * @note The function converts the input tensor to the tensor of the output tensor's type.
- */
-class CLSpaceToDepth : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[block_size] block size integer only
- */
- void configure(ICLTensor *input, ICLTensor *output, const int32_t block_size);
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLSPACETODEPTH_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/**
- * @file CLStridedSlice.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLStridedSlice and arm_compute::CLStridedSliceCPU class
- */
-
-#ifndef __ARM_COMPUTE_CLSTRIDEDSLICEEX_H__
-#define __ARM_COMPUTE_CLSTRIDEDSLICEEX_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to run @ref CLStridedSliceKernel
- */
-class CLStridedSliceEx : public ICLSimpleFunction
-{
-public:
- /**
- * @brief Initialise the kernel's inputs and outputs
- * @param[in] input Tensor input. Data type supported:
- * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
- * @param[out] output Output tensor. Data type supported: Same as @p input
- * @param[in] beginData 'begin' vector of strided slice operation
- * @param[in] endData 'end' vector of strided slice operation
- * @param[in] stridesData 'strides' vector of strided slice operation
- * @param[in] beginMask If the ith bit is set, begin[i] is ignored
- * @param[in] endMask If the ith bit is set, end[i] is ignored
- * @param[in] shrinkAxisMask If the ith bit is set, the ith specification shrinks the
- * dimensionality by 1, taking on the value at index begin[i]
- * @return N/A
- */
- void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData,
- ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, int32_t endMask,
- int32_t shrinkAxisMask);
-};
-}
-#endif /*__ARM_COMPUTE_CLSTRIDEDSLICEEX_H__ */
*/
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__
#define __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__
-#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
-#include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h"
-
-#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
-
-#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
namespace arm_compute
{
-class ICLTensor;
-/** Function to run the transpose convolution layer.
- *
- * @note This layer was copied in order to fix a bug computing to wrong output dimensions.
- *
- * TransposeConv Layer is the backward pass of Convolution Layer. First we transform the input
- * depending on the stride and pad info and then perform a 1x1
- * convolution pass. Input stride defines how many zeroes we should put between each element of the
- * input, pad is the amount of padding and finally a is a user
- * specified value where a < stride - 1, that increases the padding top and right of the input
- * image.
- *
- * The relation between input to output is as follows:
- * \f[
- * width\_output = (width\_input - 1) \cdot stride\_x - \cdot padding\_x + kernel\_x
- * \f]
- * \f[
- * height\_output = (height\_input - 1) \cdot stride\_y - \cdot padding\_y + kernel\_y
- * \f]
- *
- * where:
- * width_input is the size of the first input dimension.
- * height_input is the size of the second input dimension.
- * width_output is the size of the first output dimension.
- * height_output is the size of the second output dimension.
- * kernel_x and kernel_y are the convolution sizes in x and y.
- * stride_x and stride_y is the input stride of the first and second dimension.
- *
- * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution.
- * Therefore, it will be necessary to use the weights in the
- * reverse order to perform an actual convolution. This is achieved by using the @ref
- * CPPFlipWeightsKernel.
- *
- * This function calls the following OpenCL kernels/functions:
- *
- * -# @ref CLTransposeConvLayerUpsample
- * -# @ref CLConvolutionLayer
+/** Basic function to compute the deconvolution layer. This function calls the following OpenCL
+ * kernels/functions:
*
+ * -# @ref CLGEMMDeconvolutionLayer
+ * -# @ref CLDirectTransposeConvLayer
*/
class CLTransposeConvLayer : public IFunction
{
public:
- /** Constructor */
+ /** Default constructor */
CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLTransposeConvLayer(const CLTransposeConvLayer &) = delete;
- /** Default move constructor */
- CLTransposeConvLayer(CLTransposeConvLayer &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLTransposeConvLayer &operator=(const CLTransposeConvLayer &) = delete;
- /** Default move assignment operator */
- CLTransposeConvLayer &operator=(CLTransposeConvLayer &&) = default;
+
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input,
- * and an optional 4th dimension for batch of inputs.
- * Data types supported: QASYMM8/F16/F32.
- * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM].
- * Data type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported:
- * Same as @p input.
- * @param[out] output Output tensor. The output has the same number of dimensions
- * as the @p input.
- * @param[in] info Contains padding and policies to be used in the
- * transpose convolution, this is decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to top edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref
- * CLConvolutionLayer, specifies if the weights tensor has been
- * reshaped with @ref CLWeightsReshapeKernel.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
+ * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
+ * supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same
+ * as @p input.
+ * @param[out] output Output tensor. The output has the same number of dimensions as the
+ * @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this
+ * is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+ *
*/
void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
- const PadStrideInfo &info, unsigned int invalid_right, unsigned int invalid_bottom,
+ const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom, const WeightsInfo &weights_info = WeightsInfo());
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and
+ * an optional 4th dimension for batch of inputs. Data types supported:
+ * QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data
+ * type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported:
+ * Same as @p input.
+ * @param[out] output Output tensor. The output has the same number of dimensions as
+ * the @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution,
+ * this is described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref
+ * CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref
+ * CLWeightsReshapeKernel.
+ *
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights,
+ const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info,
+ unsigned int invalid_right, unsigned int invalid_bottom,
const WeightsInfo &weights_info = WeightsInfo());
/** Static function to check if given info will lead to a valid configuration of @ref
- * CLTransposeConvLayer
+ * CLTransposeConvLayer
+ *
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
+ * optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data
+ * type supported: Same as @p input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported: Same as
+ * @p input.
+ * @param[in] output Output tensor info. The output has the same number of dimensions as the
+ * @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is
+ * described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
+ * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
+ * specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input,
- * and an optional 4th dimension for batch of inputs.
- * Data types supported: QASYMM8/F16/F32.
- * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM].
- * Data type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported:
- * Same as @p input.
- * @param[in] output Output tensor info. The output has the same number of dimensions
- * as the @p input.
- * @param[in] info Contains padding and policies to be used in the
- * transpose convolution, this is decribed in @ref PadStrideInfo.
- * @param[in] innvalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to top edge of the output.
- * @param[in] weights_info (Optional) Weights information needed for @ref CLConvolutionLayer,
- * specifies if the weights tensor has been reshaped with @ref
- * CLWeightsReshapeKernel.
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
- const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &info,
- unsigned int innvalid_right, unsigned int invalid_bottom,
+ const ITensorInfo *bias, ITensorInfo *output,
+ const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom,
const WeightsInfo &weights_info = WeightsInfo());
+ static DeconvolutionMethod
+ get_deconvolution_method(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *bias, ITensorInfo *output,
+ const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom, const WeightsInfo &weights_info);
// Inherited methods overridden:
void run() override;
void prepare() override;
private:
- MemoryGroup _memory_group;
- CLTransposeConvLayerUpsample _scale_f;
- CLConvolutionLayer _conv_f;
- CPPFlipWeightsKernel _flip_weights;
- CLTensor _scaled_output;
- ICLTensor *_original_weights;
- CLTensor _weights_flipped;
- bool _is_prepared;
+ std::shared_ptr<IMemoryManager> _memory_manager;
+ std::unique_ptr<IFunction> _function;
};
-}
+} // namespace arm_compute
#endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYER_H__ */
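For reference, a minimal usage sketch of the reworked CLTransposeConvLayer follows. It relies only on the configure()/run() signatures declared above; the tensor shapes, stride values and CLScheduler setup are illustrative assumptions, not part of this patch.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLTransposeConvLayer.h"

using namespace arm_compute;

void transpose_conv_sketch()
{
    CLScheduler::get().default_init(); // create a default CL context/queue

    // Illustrative shapes: 8x8x3 input, 3x3 kernels, 16 output feature maps.
    CLTensor input, weights, bias, output;
    input.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 16U), 1, DataType::F32));
    bias.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
    // Stride 2, no padding: output spatial size is (8 - 1) * 2 + 3 = 17.
    output.allocator()->init(TensorInfo(TensorShape(17U, 17U, 16U), 1, DataType::F32));

    CLTransposeConvLayer deconv;
    deconv.configure(&input, &weights, &bias, &output, PadStrideInfo(2, 2, 0, 0),
                     0 /* invalid_right */, 0 /* invalid_bottom */);

    input.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    output.allocator()->allocate();

    // ... fill input/weights/bias here ...
    deconv.run();
    CLScheduler::get().sync();
}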
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__
-#define __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLTransposeConvLayerUpsampleKernel */
-class CLTransposeConvLayerUpsample : public IFunction
-{
-public:
- /** Default constructor */
- CLTransposeConvLayerUpsample();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLTransposeConvLayerUpsample(const CLTransposeConvLayerUpsample &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLTransposeConvLayerUpsample &operator=(const CLTransposeConvLayerUpsample &) = delete;
- /** Allow instances of this class to be moved */
- CLTransposeConvLayerUpsample(CLTransposeConvLayerUpsample &&) = default;
- /** Allow instances of this class to be moved */
- CLTransposeConvLayerUpsample &operator=(CLTransposeConvLayerUpsample &&) = default;
- /** Default destructor */
- virtual ~CLTransposeConvLayerUpsample() = default;
-
- /** Initialize the function's source, destination, interpolation type and border_mode.
- *
- * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input.
- * @param[in] inner_border The number of zeros added to right and top edges of the input.
- * @param[in] info Contains padding and policies to be used in the deconvolution.
- */
- void configure(ICLTensor *input, ICLTensor *output, const BorderSize &inner_border,
- const PadStrideInfo &info);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLTransposeConvLayerUpsample
- *
- * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input.
- * @param[in] inner_border The number of zeros added to right and top edges of the input.
- * @param[in] info Contains padding and policies to be used in the deconvolution.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const BorderSize &inner_border, const PadStrideInfo &info);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- CLTransposeConvLayerUpsampleKernel _upsample;
- ICLTensor *_output;
-};
-}
-#endif /* __ARM_COMPUTE_CLTRANSPOSECONVLAYERUPSAMPLE_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CPPUPSAMPLE_EX_H__
-#define __ARM_COMPUTE_CPPUPSAMPLE_EX_H__
-
-#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h"
-
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref CPPUpsample */
-class CPPUpsampleEx : public ICPPSimpleFunction
-{
-public:
- /** Configure the upsample CPP kernel
- *
- * @param[in] input The input tensor to upsample. Data types supported: F32/F16/QASYMM8
- * @param[out] output The output tensor. Data types supported: Same as @p input
- * @param[in] info Padding information
- */
- void configure(const ITensor *input, ITensor *output, const PadStrideInfo &info);
-};
-}
-#endif /* __ARM_COMPUTE_CPPUPSAMPLE_EX_H__ */
#include <arm_compute/runtime/NEON/functions/NEActivationLayerEx.h>
#include <arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h>
-#include <arm_compute/runtime/NEON/functions/NECast.h>
-#include <arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h>
#include <arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h>
#include <arm_compute/runtime/NEON/functions/NEGatherEx.h>
#include <arm_compute/runtime/NEON/functions/NEHashtableLookup.h>
#include <arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h>
-#include <arm_compute/runtime/NEON/functions/NEPReLU.h>
-#include <arm_compute/runtime/NEON/functions/NEReduceMeanEx.h>
#include <arm_compute/runtime/NEON/functions/NEReduceSum.h>
-#include <arm_compute/runtime/NEON/functions/NERNNLayerEx.h>
#include <arm_compute/runtime/NEON/functions/NEReduceOperation.h>
-#include <arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h>
-#include <arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h>
#include <arm_compute/runtime/NEON/functions/NETransposeConvLayer.h>
#endif // __ARM_COMPUTE_NEFUNCTIONSEX_H__
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NECAST_H__
-#define __ARM_COMPUTE_NECAST_H__
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/TypesEx.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Basic function to run @ref NECastKernel that converts an input tensor to the other types */
-class NECast : public INESimpleFunctionNoBorder
-{
-public:
- /** Configure the kernel.
- *
- * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U32/S32/F32.
- * @param[out] output Destination tensor with the same dimensions of input. Data type supported:
- * U8/S8/QASYMM8/U32/S32/F32.
- * @param[in] input_subtype Sub data type of input.
- */
- void configure(const ITensor *input, ITensor *output,
- SubDataType input_subtype = SubDataType::NONE);
- /** Static function to check if given info will lead to a valid configuration of @ref NECast
- *
- * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U32/S32/F32.
- * @param[in] output Output tensor info. Data type supported: U8/S8/QASYMM8/U32/S32/F32.
- * @param[in] input_subtype Sub data type of input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- SubDataType input_subtype = SubDataType::NONE);
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NECAST_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEDEPTHTOSPACELAYEREX_H__
-#define __ARM_COMPUTE_NEDEPTHTOSPACELAYEREX_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEDepthToSpaceLayerKernelEx. */
-class NEDepthToSpaceLayerEx : public INESimpleFunctionNoBorder
-{
-public:
- /** Set the input and output tensors.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- */
- void configure(const ITensor *input, ITensor *output, int32_t block_shape);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEDepthToSpaceLayerEx.
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] output Tensor output info. Data types supported: same as @p input
- * @param[in] block_shape Block shape x value.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEDEPTHTOSPACELAYEREX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEELEMENTWISEUNARYLAYEREX_H__
-#define __ARM_COMPUTE_NEELEMENTWISEUNARYLAYEREX_H__
-
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to perform negative on an input tensor. */
-class NENegLayer : public INESimpleFunction
-{
-public:
- /** Initialize the function
- *
- * @param[in] input Input tensor. Data types supported: F16/F32/S32.
- * @param[out] output Output tensor. Data types supported: same as @p input.
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NERsqrtLayer
- *
- * @param[in] input First tensor input info. Data types supported: F16/F32/S32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEELEMENTWISEUNARYLAYEREX_H__ */
#include "arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h"
#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include "arm_compute/runtime/Tensor.h"
MemoryGroup _memory_group;
NEFullyConnectedHybridLayerReshapeWeights _reshape_weights_function;
NEQuantizationSymmetricKernel _quant_input_kernel;
- NEGEMMLowpMatrixMultiplyCoreEx _mm_gemmlowp;
+ NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
NEMultiplyScaleFactorKernel _multiply_scale_kernel;
NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
Tensor _reshape_weights_output;
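The hybrid fully-connected path above now holds the stock NEGEMMLowpMatrixMultiplyCore instead of the removed Ex variant. A minimal sketch of the stock function's configure()/run() interface is shown below; the shapes and quantization parameters are assumptions for illustration, not values taken from this patch.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void gemmlowp_sketch()
{
    // Illustrative GEMM: A is 4x32, B is 32x16, output is 4x16 with S32 accumulators.
    Tensor a, b, output;
    a.allocator()->init(TensorInfo(TensorShape(32U, 4U), 1, DataType::QASYMM8_SIGNED,
                                   QuantizationInfo(1.f, 0)));
    b.allocator()->init(TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8_SIGNED,
                                   QuantizationInfo(1.f, 0)));
    output.allocator()->init(TensorInfo(TensorShape(16U, 4U), 1, DataType::S32));

    NEGEMMLowpMatrixMultiplyCore gemmlowp;
    gemmlowp.configure(&a, &b, nullptr, &output); // same call shape as the removed Ex variant

    a.allocator()->allocate();
    b.allocator()->allocate();
    output.allocator()->allocate();

    // ... fill a and b here ...
    gemmlowp.run();
}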
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCOREEX_H__
-#define __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCOREEX_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-// #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
-#include "arm_compute/runtime/Tensor.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. This function calls the following
- * NEON kernels if the DOT product instruction is not available:
- *
- * -# @ref NEGEMMInterleave4x4Kernel
- * -# @ref NEGEMMTranspose1xWKernel
- * -# @ref NEGEMMLowpMatrixMultiplyKernel
- * -# @ref NEGEMMLowpOffsetContributionKernel
- * -# @ref NEActivationLayer
- *
- * otherwise if the DOT product instruction is available:
- *
- * -# @ref NEGEMMLowpOffsetContributionKernel
- *
-*/
-class NEGEMMLowpMatrixMultiplyCoreEx : public IFunction
-{
-public:
- /** Constructor */
- NEGEMMLowpMatrixMultiplyCoreEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMLowpMatrixMultiplyCoreEx(const NEGEMMLowpMatrixMultiplyCoreEx &) = delete;
- /** Default move constructor */
- NEGEMMLowpMatrixMultiplyCoreEx(NEGEMMLowpMatrixMultiplyCoreEx &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEGEMMLowpMatrixMultiplyCoreEx &operator=(const NEGEMMLowpMatrixMultiplyCoreEx &) = delete;
- /** Default move assignment operator */
- NEGEMMLowpMatrixMultiplyCoreEx &operator=(NEGEMMLowpMatrixMultiplyCoreEx &&) = default;
- /** Initialise the kernel's inputs, output
- *
- * @note GEMM_LOWP: low precision GEMM kernel
- * This kernel performs the following computations:
- *
- * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
- * -# Convert b values from QASYMM8 to int32 add b_offset to each of them.
- * -# Compute the matrix product of the resulting a * b in int32.
- *
- * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is
- * QASYMM8/QASYMM8_SIGNED otherwise
- *
- * @param[in] a First input tensor (Matrix A). Data type supported:
- * QASYMM8/QASYMM8_SIGNED.
- * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
- * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported:
- * S32
- * @param[out] output Output tensor. Data type supported: Data type supported:
- * S32/QASYMM8/QASYMM8_SIGNED
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped
- * and
- * if the reshape of matrix B should be executed only for the first run
- */
- void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *output,
- const GEMMInfo &gemm_info = GEMMInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEGEMMLowpMatrixMultiplyCoreEx
- *
- * @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is
- * QASYMM8/QASYMM8_SIGNED otherwise
- *
- * @param[in] a First input tensor info (Matrix A). Data type supported:
- * QASYMM8/QASYMM8_SIGNED.
- * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a
- * @param[in] c Third input tensor info (Matrix C). It can be a nullptr. Data type
- * supported: S32
- * @param[in] output Output tensor info. Data type supported: Data type supported:
- * S32/QASYMM8/QASYMM8_SIGNED
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped
- * and
- * if the reshape of matrix B should be executed only for the first run
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c,
- const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo());
-
- // Inherited methods overridden
- void run() override;
- void prepare() override;
-
-private:
- MemoryGroup _memory_group;
- NEGEMMAssemblyDispatch _asm_glue;
- std::unique_ptr<INEKernel> _mm_kernel;
- std::unique_ptr<INEKernel> _mtx_a_reshape_kernel;
- std::unique_ptr<INEKernel> _mtx_b_reshape_kernel;
- NEGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel;
- NEGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
- NEGEMMLowpOffsetContributionKernel _offset_contribution_kernel;
- NEGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel;
-
- Tensor _vector_sum_col;
- Tensor _vector_sum_row;
- Tensor _tmp_a;
- Tensor _tmp_b;
- Tensor _mm_result_s32;
- Tensor _signed_a;
- Tensor _signed_output;
- const ITensor *_original_b;
- int32_t _a_offset;
- int32_t _b_offset;
-
- bool _run_vector_matrix_multiplication;
- bool _assembly_path;
- bool _fused_assembly_path;
- bool _reshape_b_only_on_first_run;
- bool _is_prepared;
- bool _fuse_output_stage;
- bool _flip_signedness;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCOREEX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEPRELU_H__
-#define __ARM_COMPUTE_NEPRELU_H__
-
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to run @ref NEPReLUKernel */
-class NEPReLU : public INESimpleFunctionNoBorder
-{
-public:
- /** Initialise the kernel's inputs and output
- *
- * @param[in] input. Data types supported: QASYMM8/F32.
- * @param[in] alpha. Data types supported: Same as @p input.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- */
- void configure(const ITensor *input, const ITensor *alpha, ITensor *output);
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEPRELU_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NERNNLAYER_EX_H__
-#define __ARM_COMPUTE_NERNNLAYER_EX_H__
-
-#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
-#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Basic function to run @ref NERNNLayerEx */
-class NERNNLayerEx : public IFunction
-{
-public:
- /** Default constructor */
- NERNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NERNNLayerEx(const NERNNLayerEx &) = delete;
- /** Default move constructor */
- NERNNLayerEx(NERNNLayerEx &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NERNNLayerEx &operator=(const NERNNLayerEx &) = delete;
- /** Default move assignment operator */
- NERNNLayerEx &operator=(NERNNLayerEx &&) = default;
- /** Initialize the function
- *
- * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data
- * types supported: F16/F32
- * @param[in] weights Weights tensor of shape [input_size, num_units] that
- * multiplies the input. Data types supported: Same as @p input
- * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies
- * the current 'state'. Data types supported: Same as @p input
- * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same
- * as @p input
- * @param[out] output Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in,out] hidden_state Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in] info Activation layer parameter.
- */
- void configure(const ITensor *input, const ITensor *weights, const ITensor *recurrent_weights,
- const ITensor *bias, ITensor *hidden_state, ITensor *output,
- ActivationLayerInfo &info);
- /** Initialize the function
- *
- * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data
- * types supported: F16/F32
- * @param[in] weights Weights tensor of shape [input_size, num_units] that multiplies
- * the input. Data types supported: Same as @p input
- * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the
- * current 'state'. Data types supported: Same as @p input
- * @param[in] bias Bias vector of shape [num_units]. Data types supported: Same as @p
- * input
- * @param[in] output Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in] hidden_state Output tensor of shape [num_units, batch_size]. Data types
- * supported: Same as @p input
- * @param[in] info Activation layer parameter.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
- const ITensorInfo *recurrent_weights, const ITensorInfo *bias,
- const ITensorInfo *hidden_state, const ITensorInfo *output,
- const ActivationLayerInfo &info);
-
- // Inherited methods overridden:
- void run() override;
- void prepare() override;
-
-private:
- MemoryGroup _memory_group;
- NEGEMM _gemm_state_f;
- NEArithmeticAdditionKernel _add_kernel;
- NEActivationLayerKernel _activation_kernel;
- NEFullyConnectedLayer _fully_connected_kernel;
- NECopyKernel _copy_kernel;
- Tensor _fully_connected_out;
- Tensor _gemm_output;
- Tensor _add_output;
- bool _is_prepared;
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NERNNLAYER_EX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEON_REDUCE_MEAN_EX_H__
-#define __ARM_COMPUTE_NEON_REDUCE_MEAN_EX_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
-#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to perform reduce operation */
-class NEReduceMeanEx : public IFunction
-{
-public:
- /** Constructor */
- NEReduceMeanEx(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Configure kernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
- * @param[in] reduction_axis Reduction axis vector.
- * @param[in] keep_dims If positive, retains reduced dimensions with length 1.
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- */
- void configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims,
- ITensor *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NEReduceMeanEx
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
- * @param[in] reduction_axis Reduction axis vector.
- * @param[in] keep_dims If positive, retains reduced dimensions with length 1.
- * @param[in] output Destination tensor. Data type supported: Same as @p input
- *
- * @return A status
- */
- static Status validate(const ITensorInfo *input, const Coordinates &reduction_axis,
- bool keep_dims, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- std::unique_ptr<NEReductionOperation[]> _reduction_kernels{nullptr};
- std::unique_ptr<Tensor[]> _reduced_outs{nullptr};
- NEReshapeLayer _reshape;
- unsigned int _reduction_ops;
- bool _keep_dims;
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEON_REDUCE_MEAN_EX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NESPACETOBATCHLAYEREX_H__
-#define __ARM_COMPUTE_NESPACETOBATCHLAYEREX_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h"
-#include "arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to spatial divide a tensor. This function calls the following NEON
- * kernels/functions:
- *
- * -# @ref NEMemsetKernel
- * -# @ref NESpaceToBatchLayerKernel
- */
-class NESpaceToBatchLayerEx : public IFunction
-{
-public:
- /** Default constructor */
- NESpaceToBatchLayerEx();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESpaceToBatchLayerEx(const NESpaceToBatchLayerEx &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESpaceToBatchLayerEx &operator=(const NESpaceToBatchLayerEx &) = delete;
- /** Allow instances of this class to be moved */
- NESpaceToBatchLayerEx(NESpaceToBatchLayerEx &&) = default;
- /** Allow instances of this class to be moved */
- NESpaceToBatchLayerEx &operator=(NESpaceToBatchLayerEx &&) = default;
- /** Default destructor */
- virtual ~NESpaceToBatchLayerEx() = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings,
- ITensor *output);
- /** Set the input and output tensors. (Static block shape and paddings)
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[in] padding_left The left padding of the output tensor.
- * @param[in] padding_right The right padding of the output tensor.
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ITensor *input, const int block_shape_x, const int block_shape_y,
- const Size2D &padding_left, const Size2D &padding_right, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NESpaceToBatchLayerEx
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] block_shape block shape tensor info with shape [M]. Data types supported: S32
- * @param[in] paddings paddings tensor info with shape [2, M]. Data types supported: S32
- * @param[in] output Tensor output info. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape,
- const ITensorInfo *paddings, const ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NESpaceToBatchLayerEx (Static block shape and paddings)
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[in] padding_left The left padding of the output tensor.
- * @param[in] padding_right The right padding of the output tensor.
- * @param[in] output Tensor output info. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y,
- const Size2D &padding_left, const Size2D &padding_right,
- const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- NESpaceToBatchLayerKernel _space_to_batch_kernel; /**< SpaceToBatch kernel to run */
- NEMemsetKernel _memset_kernel; /**< Memset kernel to run */
- bool _has_padding; /**< Flag to check if the output has padding */
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NESPACETOBATCHLAYEREX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NESPACETODEPTHLAYEREX_H__
-#define __ARM_COMPUTE_NESPACETODEPTHLAYEREX_H__
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** This function calls the following NEON kernels/functions:
- *
- * -# @ref NESpaceToDepthLayerKernelEx
- */
-class NESpaceToDepthLayerEx : public INESimpleFunctionNoBorder
-{
-public:
- /** Set the input and output tensors.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape value
- */
- void configure(const ITensor *input, ITensor *output, int32_t block_shape);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * NESpaceToDepthLayerEx (Static block shape and paddings)
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] output Tensor output info. Data types supported: same as @p input
- * @param[in] block_shape Block shape value
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NESPACETODEPTHLAYEREX_H__ */
*/
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
#ifndef __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__
#define __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__
-#include "arm_compute/runtime/CPP/functions/CPPUpsampleEx.h"
+#include "arm_compute/runtime/CPP/functions/CPPUpsample.h"
#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEPermute.h"
+#include "arm_compute/runtime/NEON/functions/NEReverse.h"
-#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
{
/** Function to run the deconvolution layer.
*
- * Transpose convolution Layer is the backward pass of Convolution Layer. First we transform the
- * input depending on the stride and pad info and then perfrom a 1x1
+ * Deconvolution Layer is the backward pass of Convolution Layer. First we transform the input
+ * depending on the stride and pad info and then perform a 1x1
* convolution pass. Input stride defines how many zeroes we should put between each element of the
* input, pad is the amount of padding and finaly a is a user
* specified value where a < stride - 1 that increases the padding top and right of the input image.
* kernel_x and kernel_y are the convolution sizes in x and y.
* stride_x and stride_y is the input stride of the first and second dimension.
*
- * The weights used by Transpose convolution are supposed to be the same as the ones used for
- * Convolution. Therefore, it will be necessary to use the weights in the
- * reverse order to perform an actual convolution. This is achieved by using the @ref
- * CPPFlipWeightsKernel.
+ * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution.
+ * Therefore, it will be necessary to use the weights in the
+ * reverse order to perform an actual convolution. This is achieved by using @ref NEReverse.
*
* This function calls the following NEON kernels/functions:
*
 * -# @ref CPPUpsample
 * -# @ref NEConvolutionLayer
+ * -# @ref NEReverse
*
*/
class NETransposeConvLayer : public IFunction
{
public:
- /** Default constructor */
+ /** Constructor */
NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
/** Set the input, weights, biases and output tensors.
*
* @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8.
+ * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
* @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
+ * supported: Same as @p input.
* @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type
- * supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input.
+ * supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
* @param[out] output Output tensor. The output has the same number of dimensions as the @p
- * input.
+ * input.
* @param[in] info Contains padding and policies to be used in the deconvolution, this is
- * decribed in @ref PadStrideInfo.
- * @param[in] invalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to top edge of the output.
+ * described in @ref PadStrideInfo.
+ * @param[in] invalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
*
*/
void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output,
const PadStrideInfo &info, unsigned int invalid_right,
unsigned int invalid_bottom);
/** Static function to check if given info will lead to a valid configuration of @ref
- * NETransposeConvLayer
+ * NETransposeConvLayer
*
* @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an
- * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8.
+ * optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
* @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type
- * supported: Same as @p input.
+ * supported: Same as @p input.
* @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types
- * supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input.
+ * supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
* @param[in] output Output tensor info. The output has the same number of dimensions as the @p
- * input.
+ * input.
* @param[in] info Contains padding and policies to be used in the deconvolution, this is
- * decribed in @ref PadStrideInfo.
- * @param[in] innvalid_right The number of zeros added to right edge of the output.
- * @param[in] invalid_bottom The number of zeros added to top edge of the output.
+ * described in @ref PadStrideInfo.
+ * @param[in] innvalid_right The number of zeros added to right edge of the output.
+ * @param[in] invalid_bottom The number of zeros added to bottom edge of the output.
*
* @return a status
*/
private:
MemoryGroup _memory_group;
NEConvolutionLayer _conv_f;
- CPPUpsampleEx _upsample_f;
- CPPFlipWeightsKernel _flip_weights;
- NEPermute _permute_input;
- NEPermute _permute_weights;
- NEPermute _permute_output;
+ CPPUpsample _upsample_f;
+ NEReverse _flip_weights;
Tensor _scaled_output;
Tensor _weights_flipped;
- Tensor _permuted_input;
- Tensor _permuted_weights;
- Tensor _permuted_output;
- bool _is_nchw;
+ Tensor _flip_axis;
const ITensor *_original_weights;
ITensor *_input;
PadStrideInfo _info;
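/*
 * Editorial sketch (not part of the patch): the doc comment above describes transpose
 * convolution as inserting zeroes between input elements according to the stride before
 * running an ordinary convolution. A minimal 1-D illustration of that upsampling step,
 * with an assumed std::vector representation and a hypothetical helper name:
 */
#include <cstddef>
#include <vector>

// Insert (stride - 1) zeros between consecutive samples, as the upsample stage of a
// transpose convolution would do before the convolution pass.
std::vector<float> upsample_1d(const std::vector<float> &input, int stride)
{
  if (input.empty() || stride <= 1)
    return input;
  std::vector<float> out((input.size() - 1) * stride + 1, 0.0f);
  for (std::size_t i = 0; i < input.size(); ++i)
    out[i * stride] = input[i]; // original samples land on multiples of the stride
  return out;
}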
const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map = {
// ARMComputeEx kernels
- {"arg_op", "arg_operation.cl"},
- {"arithmetic_add_qasymm8", "arithmetic_op_quantized.cl"},
{"binary_logical_op", "binary_logical_op.cl"},
- {"cast", "cast.cl"},
- {"cast_qasymm_in", "cast.cl"},
- {"cast_qasymm_out", "cast.cl"},
- {"comparison_op", "comparison_op.cl"},
- {"comparison_op_qasymm8", "comparison_op_quantized.cl"},
- {"depth_to_space_nchw", "depth_to_space.cl"},
- {"depth_to_space_nhwc", "depth_to_space.cl"},
{"embedding_lookup", "embedding_lookup.cl"},
{"gather_ex", "gather_ex.cl"},
{"gather_ex_1d", "gather_ex.cl"},
{"instance_normalization_ex", "instance_normalization_ex.cl"},
{"multiply_scale_factor", "multiply_scale_factor.cl"},
{"neg_tensor", "neg_tensor.cl"},
- {"permute_generic", "permute_ex.cl"},
- {"pixelwise_mul_qasymm8", "pixelwise_mul_quantized.cl"},
- {"prelu", "prelu.cl"},
- {"prelu_qasymm8", "prelu_quantized.cl"},
{"quantization_symm8", "quantization_symm8.cl"},
{"reduce_min_max", "reduce_operation.cl"},
{"reduce_sum_mean", "reduce_operation.cl"},
{"radixsort_reorder", "topkv2_radixsort.cl"},
{"topkv2_quicksort", "topkv2_quicksort.cl"},
{"scale_factor_symm8", "scale_factor.cl"},
- {"space_to_depth_nchw", "space_to_depth.cl"},
- {"space_to_depth_nhwc", "space_to_depth.cl"},
};
const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map = {
#ifdef EMBEDDED_KERNELS
{
- "arg_operation.cl",
-#include "./cl_kernels/arg_operation.clembed"
- },
- {
- "cast.cl",
-#include "./cl_kernels/cast.clembed"
- },
- {
"embedding_lookup.cl",
#include "./cl_kernels/embedding_lookup.clembed"
},
{
- "depth_to_space.cl",
-#include "./cl_kernels/depth_to_space.clembed"
- },
- {
"gather_ex.cl",
#include "./cl_kernels/gather_ex.clembed"
},
#include "./cl_kernels/neg_tensor.clembed"
},
{
- "prelu.cl",
-#include "./cl_kernels/prelu.clembed"
- },
- {
- "prelu_quantized.cl",
-#include "./cl_kernels/prelu_quantized.clembed"
- },
- {
"quantization_symm8.cl",
#include "./cl_kernels/quantization_symm8.clembed"
},
#include "./cl_kernels/scale_factor.clembed"
},
{
- "space_to_depth.cl",
-#include "./cl_kernels/space_to_depth.clembed"
- },
- {
"topkv2.cl",
#include "./cl_kernels/topkv2.clembed"
},
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "helpers.h"
-
-#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE)
-/** Perform arg_max/arg_min
- *
- * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type.
- * e.g. -DDATA_TYPE=short
- * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size.
- * e.g. -DDEPTH_OUT=16
- * @attention Operation type(code) specifying which operation to perform should be passed as
- * preprocessor argument using -DOP_CODE = number. e.g. -DOP_CODE=1
- *
- * @param[in] input_ptr Pointer to the source image. Supported data
- * types:
- * U8/QASYMM8/S8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source image in X dimension
- * (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension
- * (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension
- * (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element
- * in the source image
- * @param[in] input_stride_w Stride of the source tensor in W dimension
- * (in bytes)
- * @param[in] input_step_w output_stride_w * number of elements along W
- * processed per workitem(in bytes)
- * @param[out] output_ptr Pointer to the destination image.
- * Supported data types: U32
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension
- * (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension
- * (in bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- * @param[in] axis Axis through which reduction occurs
- * @param[in] dim Dimension across the axis to be reduced.
- */
-
-__kernel void arg_op(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output), const int axis,
- const int dim)
-{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, 0);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
-
- int indices[4] = {
- get_global_id(0), get_global_id(1), get_global_id(2) % DEPTH_OUT,
- get_global_id(2) / DEPTH_OUT,
- };
-
- DATA_TYPE value =
- *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1], indices[2], indices[3]));
- DATA_TYPE tval = value;
- int idx = 0;
- for (int i = 1; i < dim; ++i)
- {
- indices[axis] = i;
-
-#if OP_CODE == 1 // ArgMax
- value = max(value, *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1],
- indices[2], indices[3])));
-#elif OP_CODE == 2 // ArgMin
- value = min(value, *((__global DATA_TYPE *)tensor4D_offset(&in, indices[0], indices[1],
- indices[2], indices[3])));
-#else
- return;
-
-#endif
-
- if (tval != value)
- {
- idx = indices[axis];
- tval = value;
- }
- }
-
- *((__global uint *)out.ptr) = idx;
-}
-#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE)
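/*
 * Editorial sketch (not part of the patch): the removed arg_op kernel walks one axis,
 * keeps a running max (or min), and records the index at which the running value last
 * changed. A host-side scalar equivalent of the ArgMax branch, with hypothetical names:
 */
#include <algorithm>
#include <cstddef>

// Index of the maximum element over a contiguous slice of length dim.
std::size_t arg_max_1d(const float *slice, std::size_t dim)
{
  float best = slice[0];
  std::size_t idx = 0;
  for (std::size_t i = 1; i < dim; ++i)
  {
    const float value = std::max(best, slice[i]);
    if (value != best) // running maximum changed, remember where
    {
      idx = i;
      best = value;
    }
  }
  return idx;
}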
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "helpers_asymm.h"
-
-#ifdef SATURATE
-#define ADD(x, y) add_sat((x), (y))
-#define SUB(x, y) sub_sat((x), (y))
-#else /* SATURATE */
-#define ADD(x, y) (x) + (y)
-#define SUB(x, y) (x) - (y)
-#endif /* SATURATE */
-
-/** Performs a pixelwise addition used to quantize down the int32 accumulator values of GEMMLowp to
- * QASYMM8
- *
- * The following computations will be performed:
- *
- * -# Add offset terms to inputs
- -# Get scaled value of two inputs
- * -# Add inputs
- * -# Add offset terms to final result
- * -# Multiply each entry of result by result_mult_int
- * -# Shift the int32 accumulator by result_shift
- * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8.
- *
- * @attention The inputs and output data types need to be passed at compile time using
- * -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT:
- * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=uchar -DDATA_TYPE_OUT=uchar
- * @attention The number of bits to shift left of input tensors must be passed at compile time using
- * -DLEFT_SHIFT
- * @attention The offset, scalar scale factor and number of bits to shift right of input tensors
- * must be passed at compile time using -DIN1_OFFSET, -RIN1_MULT_INT, -DIN1_SHIFT,
- -DIN2_OFFSET,
- * -RIN2_MULT_INT and -DIN2_SHIFT
- * @attention The offset, scalar scale factor and number of bits to shift right of output tensor
- * must be passed at compile time using -DRESULT_OFFSET, -RESULT_MULT_INT and
- -DRESULT_SHIFT
- *
- * @attention The input and output data_types need to be passed at compile time using
- * -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT:
- * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=uchar -DDATA_TYPE_OUT=uchar
- * @attention The inputs and output scale information of qasymm8 need to be passed at compile time
- * using -DSCALE_IN1, -DSCALE_IN2 and -DSCALE_OUT:
- * e.g. -DSCALE_IN1=1.f -DSCALE_IN2=1.f -DSCALE_OUT=2.f
- * @attention The inputs and output scale offset need to be passed at compile time using
- * -DOFFSET_IN1, -DOFFSET_IN2 and -DOFFSET_OUT:
- * e.g. -DOFFSET_IN1=0 -DOFFSET_IN2=0 -DOFFSET_OUT=0
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g.
- * -DVEC_SIZE=16
- * @attention To perform saturating operation -DSATURATE has to be passed to the compiler otherwise
- * wrapping policy will be used.
- *
- * @param[in] in1_ptr Pointer to the source tensor.
- * Supported data types: QASYMM8
- * @param[in] in1_stride_x Stride of the source tensor in X dimension
- * (in bytes)
- * @param[in] in1_step_x in1_stride_x * number of elements along X processed
- * per workitem(in bytes)
- * @param[in] in1_stride_y Stride of the source tensor in Y dimension
- * (in bytes)
- * @param[in] in1_step_y in1_stride_y * number of elements along Y processed
- * per workitem(in bytes)
- * @param[in] in1_stride_z Stride of the source tensor in Z dimension
- * (in bytes)
- * @param[in] in1_step_z in1_stride_z * number of elements along Z processed
- * per workitem(in bytes)
- * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source
- * tensor
- * @param[in] in2_ptr Pointer to the source tensor. Supported data types:
- * QASYMM8
- * @param[in] in2_stride_x Stride of the source tensor in X dimension
- * (in bytes)
- * @param[in] in2_step_x in2_stride_x * number of elements along X processed
- * per workitem(in bytes)
- * @param[in] in2_stride_y Stride of the source tensor in Y dimension
- * (in bytes)
- * @param[in] in2_step_y in2_stride_y * number of elements along Y processed
- * per workitem(in bytes)
- * @param[in] in2_stride_z Stride of the source tensor in Z dimension
- * (in bytes)
- * @param[in] in2_step_z in2_stride_z * number of elements along Z processed
- * per workitem(in bytes)
- * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source
- * tensor
- * @param[out] out_ptr Pointer to the destination tensor.
- * Supported data types: QASYMM8
- * @param[in] out_stride_x Stride of the destination tensor in X dimension
- * (in bytes)
- * @param[in] out_step_x out_stride_x * number of elements along X processed
- * per workitem(in bytes)
- * @param[in] out_stride_y Stride of the destination tensor in Y dimension
- * (in bytes)
- * @param[in] out_step_y out_stride_y * number of elements along Y processed
- * per workitem(in bytes)
- * @param[in] out_stride_z Stride of the source tensor in Z dimension
- * (in bytes)
- * @param[in] out_step_z out_stride_z * number of elements along Z processed
- * per workitem(in bytes)
- * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination
- * tensor
- */
-__kernel void arithmetic_add_qasymm8(TENSOR3D_DECLARATION(in1), TENSOR3D_DECLARATION(in2),
- TENSOR3D_DECLARATION(out))
-{
- // Get pixels pointer
- Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1);
- Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
- Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
-
- // Load data
- VEC_DATA_TYPE(int, 16)
- in1_data = CONVERT(vload16(0, (__global DATA_TYPE_IN1 *)in1.ptr), VEC_DATA_TYPE(int, 16));
- VEC_DATA_TYPE(int, 16)
- in2_data = CONVERT(vload16(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(int, 16));
-
- // Get scaled value of two inputs
- VEC_DATA_TYPE(int, 16) in1_val = in1_data + (VEC_DATA_TYPE(int, 16))(IN1_OFFSET);
- VEC_DATA_TYPE(int, 16) in2_val = in2_data + (VEC_DATA_TYPE(int, 16))(IN2_OFFSET);
-
- VEC_DATA_TYPE(int, 16)
- left_shift = (VEC_DATA_TYPE(int, 16))1 << (VEC_DATA_TYPE(int, 16))(LEFT_SHIFT);
- VEC_DATA_TYPE(int, 16) shifted_in1_val = in1_val * left_shift;
- VEC_DATA_TYPE(int, 16) shifted_in2_val = in2_val * left_shift;
-
- VEC_DATA_TYPE(int, 16)
- scaled_in1_val =
- ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(shifted_in1_val, IN1_MULT_INT, IN1_SHIFT, 16);
- VEC_DATA_TYPE(int, 16)
- scaled_in2_val =
- ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(shifted_in2_val, IN2_MULT_INT, IN2_SHIFT, 16);
-
- // Add inputs and multiply with a multiplier smaller than 1
- VEC_DATA_TYPE(int, 16) sum_val = scaled_in1_val + scaled_in2_val;
- VEC_DATA_TYPE(int, 16)
- out_val =
- ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(sum_val, RESULT_MULT_INT, RESULT_SHIFT, 16);
- out_val += (VEC_DATA_TYPE(int, 16))(RESULT_OFFSET);
-
- VEC_DATA_TYPE(uchar, 16) res = CONVERT(out_val, VEC_DATA_TYPE(uchar, 16));
-
- // TODO: Apply min-max BOUND to support fuse with relu.
- /*
- #if defined(MIN_BOUND)
- res = max(res, (uchar16)MIN_BOUND);
- #endif // defined(MIN_BOUND)
- #if defined(MAX_BOUND)
- res = min(res, (uchar16)MAX_BOUND);
- #endif // defined(MAX_BOUND)
- */
-
- // Store result
- VSTORE(16)(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr);
-}
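/*
 * Editorial sketch (not part of the patch): the removed arithmetic_add_qasymm8 kernel
 * rescales both QASYMM8 operands to a common scale before summing and requantizing. A
 * simplified float reference of the same idea (it skips the fixed-point multiplier path
 * the kernel used; names and the QuantParams struct are illustrative only):
 */
#include <algorithm>
#include <cmath>
#include <cstdint>

struct QuantParams
{
  float scale;
  int32_t offset;
};

uint8_t add_qasymm8_ref(uint8_t a, QuantParams qa, uint8_t b, QuantParams qb, QuantParams qo)
{
  // Dequantize both operands, add in float, then requantize to the output parameters.
  const float fa = (static_cast<int32_t>(a) - qa.offset) * qa.scale;
  const float fb = (static_cast<int32_t>(b) - qb.offset) * qb.scale;
  int32_t q = static_cast<int32_t>(std::lround((fa + fb) / qo.scale)) + qo.offset;
  q = std::max<int32_t>(0, std::min<int32_t>(255, q)); // clamp to the QASYMM8 range
  return static_cast<uint8_t>(q);
}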
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "helpers.h"
-
-#ifndef SCALE
-#define SCALE 1.0f
-#endif
-#ifndef OFFSET
-#define OFFSET 0
-#endif
-#ifndef VEC_SIZE
-#define VEC_SIZE 1
-#endif
-
-#if defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT)
-/** Perform a cast operation on an input tensor.
- *
- * @attention Data types of both input and output can be passed using the -DDATA_TYPE_IN and
- * -DDATA_TYPE_OUT compile flag, e.g. -DDATA_TYPE_IN=float, -DDATA_TYPE_OUT=int
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g.
- * -DVEC_SIZE=16
- * @attention -DBOOL_INPUT : Whether type of input is bool.
- *
- * @param[in] input_ptr Pointer to the source image. Supported data
- * types: F16/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[out] output_ptr Pointer to the destination image. Supported data
- * types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- */
-__kernel void cast(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output))
-{
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
-
- VSTORE(VEC_SIZE)
- (CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input.ptr),
- VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)),
- 0, (__global DATA_TYPE_OUT *)output.ptr);
- VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)
- res = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input.ptr),
- VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE));
-#if defined(BOOL_INPUT)
- VEC_DATA_TYPE(char, VEC_SIZE) tmp = CONVERT(res, VEC_DATA_TYPE(char, VEC_SIZE));
- VEC_DATA_TYPE(char, VEC_SIZE) mask = (VEC_DATA_TYPE(char, VEC_SIZE))(1);
- res = CONVERT(tmp & mask, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE));
-#endif // defined(BOOL_INPUT)
-
- VSTORE(VEC_SIZE)(res, 0, (__global DATA_TYPE_OUT *)output.ptr);
-}
-
-/** Perform a cast operation on an QASYMM8 input tensor.
- * @attention Data types of both input and output can be passed using the -DDATA_TYPE_IN and
- * -DDATA_TYPE_OUT compile flag, e.g. -DDATA_TYPE_IN=float, -DDATA_TYPE_OUT=int
- * @attention Offset and Scale of input should be given as a preprocessor argument using
- * -DOFFSET=int, -DSCALE=float. e.g. -DOFFSET=1, -DSCALE=0.5
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g.
- * -DVEC_SIZE=16
- *
- * @param[in] input_ptr Pointer to the source image. Supported data
- * types: F16/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[out] output_ptr Pointer to the destination image. Supported data
- * types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- */
-__kernel void cast_qasymm_in(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output))
-{
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
-
- VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE)
- in_data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input.ptr);
- VEC_DATA_TYPE(int, VEC_SIZE) offset = (VEC_DATA_TYPE(int, VEC_SIZE))(OFFSET);
- VEC_DATA_TYPE(float, VEC_SIZE) scale = (VEC_DATA_TYPE(float, VEC_SIZE))(SCALE);
-
- VEC_DATA_TYPE(int, VEC_SIZE) tmp = CONVERT(in_data, VEC_DATA_TYPE(int, VEC_SIZE)) - offset;
- VEC_DATA_TYPE(float, VEC_SIZE) out_data = CONVERT(tmp, VEC_DATA_TYPE(float, VEC_SIZE)) * scale;
-
- VSTORE(VEC_SIZE)
- (CONVERT(out_data, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), 0,
- (__global DATA_TYPE_OUT *)output.ptr);
-}
-
-/** Perform a cast operation on an QASYMM8 output tensor.
- * @attention Data types of both input and output can be passed using the -DDATA_TYPE_IN and
- * -DDATA_TYPE_OUT compile flag, e.g. -DDATA_TYPE_IN=float, -DDATA_TYPE_OUT=int
- * @attention Offset and Scale of output should be given as a preprocessor argument using
- * -DOFFSET=int, -DSCALE=float. e.g. -DOFFSET=1, -DSCALE=0.5
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g.
- * -DVEC_SIZE=16
- *
- * @param[in] input_ptr Pointer to the source image. Supported data
- * types: F16/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[out] output_ptr Pointer to the destination image. Supported data
- * types: U8
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- */
-__kernel void cast_qasymm_out(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output))
-{
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
-
- VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE)
- in_data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input.ptr);
- VEC_DATA_TYPE(int, VEC_SIZE) offset = (VEC_DATA_TYPE(int, VEC_SIZE))(OFFSET);
- VEC_DATA_TYPE(float, VEC_SIZE) scale = (VEC_DATA_TYPE(float, VEC_SIZE))(SCALE);
-
- VEC_DATA_TYPE(float, VEC_SIZE) tmp = CONVERT(in_data, VEC_DATA_TYPE(float, VEC_SIZE)) / scale;
- VEC_DATA_TYPE(float, VEC_SIZE) out_data = tmp + CONVERT(offset, VEC_DATA_TYPE(float, VEC_SIZE));
-
- VSTORE(VEC_SIZE)
- (CONVERT(out_data, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), 0,
- (__global DATA_TYPE_OUT *)output.ptr);
-}
-#endif // defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "helpers.h"
-
-#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE) && defined(Z_OUT)
-/** Perform space to depth rearrangement of tensor
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Input tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size.
- * e.g. -DDEPTH_OUT=16
- * @attention The value of the z-axis of output tensor should be given as a preprocessor argument
- * using -DZ_OUT=size. e.g. -DZ_OUT=16
- * @attention block size should be given as a preprocessor argument using -DBLOCK_SIZE=size. e.g.
- * -DBLOCK_SIZE=1
- *
- * @param[in] input_ptr Pointer to the source image. Supported data
- * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[out] output_ptr Pointer to the destination image. Supported data
- * types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension (in
- * bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- */
-__kernel void depth_to_space_nchw(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output))
-{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, Z_OUT);
-
- int out_index[4] = {0};
- int in_index[4] = {0};
-
- out_index[0] = get_global_id(0); // W
- out_index[1] = get_global_id(1); // H
- out_index[2] = get_global_id(2) % Z_OUT; // C
- out_index[3] = get_global_id(2) / Z_OUT; // B
-
- in_index[0] = out_index[0] / BLOCK_SIZE;
- in_index[1] = out_index[1] / BLOCK_SIZE;
- in_index[2] = out_index[2] +
- ((out_index[1] % BLOCK_SIZE) * BLOCK_SIZE + out_index[0] % BLOCK_SIZE) * DEPTH_OUT;
- in_index[3] = out_index[3];
-
- *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(
- &in, in_index[0], in_index[1], in_index[2], in_index[3]));
-}
-#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE) && defined(Z_OUT)
-
-#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE) && defined(Z_OUT)
-/** Perform space to depth rearrangement of tensor (NHWC)
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size.
- * e.g. -DDEPTH_OUT=16
- * @attention The value of the z-axis of output tensor should be given as a preprocessor argument
- * using -DZ_OUT=size. e.g. -DZ_OUT=16
- * @attention block size should be given as a preprocessor argument using -DBLOCK_SIZE=size. e.g.
- * -DBLOCK_SIZE=1
- *
- * @param[in] input_ptr Pointer to the source image. Supported data
- * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[out] output_ptr Pointer to the destination image. Supported data
- * types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension (in
- * bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- */
-__kernel void depth_to_space_nhwc(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output))
-{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, Z_OUT);
-
- int out_index[4] = {0};
- int in_index[4] = {0};
-
- out_index[0] = get_global_id(0); // C
- out_index[1] = get_global_id(1); // W
- out_index[2] = get_global_id(2) % Z_OUT; // H
- out_index[3] = get_global_id(2) / Z_OUT; // B
-
- in_index[0] = out_index[0] +
- ((out_index[2] % BLOCK_SIZE) * BLOCK_SIZE + out_index[1] % BLOCK_SIZE) * DEPTH_OUT;
- in_index[1] = out_index[1] / BLOCK_SIZE;
- in_index[2] = out_index[2] / BLOCK_SIZE;
- in_index[3] = out_index[3];
-
- *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(
- &in, in_index[0], in_index[1], in_index[2], in_index[3]));
-}
-#endif // defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE) && defined(Z_OUT)
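/*
 * Editorial sketch (not part of the patch): the removed depth_to_space_nhwc kernel maps
 * each output coordinate back to the input coordinate it reads from, rather than
 * scattering inputs forward. The same index arithmetic for one NHWC element, with an
 * illustrative Coord4 struct:
 */
struct Coord4
{
  int c, w, h, b;
};

// block is the depth-to-space block size, depth_out the number of output channels.
Coord4 depth_to_space_src_coord(const Coord4 &out, int block, int depth_out)
{
  Coord4 in;
  in.c = out.c + ((out.h % block) * block + out.w % block) * depth_out;
  in.w = out.w / block;
  in.h = out.h / block;
  in.b = out.b;
  return in;
}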
*/
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
#ifndef ARM_COMPUTE_HELPER_H
#define ARM_COMPUTE_HELPER_H
#pragma OPENCL EXTENSION cl_arm_printf : enable
#endif // defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf)
+#define GPU_ARCH_MIDGARD 0x100
+#define GPU_ARCH_BIFROST 0x200
+
+/** Concatenate two inputs.
+ *
+ * @param[in] a The first input to be concatenated
+ * @param[in] b The second input to be concatenated
+ *
+ * @return The concatenated output
+ */
+#define CONCAT(a, b) a##b
+
+/** Expand the given vector
+ *
+ * @param[in] x The vector to be expanded
+ *
+ * @return The expanded output
+ */
#define EXPAND(x) x
+/** Clamp the given value between an upper and lower bound.
+ *
+ * @param[in] x The value to be clamped
+ * @param[in] min_val The lower bound
+ * @param[in] max_val The upper bound
+ *
+ * @return The clamped value.
+ */
#define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val)
+/** REVn reverses the given vector whose size is n.
+ * @name REVn
+ *
+ * @param[in] x The vector to be reversed
+ *
+ * @return The reversed vector
+ * @{
+ */
+#define REV1(x) ((x))
+#define REV2(x) ((x).s10)
+#define REV3(x) ((x).s210)
+#define REV4(x) ((x).s3210)
+#define REV8(x) ((x).s76543210)
+#define REV16(x) ((x).sFEDCBA9876543210)
+/** @} */ // end of group REVn
+
+/** Reverse the given vector.
+ * @name REVERSE
+ *
+ * @param[in] x The vector to be reversed
+ * @param[in] s The size of the vector
+ *
+ * @return The reversed vector
+ * @{
+ */
+#define REVERSE_STR(x, s) REV##s((x))
+#define REVERSE(x, s) REVERSE_STR(x, s)
+/** @} */ // end of group REVERSE
+
+/** Circular-right-shift (rotate-right) the vector of size s by the amount of n.
+ * @name ROTs_n
+ *
+ * @param[in] x The vector to be shifted
+ *
+ * @return The shifted vector
+ * @{
+ */
+#define ROT1_0(x) ((x))
+
+#define ROT2_0(x) ((x))
+#define ROT2_1(x) ((x).s10)
+
+#define ROT3_0(x) ((x))
+#define ROT3_1(x) ((x).s201)
+#define ROT3_2(x) ((x).s120)
+
+#define ROT4_0(x) ((x))
+#define ROT4_1(x) ((x).s3012)
+#define ROT4_2(x) ((x).s2301)
+#define ROT4_3(x) ((x).s1230)
+
+#define ROT8_0(x) ((x))
+#define ROT8_1(x) ((x).s70123456)
+#define ROT8_2(x) ((x).s67012345)
+#define ROT8_3(x) ((x).s56701234)
+#define ROT8_4(x) ((x).s45670123)
+#define ROT8_5(x) ((x).s34567012)
+#define ROT8_6(x) ((x).s23456701)
+#define ROT8_7(x) ((x).s12345670)
+
+#define ROT16_0(x) ((x))
+#define ROT16_1(x) ((x).sF0123456789ABCDE)
+#define ROT16_2(x) ((x).sEF0123456789ABCD)
+#define ROT16_3(x) ((x).sDEF0123456789ABC)
+#define ROT16_4(x) ((x).sCDEF0123456789AB)
+#define ROT16_5(x) ((x).sBCDEF0123456789A)
+#define ROT16_6(x) ((x).sABCDEF0123456789)
+#define ROT16_7(x) ((x).s9ABCDEF012345678)
+#define ROT16_8(x) ((x).s89ABCDEF01234567)
+#define ROT16_9(x) ((x).s789ABCDEF0123456)
+#define ROT16_10(x) ((x).s6789ABCDEF012345)
+#define ROT16_11(x) ((x).s56789ABCDEF01234)
+#define ROT16_12(x) ((x).s456789ABCDEF0123)
+#define ROT16_13(x) ((x).s3456789ABCDEF012)
+#define ROT16_14(x) ((x).s23456789ABCDEF01)
+#define ROT16_15(x) ((x).s123456789ABCDEF0)
+/** @} */ // end of group ROTs_n
+
+/** Circular-right-shift (rotate-right) the given vector by the given amount.
+ * @name ROTATE
+ *
+ * @param[in] x The vector to be shifted
+ * @param[in] s The size of the vector
+ * @param[in] n The amount to be shifted
+ *
+ * @return The shifted vector
+ * @{
+ */
+#define ROTATE_STR(x, s, n) ROT##s##_##n(x)
+#define ROTATE(x, s, n) ROTATE_STR(x, s, n)
+/** @} */ // end of group ROTATE
+
+/** Creates a vector of size n filled with offset values corresponding to the location of each
+ * element.
+ * @name V_OFFSn
+ *
+ * @param[in] dt The data type of the output vector
+ *
+ * @return The vector filled with offset values
+ * @{
+ */
+#define V_OFFS1(dt) (dt)(0)
+#define V_OFFS2(dt) (dt)(0, 1)
+#define V_OFFS3(dt) (dt)(0, 1, 2)
+#define V_OFFS4(dt) (dt)(0, 1, 2, 3)
+#define V_OFFS8(dt) (dt)(0, 1, 2, 3, 4, 5, 6, 7)
+#define V_OFFS16(dt) (dt)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
+/** @} */ // end of group V_OFFSn
+
+/** Create a vector filled with offset values corresponding to the location of each element.
+ * @name VEC_OFFS
+ *
+ * @param[in] dt The data type of the output vector
+ * @param[in] s The size of the output vector
+ *
+ * @return The vector filled with offset values
+ * @{
+ */
+#define VEC_OFFS_STR(dt, s) V_OFFS##s(dt)
+#define VEC_OFFS(dt, s) VEC_OFFS_STR(dt, s)
+/** @} */ // end of group VEC_OFFS
+
#define VLOAD_STR(size) vload##size
#define VLOAD(size) VLOAD_STR(size)
#define VSTORE_STR(size) vstore##size
#define VSTORE(size) VSTORE_STR(size)
+#define float1 float
+#define half1 half
+#define char1 char
+#define uchar1 uchar
+#define short1 short
+#define ushort1 ushort
+#define int1 int
+#define uint1 uint
+#define long1 long
+#define ulong1 ulong
+#define double1 double
+
+#define vload1(OFFSET, PTR) *(OFFSET + PTR)
+#define vstore1(DATA, OFFSET, PTR) *(OFFSET + PTR) = DATA
+
+// The convert_* built-ins with the _sat modifier are not supported for floating point,
+// so we define the _sat names as aliases of the plain conversions to work around this.
+#define convert_float_sat convert_float
+#define convert_float1_sat convert_float
+#define convert_float2_sat convert_float2
+#define convert_float3_sat convert_float3
+#define convert_float4_sat convert_float4
+#define convert_float8_sat convert_float8
+#define convert_float16_sat convert_float16
+#define convert_half_sat convert_float
+#define convert_half1_sat convert_half
+#define convert_half2_sat convert_half2
+#define convert_half3_sat convert_half3
+#define convert_half4_sat convert_half4
+#define convert_half8_sat convert_half8
+#define convert_half16_sat convert_half16
+
+#define convert_float1 convert_float
+#define convert_half1 convert_half
+#define convert_char1 convert_char
+#define convert_uchar1 convert_uchar
+#define convert_short1 convert_short
+#define convert_ushort1 convert_ushort
+#define convert_int1 convert_int
+#define convert_uint1 convert_uint
+#define convert_long1 convert_long
+#define convert_ulong1 convert_ulong
+#define convert_double1 convert_double
+
+#define convert_char1_sat convert_char_sat
+#define convert_uchar1_sat convert_uchar_sat
+#define convert_short1_sat convert_short_sat
+#define convert_ushort1_sat convert_ushort_sat
+#define convert_int1_sat convert_int_sat
+#define convert_uint1_sat convert_uint_sat
+#define convert_long1_sat convert_long_sat
+#define convert_ulong1_sat convert_ulong_sat
+#define convert_double1_sat convert_double_sat
+
#define VEC_DATA_TYPE_STR(type, size) type##size
#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size)
*/
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
#ifndef ARM_COMPUTE_HELPERS_ASYMM_H
#define ARM_COMPUTE_HELPERS_ASYMM_H
#include "helpers.h"
+/** Convert the given vector with round to nearest even rounding mode
+ *
+ * @param[in] x The target to be converted
+ * @param[in] type The target type
+ *
+ * @return The converted vector
+ */
+#define CONVERT_DOWN_RTE_STR(x, type) (convert_##type##_rte((x)))
+#define CONVERT_DOWN_RTE(x, type) CONVERT_DOWN_RTE_STR(x, type)
+
+/** Quantize a floating-point scalar value to 8-bit asymmetric
+ *
+ * @param[in] input Input value to quantize
+ * @param[in] offset Quantization offset
+ * @param[in] scale Quantization scale
+ *
+ * @return quantized value
+ */
+inline uchar quantize_qasymm8(float input, float offset, float scale)
+{
+ float out_f32 = input / scale + offset;
+ uchar res_u8 = CONVERT_SAT(CONVERT_DOWN_RTE(out_f32, int), uchar);
+ return res_u8;
+}
+
+/** Dequantize a scalar value from 8-bit asymmetric to floating-point
+ *
+ * @param[in] input Input value to dequantize
+ * @param[in] offset Quantization offset
+ * @param[in] scale Quantization scale
+ *
+ * @return dequantized value
+ */
+inline float dequantize_qasymm8(uchar input, float offset, float scale)
+{
+ return ((float)input - offset) * scale;
+}
+
+/** Dequantize a scalar value from signed 8-bit asymmetric to floating-point
+ *
+ * @param[in] input Input value to dequantize
+ * @param[in] offset Quantization offset
+ * @param[in] scale Quantization scale
+ *
+ * @return dequantized value
+ */
+inline float dequantize_qasymm8_signed(char input, float offset, float scale)
+{
+ return ((float)input - offset) * scale;
+}
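/*
 * Editorial sketch (not part of the patch): a scalar round trip through the helpers
 * above. With scale = 0.5 and offset = 128, the value 3.2f quantizes to
 * round(3.2 / 0.5) + 128 = 134 and dequantizes back to (134 - 128) * 0.5 = 3.0f; the
 * 0.2 is lost to rounding. A standalone C++ equivalent (std::lround approximates the
 * round-to-nearest-even conversion used above; names are illustrative):
 */
#include <algorithm>
#include <cmath>
#include <cstdint>

uint8_t quantize_u8(float v, float scale, float offset)
{
  int32_t q = static_cast<int32_t>(std::lround(v / scale + offset));
  return static_cast<uint8_t>(std::max<int32_t>(0, std::min<int32_t>(255, q)));
}

float dequantize_u8(uint8_t q, float scale, float offset)
{
  return (static_cast<float>(q) - offset) * scale;
}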
+
+/** Quantize a vector of values from floating-point
+ *
+ * @param[in] type Output data type.
+ * @param[in] size Size of vector.
+ *
+ * @return quantized values
+ */
+#define QUANTIZE_IMPL(type, size) \
+ inline VEC_DATA_TYPE(type, size) \
+ quantize_##type##size(VEC_DATA_TYPE(float, size) input, float offset, float scale) \
+ { \
+ VEC_DATA_TYPE(float, size) \
+ out_f32 = input / (VEC_DATA_TYPE(float, size))(scale) + (VEC_DATA_TYPE(float, size))(offset); \
+ VEC_DATA_TYPE(type, size) \
+ res = CONVERT_SAT(CONVERT_DOWN_RTE(out_f32, VEC_DATA_TYPE(int, size)), \
+ VEC_DATA_TYPE(type, size)); \
+ return res; \
+ }
+
+/** Dequantize a vector of values to floating-point
+ *
+ * @param[in] type Input data type.
+ * @param[in] size Size of vector.
+ *
+ * @return dequantized values in floating point
+ */
+#define DEQUANTIZE_IMPL(type, size) \
+ inline VEC_DATA_TYPE(float, size) \
+ dequantize_##type##size(VEC_DATA_TYPE(type, size) input, float offset, float scale) \
+ { \
+ return (CONVERT(input, VEC_DATA_TYPE(float, size)) - offset) * scale; \
+ }
+
/** Correctly-rounded-to-nearest division by a power-of-two.
*
* @param[in] size Size of vector.
*
* @return Correctly-rounded-to-nearest division by a power-of-two.
*/
-#define ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(size) \
- inline VEC_DATA_TYPE(int, size) \
- asymm_rounding_divide_by_POW2_##size(VEC_DATA_TYPE(int, size) x, int exponent) \
- { \
- VEC_DATA_TYPE(int, size) \
- mask = (1 << exponent) - 1; \
- const VEC_DATA_TYPE(int, size) zero = 0; \
- const VEC_DATA_TYPE(int, size) one = 1; \
- VEC_DATA_TYPE(int, size) \
- threshold = (mask >> 1) + select(zero, one, x < 0); \
- return (x >> exponent) + select(zero, one, (x & mask) > threshold); \
+#define ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(size) \
+ inline VEC_DATA_TYPE(int, size) asymm_rounding_divide_by_POW2_##size( \
+ VEC_DATA_TYPE(int, size) x, VEC_DATA_TYPE(int, size) exponent) \
+ { \
+ const VEC_DATA_TYPE(int, size) zero = (VEC_DATA_TYPE(int, size))0; \
+ const VEC_DATA_TYPE(int, size) one = (VEC_DATA_TYPE(int, size))1; \
+ VEC_DATA_TYPE(int, size) \
+ mask = (one << exponent) - one; \
+ VEC_DATA_TYPE(int, size) \
+ threshold = (mask >> 1) + select(zero, one, x < 0); \
+ return (x >> exponent) + select(zero, one, (x & mask) > threshold); \
}
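/*
 * Editorial sketch (not part of the patch): scalar form of the rounding divide above.
 * The low bits discarded by the arithmetic shift are compared against half the divisor
 * (biased by one for negative inputs) so the result rounds to nearest rather than
 * towards negative infinity.
 */
#include <cstdint>

int32_t rounding_divide_by_pow2(int32_t x, int exponent)
{
  const int32_t mask = (int32_t{1} << exponent) - 1;
  const int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
  return (x >> exponent) + (((x & mask) > threshold) ? 1 : 0);
}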
/** Product of two numbers, interpreting them as fixed-point values in the interval [-1, 1),
b_64 = convert_long##size(b); \
VEC_DATA_TYPE(long, size) \
ab_64 = a_64 * b_64; \
- /* COMPMID-907 */ \
+ /* Revert COMPMID-907 */ \
+ VEC_DATA_TYPE(long, size) \
+ mask1 = 1 << 30; \
+ VEC_DATA_TYPE(long, size) \
+ mask2 = 1 - (1 << 30); \
+ VEC_DATA_TYPE(long, size) \
+ is_positive_or_zero = ab_64 >= 0; \
+ VEC_DATA_TYPE(long, size) \
+ nudge = select(mask2, mask1, is_positive_or_zero); \
+ VEC_DATA_TYPE(long, size) \
+ mask = 1ll << 31; \
VEC_DATA_TYPE(int, size) \
- ab_x2_high32 = convert_int##size(((ab_64 + (1 << 30)) >> 31)); \
+ ab_x2_high32 = convert_int##size((ab_64 + nudge) / mask); \
return select(ab_x2_high32, INT_MAX, overflow); \
}
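/*
 * Editorial sketch (not part of the patch): scalar form of the fixed-point multiply
 * after this change. The 64-bit product is nudged by +/- 2^30 depending on its sign and
 * divided by 2^31, giving the high half of the doubled product rounded to nearest
 * (the behaviour of gemmlowp's SaturatingRoundingDoublingHighMul).
 */
#include <cstdint>
#include <limits>

int32_t saturating_rounding_doubling_high_mul(int32_t a, int32_t b)
{
  const bool overflow = (a == b) && (a == std::numeric_limits<int32_t>::min());
  const int64_t ab_64 = static_cast<int64_t>(a) * static_cast<int64_t>(b);
  const int64_t nudge = ab_64 >= 0 ? (int64_t{1} << 30) : 1 - (int64_t{1} << 30);
  const int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (int64_t{1} << 31));
  return overflow ? std::numeric_limits<int32_t>::max() : ab_x2_high32;
}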
return ASYMM_SATURATING_ROUNDING_MULT_BY_POW2(value, exponent, size); \
}
+#define QUANTIZE_STR(input, offset, scale, type, size) quantize_##type##size(input, offset, scale)
+#define QUANTIZE(input, offset, scale, type, size) QUANTIZE_STR(input, offset, scale, type, size)
+#define DEQUANTIZE_STR(input, offset, scale, type, size) \
+ dequantize_##type##size(input, offset, scale)
+#define DEQUANTIZE(input, offset, scale, type, size) \
+ DEQUANTIZE_STR(input, offset, scale, type, size)
+
#define ASYMM_ROUNDING_DIVIDE_BY_POW2(x, exponent, size) \
asymm_rounding_divide_by_POW2_##size(x, exponent)
#define ASYMM_MULT(a, b, size) asymm_mult##size(a, b)
+#define ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(x, quantized_multiplier, left_shift, size) \
+ ASYMM_MULT(x *((VEC_DATA_TYPE(int, size))(1) << (-left_shift)), quantized_multiplier, size)
#define ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(x, quantized_multiplier, right_shift, size) \
ASYMM_ROUNDING_DIVIDE_BY_POW2(ASYMM_MULT(x, quantized_multiplier, size), right_shift, size)
#define ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL(a, size) \
#define ASYMM_RESCALE(value, src_integer_bits, dst_integer_bits, size) \
asymm_rescale##size(value, src_integer_bits, dst_integer_bits)
+#define MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(size) \
+ inline VEC_DATA_TYPE(int, size) \
+ multiply_by_quantized_multiplier##size(VEC_DATA_TYPE(int, size) input, int qmul, int shift) \
+ { \
+ const int left_shift = shift > 0 ? shift : 0; \
+ const int right_shift = shift > 0 ? 0 : -shift; \
+ return ASYMM_ROUNDING_DIVIDE_BY_POW2(ASYMM_MULT(input * (1 << left_shift), qmul, size), \
+ right_shift, size); \
+ }
+#define MULTIPLY_BY_QUANTIZED_MULTIPLIER(input, qmul, shift, size) \
+ multiply_by_quantized_multiplier##size(input, qmul, shift)
+
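/*
 * Editorial sketch (not part of the patch): multiply_by_quantized_multiplier takes one
 * signed shift and splits it in two, a positive shift becomes a pre-multiplication by
 * 2^shift, a negative one a rounding right shift after the high multiply. Scalar form,
 * reusing the two helpers sketched earlier (declared here for reference only):
 */
#include <cstdint>

int32_t saturating_rounding_doubling_high_mul(int32_t a, int32_t b); // sketched above
int32_t rounding_divide_by_pow2(int32_t x, int exponent);            // sketched above

int32_t multiply_by_quantized_multiplier_ref(int32_t input, int32_t qmul, int shift)
{
  const int left_shift = shift > 0 ? shift : 0;
  const int right_shift = shift > 0 ? 0 : -shift;
  const int32_t scaled = saturating_rounding_doubling_high_mul(input * (1 << left_shift), qmul);
  return rounding_divide_by_pow2(scaled, right_shift);
}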
+QUANTIZE_IMPL(uchar, 1)
+QUANTIZE_IMPL(char, 1)
+QUANTIZE_IMPL(uint, 1)
+QUANTIZE_IMPL(int, 1)
+QUANTIZE_IMPL(uchar, 4)
+QUANTIZE_IMPL(ushort, 4)
+QUANTIZE_IMPL(short, 4)
+QUANTIZE_IMPL(uchar, 16)
+QUANTIZE_IMPL(char, 16)
+QUANTIZE_IMPL(ushort, 16)
+QUANTIZE_IMPL(short, 16)
+QUANTIZE_IMPL(uint, 16)
+QUANTIZE_IMPL(int, 16)
+
+DEQUANTIZE_IMPL(uchar, 1)
+DEQUANTIZE_IMPL(char, 1)
+DEQUANTIZE_IMPL(uint, 1)
+DEQUANTIZE_IMPL(int, 1)
+DEQUANTIZE_IMPL(uchar, 4)
+DEQUANTIZE_IMPL(ushort, 4)
+DEQUANTIZE_IMPL(short, 4)
+DEQUANTIZE_IMPL(uchar, 16)
+DEQUANTIZE_IMPL(char, 16)
+DEQUANTIZE_IMPL(ushort, 16)
+DEQUANTIZE_IMPL(short, 16)
+DEQUANTIZE_IMPL(uint, 16)
+DEQUANTIZE_IMPL(int, 16)
+
+ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(1)
ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(2)
ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(4)
ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(8)
ASYMM_ROUNDING_DIVIDE_BY_POW2_IMPL(16)
+ASYMM_MULT_IMPL(1)
ASYMM_MULT_IMPL(2)
ASYMM_MULT_IMPL(4)
ASYMM_MULT_IMPL(8)
ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(8)
ASYMM_EXP_ON_INTERVAL_BETWEEN_NEGATIVE_ONE_QUARTER_AND_0_EXCL_IMPL(16)
+ASYMM_SELECT_USING_MASK_IMPL(1)
ASYMM_SELECT_USING_MASK_IMPL(2)
ASYMM_SELECT_USING_MASK_IMPL(4)
ASYMM_SELECT_USING_MASK_IMPL(8)
ASYMM_SELECT_USING_MASK_IMPL(16)
+ASYMM_MASK_IF_ZERO_IMPL(1)
ASYMM_MASK_IF_ZERO_IMPL(2)
ASYMM_MASK_IF_ZERO_IMPL(4)
ASYMM_MASK_IF_ZERO_IMPL(8)
ASYMM_MASK_IF_ZERO_IMPL(16)
+ASYMM_MASK_IF_NON_ZERO_IMPL(1)
ASYMM_MASK_IF_NON_ZERO_IMPL(2)
ASYMM_MASK_IF_NON_ZERO_IMPL(4)
ASYMM_MASK_IF_NON_ZERO_IMPL(8)
ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(8)
ASYMM_EXP_ON_NEGATIVE_VALUES_IMPL(16)
+ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(1)
ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(2)
ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(4)
ASYMM_SATURATING_ROUNDING_MULT_BY_POW2_IMPL(8)
ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(8)
ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1_IMPL(16)
+ASYMM_RESCALE_IMPL(1)
ASYMM_RESCALE_IMPL(2)
ASYMM_RESCALE_IMPL(4)
ASYMM_RESCALE_IMPL(8)
ASYMM_RESCALE_IMPL(16)
+MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(1)
+MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(2)
+MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(4)
+MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(8)
+MULTIPLY_BY_QUANTIZED_MULTIPLIER_IMPL(16)
+
#endif // ARM_COMPUTE_HELPERS_ASYMM_H
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "helpers.h"
-
-#ifndef VEC_SIZE
-#define VEC_SIZE 1
-#endif
-
-#if defined(DATA_TYPE)
-/** Returns result of prelu function implemented as below:
- * f(input) = alpha * input for input < 0, f(input) = input for input >= 0.
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g.
- * -DVEC_SIZE=16
- * @note Can only take floating point data types.
- *
- * @param[in] input1_ptr Pointer to the source image. Supported Data
- * types : F16/F32
- * @param[in] input1_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] input1_step_x input1_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input1_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] input1_step_y input1_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input1_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] input1_step_z input1_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[in] alpha_ptr Pointer to the source image. Supported Data
- * types : F16/F32
- * @param[in] alpha_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] alpha_step_x input2_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] alpha_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] alpha_step_y input2_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] alpha_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] alpha_step_z input2_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] alpha_offset_first_element_in_bytes The offset of the first element in the source
- * image
- *
- * @param[out] output_ptr Pointer to the destination image. Supported
- * data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- */
-__kernel void prelu(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(alpha),
- TENSOR3D_DECLARATION(output))
-{
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor3D alpha = CONVERT_TO_TENSOR3D_STRUCT(alpha);
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
-
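- // Element-wise PReLU: lanes with a negative input are multiplied by the matching alpha
- // lane, all other lanes pass through unchanged; VEC_SIZE results are stored to the output.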
- VSTORE(VEC_SIZE)
- (VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr) < 0
- ? VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr) *
- VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)alpha.ptr)
- : VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr),
- 0, (__global DATA_TYPE *)output.ptr);
-}
-#endif // defined(DATA_TYPE)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "helpers.h"
-#define SUB(x, y) ((x) - (y))
-
-#if defined(OFF_IN) && defined(OFF_ALPHA) && defined(OFF_OUT) && defined(SCALE_IN) && \
- defined(SCALE_ALPHA) && defined(SCALE_OUT) && defined(VEC_SIZE)
-
-#define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE)
-#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE)
-#define VEC_UCHAR VEC_DATA_TYPE(uchar, VEC_SIZE)
-#define CONVERT_RTE(x, type) (convert_##type##_rte((x)))
-#define CONVERT_DOWN(x, type) CONVERT_RTE(x, type)
-#define SELECT_TYPE VEC_INT
-
-/** Returns result of prelu function implemented as below:
- * f(input) = alpha * input for input < 0, f(input) = input for input >= 0.
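- *
- * For illustration only (the quantization parameters below are made up, not from any real
- * model): with SCALE_IN = 0.5, OFF_IN = 128, SCALE_ALPHA = 0.1, OFF_ALPHA = 128,
- * SCALE_OUT = 0.5 and OFF_OUT = 128, an input byte of 100 dequantizes to
- * (100 - 128) * 0.5 = -14 and an alpha byte of 130 to 0.2, so the PReLU result is -2.8,
- * which requantizes to round(-2.8 / 0.5 + 128) = 122.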
- *
- * @attention Data type can be passed using the -DDATA_TYPE_IN compile flag, e.g.
- * -DDATA_TYPE_IN=uchar
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g.
- * -DVEC_SIZE=16
- * @note Can only take uchar data types.
- *
- * @param[in] input1_ptr Pointer to the source image. Supported Data
- * types : QASYMM8
- * @param[in] input1_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] input1_step_x input1_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input1_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] input1_step_y input1_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input1_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] input1_step_z input1_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[in] alpha_ptr Pointer to the source image. Supported Data
- * types : QASYMM8
- * @param[in] alpha_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] alpha_step_x input2_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] alpha_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] alpha_step_y input2_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] alpha_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] alpha_step_z input2_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] alpha_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[out] output_ptr Pointer to the destination image. Supported
- * data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- */
-__kernel void prelu_qasymm8(TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(alpha),
- TENSOR3D_DECLARATION(output))
-{
- // Get pixels pointer
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor3D alpha = CONVERT_TO_TENSOR3D_STRUCT(alpha);
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
-
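- // Dequantize both operands (subtract the zero-point, scale to float), apply PReLU in
- // float, then requantize with the output scale and offset before the saturating store.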
- VEC_INT in_vec = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)input.ptr), VEC_INT);
- VEC_INT alpha_vec = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)alpha.ptr), VEC_INT);
-
- in_vec = SUB(in_vec, (VEC_INT)((int)OFF_IN));
- alpha_vec = SUB(alpha_vec, (VEC_INT)((int)OFF_ALPHA));
-
- const VEC_FLOAT inf32 = CONVERT(in_vec, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN);
- const VEC_FLOAT alphaf32 = CONVERT(alpha_vec, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_ALPHA);
- const VEC_FLOAT outf32 =
- select(inf32, inf32 * alphaf32, CONVERT(inf32 < (VEC_FLOAT)0, SELECT_TYPE));
- const VEC_FLOAT qresf32 = outf32 / ((VEC_FLOAT)(float)SCALE_OUT) + ((VEC_FLOAT)((float)OFF_OUT));
- const VEC_UCHAR res = CONVERT_SAT(CONVERT_DOWN(qresf32, VEC_INT), VEC_UCHAR);
-
- VSTORE(VEC_SIZE)
- (res, 0, (__global uchar *)output.ptr);
-}
-
-#endif // defined(OFF_IN) && defined(OFF_ALPHA) && defined(OFF_OUT) && defined(SCALE_IN) &&
- // defined(SCALE_ALPHA) && defined(SCALE_OUT) && defined(VEC_SIZE)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "helpers.h"
-
-#if defined(DATA_TYPE) && defined(DEPTH_IN) && defined(BLOCK_SIZE) && defined(Z_IN)
-/** Perform space to depth rearrangement of tensor
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Input tensor depth should be given as a preprocessor argument using -DDEPTH_IN=size.
- * e.g. -DDEPTH_IN=16
- * @attention The value of the z-axis of input tensor depth should be given as a preprocessor
- * argument using -DZ_IN=size. e.g. -DZ_IN=16
- * @attention block size should be given as a preprocessor argument using -DBLOCK_SIZE=size. e.g.
- * -DBLOCK_SIZE=1
- *
- * @param[in] input_ptr Pointer to the source image. Supported data
- * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[out] output_ptr Pointer to the destination image. Supported data
- * types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension (in
- * bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- */
-__kernel void space_to_depth_nchw(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output))
-{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, Z_IN);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0);
-
- int out_index[4] = {0};
- int in_index[4] = {0};
-
- in_index[0] = get_global_id(0); // W
- in_index[1] = get_global_id(1); // H
- in_index[2] = get_global_id(2) % Z_IN; // C
- in_index[3] = get_global_id(2) / Z_IN; // B
-
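- // Fold each BLOCK_SIZE x BLOCK_SIZE spatial tile into the channel dimension: the offset
- // inside the tile selects which DEPTH_IN-sized channel group the element is written to.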
- out_index[0] = in_index[0] / BLOCK_SIZE;
- out_index[1] = in_index[1] / BLOCK_SIZE;
- out_index[2] =
- in_index[2] + ((in_index[1] % BLOCK_SIZE) * BLOCK_SIZE + in_index[0] % BLOCK_SIZE) * DEPTH_IN;
- out_index[3] = in_index[3];
-
- *((__global DATA_TYPE *)tensor4D_offset(&out, out_index[0], out_index[1], out_index[2],
- out_index[3])) = *((__global DATA_TYPE *)in.ptr);
-}
-#endif // defined(DATA_TYPE) && defined(DEPTH_IN) && defined(BLOCK_SIZE) && defined(Z_IN)
-
-#if defined(DATA_TYPE) && defined(DEPTH_IN) && defined(BLOCK_SIZE) && defined(Z_IN)
-/** Perform space to depth rearrangement of tensor
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Input tensor depth should be given as a preprocessor argument using -DDEPTH_IN=size.
- * e.g. -DDEPTH_IN=16
- * @attention The value of the z-axis of input tensor depth should be given as a preprocessor
- * argument using -DZ_IN=size. e.g. -DZ_IN=16
- * @attention block size should be given as a preprocessor argument using -DBLOCK_SIZE=size. e.g.
- * -DBLOCK_SIZE=1
- *
- * @param[in] input_ptr Pointer to the source image. Supported data
- * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[out] output_ptr Pointer to the destination image. Supported data
- * types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension (in
- * bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- */
-__kernel void space_to_depth_nhwc(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output))
-{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, Z_IN);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0);
-
- int out_index[4] = {0};
- int in_index[4] = {0};
-
- in_index[0] = get_global_id(0); // C
- in_index[1] = get_global_id(1); // W
- in_index[2] = get_global_id(2) % Z_IN; // H
- in_index[3] = get_global_id(2) / Z_IN; // B
-
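- // Same tile-to-channel folding as the NCHW variant above, with channels in dimension 0
- // for the NHWC layout.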
- out_index[0] =
- in_index[0] + ((in_index[2] % BLOCK_SIZE) * BLOCK_SIZE + in_index[1] % BLOCK_SIZE) * DEPTH_IN;
- out_index[1] = in_index[1] / BLOCK_SIZE;
- out_index[2] = in_index[2] / BLOCK_SIZE;
- out_index[3] = in_index[3];
-
- *((__global DATA_TYPE *)tensor4D_offset(&out, out_index[0], out_index[1], out_index[2],
- out_index[3])) = *((__global DATA_TYPE *)in.ptr);
-}
-#endif // defined(DATA_TYPE) && defined(DEPTH_IN) && defined(BLOCK_SIZE) && defined(Z_IN)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
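- // Collapsing the window above DimZ is only safe when neither operand broadcasts in those
- // dimensions, i.e. both input shapes agree in every dimension from DimZ upwards.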
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/CL/kernels/CLArgOperationKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-const TensorShape inferOutputShape(const TensorShape &input_shape, const uint32_t axis)
-{
- TensorShape out_shape{input_shape};
-
- out_shape.set(axis, 1);
-
- return out_shape;
-}
-} // namespace
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const uint32_t axis,
- ArgOperation /*op*/)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::S32, DataType::F32, DataType::U8,
- DataType::QASYMM8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(output, DataType::S32);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((input->tensor_shape().num_dimensions() - 1) !=
- output->tensor_shape().num_dimensions(),
- "Input's rank is not same with output");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->tensor_shape().total_size() == 0,
- "Inputs are not broadcast compatible");
-
- const TensorShape output_shape = inferOutputShape(input->tensor_shape(), axis);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_shape.total_size() != output->tensor_shape().total_size(),
- "output shape's size does not match axis");
-
- const auto num_dimensions = input->tensor_shape().num_dimensions();
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= num_dimensions, "axis must be less than (input's rank).");
- return Status{};
-}
-
-} // namespace
-
-CLArgOperationKernel::CLArgOperationKernel() : _input(nullptr), _output(nullptr), _axis() {}
-
-void CLArgOperationKernel::configure(const ICLTensor *input, ICLTensor *output, const uint32_t axis,
- ArgOperation op)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), axis, op));
-
- _input = input;
- _output = output;
- _axis = axis;
-
- std::unique_ptr<ITensorInfo> output_info = output->info()->clone();
- output_info->set_tensor_shape(inferOutputShape(input->info()->tensor_shape(), axis));
-
- // Construct kernel and set op_code based on type of ArgOperation as specified by object op
- std::string kernel_name = "arg_op";
- int op_code = 0;
- if (op == ArgOperation::MAX)
- {
- op_code = 1;
- }
- else if (op == ArgOperation::MIN)
- {
- op_code = 2;
- }
- else
- throw std::runtime_error("Operation not supported, yet");
-
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output_info->dimension(2)));
- build_opts.emplace("-DOP_CODE=" + support::cpp11::to_string(op_code));
-
- // Create kernel
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*output_info, Steps());
-
- Coordinates coord;
- coord.set_num_dimensions(output_info->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output_info->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-Status CLArgOperationKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
- const uint32_t axis, ArgOperation op)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, axis, op));
-
- return Status{};
-}
-
-void CLArgOperationKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const TensorShape &shape_in = _input->info()->tensor_shape();
-
- unsigned int idx = 2 * num_arguments_per_4D_tensor(); // Skip the input and output parameters
-
- _kernel.setArg<cl_int>(idx++, _axis);
- _kernel.setArg<cl_int>(idx++, shape_in[_axis]);
-
- Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
- // Setup input slice
- Window slice_in(slice_out);
- slice_in.set(Window::DimX, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimY, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0));
- slice_in.set(3, Window::Dimension(0, 0, 0));
-
- // Copy output's shape in order to use for recovering at end of this method
- const TensorShape shape_out = _output->info()->tensor_shape();
- _output->info()->set_tensor_shape(inferOutputShape(shape_in, _axis));
-
- do
- {
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, _input, slice_in);
- add_4D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out);
- } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out));
-
- // Recover output's shape of output tensor
- _output->info()->set_tensor_shape(shape_out);
-}
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "support/StringSupport.h"
using namespace arm_compute;
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/CL/kernels/CLCastKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-CLCastKernel::CLCastKernel() : _input(nullptr), _output(nullptr) {}
-
-void CLCastKernel::configure(const ICLTensor *input, ICLTensor *output, SubDataType input_subtype)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
-
- _input = input;
- _output = output;
-
- constexpr unsigned int num_elems_processed_per_iteration = 16;
-
- // Set kernel build options
- CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.add_option("-DDATA_TYPE_OUT=" +
- get_cl_type_from_data_type(output->info()->data_type()));
- build_opts.add_option(
- ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
-
- // Create kernel
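- // Select the kernel variant based on whether the input or the output is an asymmetric
- // quantized type; the plain "cast" kernel handles all other type combinations.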
- if (is_data_type_quantized_asymmetric(input->info()->data_type()))
- {
- UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
- const float scale_in = qinfo.scale;
- const int offset_in = qinfo.offset;
- build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(scale_in));
- build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(offset_in));
-
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("cast_qasymm_in", build_opts.options()));
- }
- else if (is_data_type_quantized_asymmetric(output->info()->data_type()))
- {
- UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform();
- const float scale_in = qinfo.scale;
- const float offset_in = qinfo.offset;
-
- build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(scale_in));
- build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(offset_in));
-
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("cast_qasymm_out", build_opts.options()));
- }
- else
- {
- build_opts.add_option_if(input_subtype == SubDataType::BOOL, "-DBOOL_INPUT");
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("cast", build_opts.options()));
- }
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
- update_window_and_padding(win, input_access, output_access);
- output_access.set_valid_region(win, input->info()->valid_region());
-
- ICLKernel::configure_internal(win);
-}
-
-void CLCastKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
- Window slice = collapsed.first_slice_window_3D();
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice);
- add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, lws_hint());
- } while (collapsed.slide_window_slice_3D(slice));
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-// TODO Use this validation function
-#if 0
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- const int32_t block_size)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(block_size < 1,
- "Block size should be greater than or equal to 1.");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(0) != input->dimension(0) * block_size,
- "Output width should be equal to (Input width * block size)");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(1) != input->dimension(1) * block_size,
- "Output height should be equal to (Input height * block size)");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(2) % (block_size * block_size) != 0,
- "Input depth should be divisible by (block size * block size)");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- output->dimension(2) != input->dimension(2) / (block_size * block_size),
- "Output depth should be equal to (Input depth / (block size * block size))");
-
- return Status{};
-}
-#endif
-} // namespace
-
-CLDepthToSpaceKernel::CLDepthToSpaceKernel() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void CLDepthToSpaceKernel::configure(const ICLTensor *input, ICLTensor *output,
- const int32_t block_size)
-{
- // TODO Add validation of data_layout
- _input = input;
- _output = output;
-
- // Set kernel build options
- auto layout_out = output->info()->data_layout();
- std::set<std::string> build_opts;
- build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.emplace("-DBLOCK_SIZE=" + support::cpp11::to_string(block_size));
- auto index_depth = get_data_layout_dimension_index(layout_out, DataLayoutDimension::CHANNEL);
- auto depth = output->info()->dimension(index_depth);
- build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(depth));
- build_opts.emplace("-DZ_OUT=" + support::cpp11::to_string(output->info()->tensor_shape().z()));
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(
- "depth_to_space_" + lower_string(string_from_data_layout(layout_out)), build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*output->info(), Steps());
-
- Coordinates coord;
- coord.set_num_dimensions(output->info()->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-void CLDepthToSpaceKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
-
- Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
- // Setup input slice
- Window slice_in(slice_out);
- slice_in.set(Window::DimX, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimY, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0));
- slice_in.set(3, Window::Dimension(0, 0, 0));
-
- do
- {
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, _input, slice_in);
- add_4D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out);
- } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out));
-}
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "support/StringSupport.h"
using namespace arm_compute;
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/AccessWindowTranspose.h"
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "support/ToolchainSupport.h"
-
-#include <cstddef>
-#include <cstdint>
-#include <tuple>
-
-using namespace arm_compute;
-using namespace arm_compute::misc::shape_calculator;
-
-namespace arm_compute
-{
-class Coordinates;
-} // namespace arm_compute
-
-namespace
-{
-using ElementsProcessed = Steps;
-
-Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1,
- const ITensorInfo *output, const GEMMReshapeInfo &gemm_info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::S8);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input0->num_dimensions() > 4,
- "The number of dimensions for the matrix A must be <= 4");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 3,
- "The number of dimensions for the matrix B must be <= 3");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 2 &&
- gemm_info.reinterpret_input_as_3d(),
- "The input1 tensor cannot have more than 2 dimensions if input0 "
- "has to be reinterpreted as 3D");
-
- const int m = gemm_info.m();
- const int n = gemm_info.n();
- const int k = gemm_info.k();
-
- ARM_COMPUTE_UNUSED(m);
- ARM_COMPUTE_UNUSED(n);
- ARM_COMPUTE_UNUSED(k);
-
- ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != static_cast<unsigned int>(k));
- ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) != static_cast<unsigned int>(n));
- ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(1) != static_cast<unsigned int>(k));
- if (gemm_info.reinterpret_input_as_3d())
- {
- ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) * input0->dimension(2) !=
- static_cast<unsigned int>(m));
- }
- else
- {
- ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) != static_cast<unsigned int>(m));
- }
-
- if (output->total_size() != 0)
- {
- const TensorInfo tensor_info_output =
- output->clone()->set_tensor_shape(compute_mm_shape(*input0, *input1, false, gemm_info));
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32);
- }
-
- return Status{};
-}
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1,
- ITensorInfo *output,
- const GEMMReshapeInfo &gemm_info,
- ElementsProcessed &num_elements_processed)
-{
- unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0];
- unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1];
- bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- bool reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d() != 0);
-
- Window win{};
- Window win_out{};
- bool window_changed = false;
-
- // In case both input and output have to be reinterpreted as 3D tensors,
- // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false.
- if (reinterpret_input_as_3d == reinterpret_output_as_3d)
- {
- reinterpret_input_as_3d = false;
- reinterpret_output_as_3d = false;
- }
-
- // Output tensor auto-initialization if not yet initialized
- auto_init_if_empty(*output,
- input0->clone()
- ->set_tensor_shape(compute_mm_shape(*input0, *input1, false, gemm_info))
- .set_data_type(DataType::S32));
-
- TensorInfo tmp_info(*output);
-
- if (reinterpret_output_as_3d)
- {
- // Since the output tensor has to be reinterpreted as 3D and the execute window is based on a 2D
- // GEMM,
- // the window needs to be constructed on the 2D collapsed version of the tensor
- TensorShape tmp_shape(output->tensor_shape());
- tmp_shape.collapse(2U, 1U);
- tmp_info.set_tensor_shape(tmp_shape);
- }
-
- // Special case for 1xN, 2xN, 3xN and 4xN input0 tensors when choosing
- // num_elems_processed_per_iteration_x
- // Note: if the dot product instruction is available, the 8x2 tile has to be used
- num_elems_processed_per_iteration_x = 4;
- num_elems_processed_per_iteration_y = std::min(static_cast<int>(output->dimension(1)), 4);
-
- // Note: bottom paddings are calculated manually as the output can be reinterpreted as 3D tensor
- // The only way to set properly the paddings, it is to set those explicitly through the
- // AccessWindowStatic
- const int m = reinterpret_input_as_3d ? input0->tensor_shape()[1] * input0->tensor_shape()[2]
- : input0->tensor_shape()[1];
- const int bottom_pad =
- (num_elems_processed_per_iteration_y - (m % num_elems_processed_per_iteration_y)) %
- num_elems_processed_per_iteration_y;
-
- // Configure window
- win = calculate_max_window(
- tmp_info, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
- win_out = calculate_max_window(
- *output, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
-
- AccessWindowStatic input0_access(input0, 0, 0, input0->dimension(0),
- input0->dimension(1) + bottom_pad);
- AccessWindowStatic input1_access(
- input1, 0, 0, ceil_to_multiple(input1->dimension(0), num_elems_processed_per_iteration_x),
- input1->dimension(1));
- AccessWindowStatic output_access(
- output, 0, 0, ceil_to_multiple(output->dimension(0), num_elems_processed_per_iteration_x),
- output->dimension(1) + bottom_pad);
-
- window_changed =
- update_window_and_padding(win, input0_access,
- input1_access) || // window used by the execute_window_loop
- update_window_and_padding(
- win_out,
- output_access); // window used to update the padding requirements of output tensor
-
- Coordinates coord;
- coord.set_num_dimensions(output->num_dimensions());
- output_access.set_valid_region(win_out, ValidRegion(coord, output->tensor_shape()));
-
- // Collapse along the Z direction
- // This collapse needs to be here in order to tune the Z dimension of LWS
- Window collapsed = win;
- const unsigned int dimension_to_collapse =
- std::min(static_cast<unsigned int>(output->num_dimensions()), 2u);
- collapsed = win.collapse(win, dimension_to_collapse);
-
- Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
- return std::make_pair(err, collapsed);
-}
-} // namespace
-
-CLGEMMLowpMatrixMultiplyKernelEx::CLGEMMLowpMatrixMultiplyKernelEx()
- : _input0(nullptr), _input1(nullptr), _output(nullptr), _slide_matrix_b(true),
- _reinterpret_input_as_3d(false), _reinterpret_output_as_3d(false)
-{
-}
-
-void CLGEMMLowpMatrixMultiplyKernelEx::configure(const ICLTensor *input0, const ICLTensor *input1,
- ICLTensor *output,
- const GEMMReshapeInfo &gemm_info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
-
- ARM_COMPUTE_ERROR_THROW_ON(
- validate_arguments(input0->info(), input1->info(), output->info(), gemm_info));
-
- _input0 = input0;
- _input1 = input1;
- _output = output;
- _reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- _reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d() != 0);
-
- // In case both input and output have to be reinterpreted as 3D tensors,
- // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false.
- if (_reinterpret_input_as_3d == _reinterpret_output_as_3d)
- {
- _reinterpret_input_as_3d = false;
- _reinterpret_output_as_3d = false;
- }
-
- // Check if we need to slide the matrix B
- const unsigned int num_dimensions_input0 = _reinterpret_input_as_3d
- ? _input0->info()->num_dimensions() - 1
- : _input0->info()->num_dimensions();
- _slide_matrix_b = (_input1->info()->num_dimensions() >= num_dimensions_input0);
-
- ElementsProcessed num_elements_processed{};
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info(),
- gemm_info, num_elements_processed);
- ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure_internal(win_config.second);
-
- // Create build options
- std::string kernel_name(" ");
- CLBuildOptions build_opts;
- build_opts.add_option_if(_reinterpret_input_as_3d, "-DREINTERPRET_INPUT_AS_3D");
- build_opts.add_option_if(_reinterpret_output_as_3d, "-DREINTERPRET_OUTPUT_AS_3D");
- build_opts.add_option_if(_reinterpret_input_as_3d || _reinterpret_output_as_3d,
- "-DHEIGHT_GEMM3D=" +
- support::cpp11::to_string(output->info()->dimension(1)));
- build_opts.add_option_if(_reinterpret_input_as_3d || _reinterpret_output_as_3d,
- "-DDEPTH_GEMM3D=" +
- support::cpp11::to_string(output->info()->dimension(2)));
- build_opts.add_option_if(!_slide_matrix_b,
- "-DMATRIX_B_DEPTH=" +
- support::cpp11::to_string(input1->info()->dimension(2)));
- build_opts.add_option("-DCOLS_A=" + support::cpp11::to_string(input0->info()->dimension(0)));
- build_opts.add_option("-DNUM_ELEMS_PROCESSED_PER_THREAD_X=" +
- support::cpp11::to_string(num_elements_processed.x()));
- build_opts.add_option("-DNUM_ELEMS_PROCESSED_PER_THREAD_Y=" +
- support::cpp11::to_string(num_elements_processed.y()));
-
- kernel_name = "gemmlowp_mm_midgard_ex";
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options()));
-
- // Set config_id for enabling LWS tuning
- _config_id = kernel_name;
- _config_id += "_";
- _config_id += (_reinterpret_input_as_3d ? "3di_" : "");
- _config_id += (_reinterpret_output_as_3d ? "3do_" : "");
- _config_id += lower_string(string_from_data_type(input0->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(0));
-}
-
-Status CLGEMMLowpMatrixMultiplyKernelEx::validate(const ITensorInfo *input0,
- const ITensorInfo *input1,
- const ITensorInfo *output,
- const GEMMReshapeInfo &gemm_info)
-{
- ElementsProcessed num_elements_processed{};
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output, gemm_info));
- ARM_COMPUTE_RETURN_ON_ERROR(
- validate_and_configure_window(input0->clone().get(), input1->clone().get(),
- output->clone().get(), gemm_info, num_elements_processed)
- .first);
-
- return Status{};
-}
-
-void CLGEMMLowpMatrixMultiplyKernelEx::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- if (_input1->info()->num_dimensions() < 3)
- {
- // The stride_z for matrix B must be zero if we do not slice
- ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
- }
-
- Window slice = window.first_slice_window_3D();
- Window slice_matrix_b = slice;
-
- slice_matrix_b.set(Window::DimX, Window::Dimension(0, 1, 1));
- slice_matrix_b.set(Window::DimY, Window::Dimension(0, 1, 1));
-
- if (_reinterpret_input_as_3d)
- {
- // Pass bottom paddings to the kernel if the input has to be reinterpreted as 3D tensor
- const unsigned int idx0 = 3 * num_arguments_per_2D_tensor() + 3;
- const unsigned int total_cross_plane_pad =
- _input0->info()->padding().top + _input0->info()->padding().bottom;
- _kernel.setArg<cl_uint>(idx0, static_cast<unsigned int>(total_cross_plane_pad));
- }
-
- if (_reinterpret_output_as_3d)
- {
- // Pass bottom paddings to the kernel if the output has to be reinterpreted as 3D tensor
- const unsigned int idx0 =
- 3 * num_arguments_per_2D_tensor() + 3 + (_reinterpret_input_as_3d ? 1 : 0);
- const unsigned int total_cross_plane_pad =
- _output->info()->padding().top + _output->info()->padding().bottom;
- _kernel.setArg<cl_uint>(idx0, static_cast<unsigned int>(total_cross_plane_pad));
- }
-
- do
- {
- Window slice_b = slice;
- // Don't slice matrix B along the z dimension if matrix B has just 2 dimensions and matrix A
- // more than 2
- // This scenario can happen when the matrix multiplication is used to perform a convolution
- // operation
- if (!_slide_matrix_b)
- {
- slice_b = slice_matrix_b;
- }
-
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input0, slice);
- add_2D_tensor_argument(idx, _input1, slice_b);
- add_2D_tensor_argument(idx, _output, slice);
- _kernel.setArg<cl_uint>(idx++,
- static_cast<unsigned int>(_input0->info()->strides_in_bytes()[2]));
- _kernel.setArg<cl_uint>(idx++,
- static_cast<unsigned int>(_input1->info()->strides_in_bytes()[2]));
- _kernel.setArg<cl_uint>(idx++,
- static_cast<unsigned int>(_output->info()->strides_in_bytes()[2]));
- enqueue(queue, *this, slice, lws_hint());
- } while (window.slide_window_slice_3D(slice));
-}
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
#include "arm_compute/core/UtilsEx.h"
+#include "support/StringSupport.h"
using namespace arm_compute;
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "support/StringSupport.h"
using namespace arm_compute;
_hits = hits;
// Make _lookup_indices tensor
- _lookup_indices = arm_compute::support::cpp14::make_unique<CLTensor>();
+ _lookup_indices = support::cpp14::make_unique<CLTensor>();
_lookup_indices->allocator()->init(
TensorInfo(lookups->info()->tensor_shape(), lookups->info()->num_channels(), DataType::S32));
_lookup_indices->allocator()->allocate();
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Window.h"
-
+#include "support/StringSupport.h"
#include "support/ToolchainSupport.h"
namespace arm_compute
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "support/StringSupport.h"
using namespace arm_compute;
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "support/StringSupport.h"
using namespace arm_compute;
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/CL/kernels/CLPReLUKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-constexpr unsigned int num_elems_processed_per_iteration = 16;
-
-Status validate_info(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output)
-{
- const TensorShape &out_shape =
- TensorShape::broadcast_shape(input->tensor_shape(), alpha->tensor_shape());
-
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32,
- DataType::QASYMM8);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(alpha, 1, DataType::F16, DataType::F32,
- DataType::QASYMM8);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0,
- "Inputs are not broadcast compatible");
- // Validate in case of configured output
- if (output->total_size() > 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32,
- DataType::QASYMM8);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(out_shape, output->tensor_shape(), 0),
- "Wrong shape for output");
- }
- return Status{};
-}
-} // namespace
-
-CLPReLUKernel::CLPReLUKernel() : _input(nullptr), _alpha(nullptr), _output(nullptr) {}
-
-void CLPReLUKernel::configure(const ICLTensor *input, const ICLTensor *alpha, ICLTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, alpha);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_info(input->info(), alpha->info(), output->info()));
-
- _input = input;
- _alpha = alpha;
- _output = output;
-
- // Create kernel
- std::string kernel_name = "prelu";
- std::set<std::string> build_opts;
- build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
- build_opts.emplace(
- ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
-
- if (is_data_type_quantized_asymmetric(input->info()->data_type()))
- {
- build_opts.emplace("-DOFF_IN=" + support::cpp11::to_string(
- input->info()->quantization_info().uniform().offset));
- build_opts.emplace("-DOFF_ALPHA=" + support::cpp11::to_string(
- alpha->info()->quantization_info().uniform().offset));
- build_opts.emplace("-DOFF_OUT=" + support::cpp11::to_string(
- output->info()->quantization_info().uniform().offset));
- build_opts.emplace("-DSCALE_IN=" + support::cpp11::to_string(
- input->info()->quantization_info().uniform().scale));
- build_opts.emplace("-DSCALE_ALPHA=" + support::cpp11::to_string(
- alpha->info()->quantization_info().uniform().scale));
- build_opts.emplace("-DSCALE_OUT=" + support::cpp11::to_string(
- output->info()->quantization_info().uniform().scale));
- kernel_name += "_qasymm8";
- }
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
-
- const std::pair<TensorShape, ValidRegion> broadcast_pair =
- ITensorInfo::broadcast_shape_and_valid_region(*input->info(), *alpha->info());
-
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
-
- // Auto initialize output if not initialized
- {
- set_shape_if_empty(*output->info(), out_shape);
-
- if (input->info()->data_type() == DataType::F16 && alpha->info()->data_type() == DataType::F16)
- {
- set_format_if_unknown(*output->info(), Format::F16);
- }
- else if (input->info()->data_type() == DataType::F32 ||
- alpha->info()->data_type() == DataType::F32)
- {
- set_format_if_unknown(*output->info(), Format::F32);
- }
- }
-
- Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration));
- Window win_input1 = win.broadcast_if_dimension_le_one(*input->info());
- Window win_input2 = win.broadcast_if_dimension_le_one(*alpha->info());
-
- AccessWindowHorizontal input1_access(input->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal input2_access(alpha->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- update_window_and_padding(win_input1, input1_access) ||
- update_window_and_padding(win_input2, input2_access) ||
- update_window_and_padding(win, output_access);
-
- output_access.set_valid_region(win, valid_region);
-
- ICLKernel::configure_internal(win);
-}
-
-void CLPReLUKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const TensorShape &in_shape1 = _input->info()->tensor_shape();
- const TensorShape &in_shape2 = _alpha->info()->tensor_shape();
- const TensorShape &out_shape = _output->info()->tensor_shape();
-
- bool can_collapse = true;
- if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1)
- {
- can_collapse =
- (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ);
- for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++)
- {
- can_collapse = (in_shape1[d] == in_shape2[d]);
- }
- }
-
- bool has_collapsed = false;
- Window collapsed =
- can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed)
- : window;
-
- const TensorShape &in_shape1_collapsed =
- has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1;
- const TensorShape &in_shape2_collapsed =
- has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2;
-
- Window slice = collapsed.first_slice_window_3D();
- Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed);
- Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed);
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice_input1);
- add_3D_tensor_argument(idx, _alpha, slice_input2);
- add_3D_tensor_argument(idx, _output, slice);
-
- enqueue(queue, *this, slice);
-
- collapsed.slide_window_slice_3D(slice_input1);
- collapsed.slide_window_slice_3D(slice_input2);
- } while (collapsed.slide_window_slice_3D(slice));
-}
-
-BorderSize CLPReLUKernel::border_size() const
-{
- const unsigned int replicateSize =
- _output->info()->dimension(0) -
- std::min(_input->info()->dimension(0), _alpha->info()->dimension(0));
- const unsigned int border =
- std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize);
- return BorderSize(0, border, 0, 0);
-}
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "support/StringSupport.h"
namespace arm_compute
{
// Output must always be initialized
ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8_SIGNED);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
return Status{};
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "support/StringSupport.h"
using namespace arm_compute;
namespace
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "support/StringSupport.h"
#include <climits>
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- const int32_t block_size)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(block_size < 1,
- "Block size should be greater than or equal to 1.");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(3) != output->dimension(3),
- "Input batch should be equal to Output batch");
-
- auto layout_out = input->data_layout();
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
-
- auto index_depth = get_data_layout_dimension_index(layout_out, DataLayoutDimension::CHANNEL);
- auto index_height = get_data_layout_dimension_index(layout_out, DataLayoutDimension::HEIGHT);
- auto index_width = get_data_layout_dimension_index(layout_out, DataLayoutDimension::WIDTH);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- input->dimension(index_depth) * block_size * block_size != output->dimension(index_depth),
-      "Output depth should be equal to (input depth * block size * block size)");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((input->dimension(index_width) % block_size) ||
- (input->dimension(index_height) % block_size),
- "Input height and width should be divisible by block size");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- (output->dimension(index_width) != (input->dimension(index_width) / block_size)) ||
- (output->dimension(index_height) != (input->dimension(index_height) / block_size)),
- "Output height and width should be equal to "
- "input_height/blocksize and input_width/blocksize respectively");
-
- return Status{};
-}
-
-} // namespace
-
-CLSpaceToDepthKernel::CLSpaceToDepthKernel() : _input(nullptr), _output(nullptr) {}
-
-void CLSpaceToDepthKernel::configure(const ICLTensor *input, ICLTensor *output,
- const int32_t block_size)
-{
-
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), block_size));
-
- _input = input;
- _output = output;
-
- // Set kernel build options
- auto layout_out = input->info()->data_layout();
- std::set<std::string> build_opts;
- build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.emplace("-DBLOCK_SIZE=" + support::cpp11::to_string(block_size));
- auto index_depth = get_data_layout_dimension_index(layout_out, DataLayoutDimension::CHANNEL);
- auto depth = input->info()->dimension(index_depth);
- build_opts.emplace("-DDEPTH_IN=" + support::cpp11::to_string(depth));
- build_opts.emplace("-DZ_IN=" + support::cpp11::to_string(input->info()->tensor_shape().z()));
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(
- "space_to_depth_" + lower_string(string_from_data_layout(layout_out)), build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps());
-
- Coordinates coord;
- coord.set_num_dimensions(output->info()->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-void CLSpaceToDepthKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
-
- Window slice_in = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
- // Setup output slice
- Window slice_out(slice_in);
- slice_out.set(Window::DimX, Window::Dimension(0, 0, 0));
- slice_out.set(Window::DimY, Window::Dimension(0, 0, 0));
- slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
- slice_out.set(3, Window::Dimension(0, 0, 0));
-
- do
- {
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, _input, slice_in);
- add_4D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_in);
- } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out));
-}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-
-using namespace arm_compute;
-
-CLTransposeConvLayerUpsampleKernel::CLTransposeConvLayerUpsampleKernel()
- : _input(nullptr), _output(nullptr), _inner_border(), _info()
-{
-}
-
-Status CLTransposeConvLayerUpsampleKernel::validate(const ITensorInfo *input,
- const ITensorInfo *output,
- const BorderSize &inner_border,
- const PadStrideInfo &info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
-
- const DataLayout data_layout = input->data_layout();
-
- const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
-
- ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_w) == 0);
- ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_h) == 0);
-
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_c) != output->dimension(idx_c));
- for (size_t i = 3; i < Coordinates::num_max_dimensions; ++i)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i));
- }
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border.right > info.stride().first - 1,
-                                  "inner_border_right must be smaller than stride_x");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border.top > info.stride().second - 1,
-                                  "inner_border_top must be smaller than stride_y");
-
- return Status{};
-}
-
-void CLTransposeConvLayerUpsampleKernel::configure(const ICLTensor *input, ICLTensor *output,
- const BorderSize &inner_border,
- const PadStrideInfo &info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- _input = input;
- _output = output;
- _inner_border = inner_border;
- _info = info;
-
- // Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(CLTransposeConvLayerUpsampleKernel::validate(
- input->info(), output->info(), inner_border, info));
-
- // Create kernel
- CLBuildOptions build_opts;
- build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibrary::get().create_kernel("deconvolution_upsample", build_opts.options()));
-
- constexpr unsigned int num_elems_processed_per_iteration = 1;
-
- // Configure kernel window
- Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
- output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-void CLTransposeConvLayerUpsampleKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const DataLayout data_layout = _input->info()->data_layout();
-
- const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
-
- const int out_start_x = _info.pad_left();
- const int out_end_x = _output->info()->dimension(idx_w) - _inner_border.right -
- _info.pad_right() + _info.stride().first - 1;
- const int out_step_x = _info.stride().first;
-
- const int out_start_y = _inner_border.top + _info.pad_top();
- const int out_end_y =
- _output->info()->dimension(idx_h) - _info.pad_bottom() + _info.stride().second - 1;
- const int out_step_y = _info.stride().second;
-
- switch (data_layout)
- {
- case DataLayout::NCHW:
- {
- Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
-
- Window slice_out = collapsed.first_slice_window_3D();
- slice_out.set(Window::DimX, Window::Dimension(out_start_x, out_end_x, out_step_x));
- slice_out.set(Window::DimY, Window::Dimension(out_start_y, out_end_y, out_step_y));
-
- Window slice_in = collapsed.first_slice_window_3D();
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice_in);
- add_3D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out);
- } while (collapsed.slide_window_slice_3D(slice_in) &&
- collapsed.slide_window_slice_3D(slice_out));
- break;
- }
- case DataLayout::NHWC:
- {
- // NOTE: not collapsing in NHWC
- Window slice_out = window.first_slice_window_3D();
- slice_out.set(Window::DimY, Window::Dimension(out_start_x, out_end_x, out_step_x));
- slice_out.set(Window::DimZ, Window::Dimension(out_start_y, out_end_y, out_step_y));
-
- Window slice_in = window.first_slice_window_3D();
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice_in);
- add_3D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out);
- } while (window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out));
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Unsupported data layout");
- }
-}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-
-#include <cstddef>
-#include <cstdint>
-
-namespace arm_compute
-{
-CPPUpsampleKernelEx::CPPUpsampleKernelEx() : _input(nullptr), _output(nullptr), _info() {}
-
-bool CPPUpsampleKernelEx::is_parallelisable() const { return false; }
-
-void CPPUpsampleKernelEx::configure(const ITensor *input, ITensor *output,
- const PadStrideInfo &info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- _input = input;
- _output = output;
- _info = info;
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps());
-
- // The CPPUpsampleKernelEx doesn't need padding so update_window_and_padding() can be skipped
- Coordinates coord;
- coord.set_num_dimensions(output->info()->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
-
- ICPPKernel::configure(win);
-}
-
-void CPPUpsampleKernelEx::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
-
- // Initialize _scaled_output buffer
- const int width_scaled = _output->info()->dimension(0);
- const int height_scaled = _output->info()->dimension(1);
- const int stride_x = _info.stride().first;
- const int stride_y = _info.stride().second;
- const int start_x = _info.pad_left();
- const int start_y = _info.pad_top();
- const int end_y = height_scaled - _info.pad_bottom();
- const int end_x = width_scaled - _info.pad_top();
- const size_t element_size = _input->info()->element_size();
-
- // The fill value is normally 0, but for QASYMM8 the '0' corresponds to the offset
- const uint8_t fill_value =
- _output->info()->data_type() == DataType::QASYMM8
- ? utility::clamp<uint8_t>(_output->info()->quantization_info().uniform().offset)
- : 0;
-  // Filling a value different than 0 works only for QASYMM8 datatype since we are filling 1-byte
-  // values in a buffer of uint8_t
- std::fill_n(_output->buffer(), _output->info()->total_size(), fill_value);
-
- // Create window
- Window window_out(window);
- window_out.set(Window::DimX, Window::Dimension(start_x, end_x, stride_x));
- window_out.set(Window::DimY, Window::Dimension(start_y, end_y, stride_y));
-
- // Create iterators
- Iterator in(_input, window);
- Iterator out(_output, window_out);
-
- execute_window_loop(
- window, [&](const Coordinates &) { memcpy(out.ptr(), in.ptr(), element_size); }, in, out);
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/NEON/kernels/NECastKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/CPP/Validate.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/NEON/NEAsymm.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-
-#include <arm_neon.h>
-
-namespace arm_compute
-{
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- SubDataType input_subtype)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8,
- DataType::QASYMM8, DataType::U32,
- DataType::S32, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON(input_subtype == SubDataType::BOOL &&
- input->data_type() != DataType::U8);
-
- if (output->tensor_shape().total_size() > 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S8,
- DataType::QASYMM8, DataType::U32,
- DataType::S32, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
- }
-
- return Status{};
-}
-
-std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
-{
- // Configure kernel window
- Window win = calculate_max_window(*input, Steps());
-
- // Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::F32);
-
- // NECastKernel doesn't need padding so update_window_and_padding() can be skipped
- Coordinates coord;
- coord.set_num_dimensions(output->num_dimensions());
- output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
-
- return std::make_tuple(Status{}, win);
-}
-
-typedef struct bool8x16
-{
- uint8x16_t val;
-} bool8x16_t;
-
-static inline uint8x16_t vreinterpretq_u8_b8(bool8x16_t __a) { return (uint8x16_t)__a.val; }
-
-template <typename ToV, typename FromV> inline ToV vcast(const FromV &v) { return v; }
-template <> inline uint8x16_t vcast(const bool8x16_t &v)
-{
- const uint8x16_t vu8 = vreinterpretq_u8_b8(v);
- const uint8x16_t zero_uint8x16 = vdupq_n_u8(0);
- uint8x16_t mask = vcgtq_u8(vu8, zero_uint8x16);
- return vshrq_n_u8(mask, 7); // true -> 1, false -> 0
-}
-
-template <> inline uint32x4x4_t vcast(const bool8x16_t &v)
-{
- const uint8x16_t vu8 = vreinterpretq_u8_b8(v);
- const uint8x16_t zero_uint8x16 = vdupq_n_u8(0);
- uint8x16_t mask = vcgtq_u8(vu8, zero_uint8x16);
- uint8x16_t vb = vshrq_n_u8(mask, 7); // true -> 1, false -> 0
-
- const uint32x4x4_t ret = {{
- vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(vb)))),
- vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(vb)))),
- vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(vb)))),
- vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(vb)))),
- }};
-
- return ret;
-}
-
-template <> inline int32x4x4_t vcast(const bool8x16_t &v)
-{
- const uint8x16_t vu8 = vreinterpretq_u8_b8(v);
- const uint8x16_t zero_uint8x16 = vdupq_n_u8(0);
- uint8x16_t mask = vcgtq_u8(vu8, zero_uint8x16);
- uint8x16_t vb = vshrq_n_u8(mask, 7); // true -> 1, false -> 0
-
- const int32x4x4_t ret = {{
- vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(vb))))),
- vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(vb))))),
- vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(vb))))),
- vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(vb))))),
- }};
-
- return ret;
-}
-
-template <> inline float32x4x4_t vcast(const bool8x16_t &v)
-{
- const uint8x16_t vu8 = vreinterpretq_u8_b8(v);
- const uint8x16_t zero_uint8x16 = vdupq_n_u8(0);
- uint8x16_t mask = vcgtq_u8(vu8, zero_uint8x16);
- uint8x16_t vb = vshrq_n_u8(mask, 7); // true -> 1, false -> 0
-
- const float32x4x4_t ret = {{
- vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(vb))))),
- vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(vb))))),
- vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(vb))))),
- vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(vb))))),
- }};
-
- return ret;
-}
-
-template <> inline uint32x4x4_t vcast(const uint8x16_t &v)
-{
- const uint32x4x4_t ret = {{
- vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(v)))),
- vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(v)))),
- vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(v)))),
- vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(v)))),
- }};
-
- return ret;
-}
-
-template <> inline int32x4x4_t vcast(const uint8x16_t &v)
-{
- const int32x4x4_t ret = {{
- vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(v))))),
- vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(v))))),
- vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(v))))),
- vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(v))))),
- }};
-
- return ret;
-}
-
-template <> inline float32x4x4_t vcast(const uint8x16_t &v)
-{
- const float32x4x4_t ret = {{
- vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(v))))),
- vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(v))))),
- vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(v))))),
- vcvtq_f32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_high_u8(v))))),
- }};
-
- return ret;
-}
-
-template <> inline uint8x16_t vcast(const int32x4x4_t &v)
-{
- // Saturate cast
- return vcombine_u8(vqmovn_u16(vcombine_u16(vqmovun_s32(v.val[0]), vqmovun_s32(v.val[1]))),
- vqmovn_u16(vcombine_u16(vqmovun_s32(v.val[2]), vqmovun_s32(v.val[3]))));
-}
-
-template <> inline uint32x4x4_t vcast(const int32x4x4_t &v)
-{
- // Saturate cast
- const uint32x4x4_t ret = {{
- vcombine_u32(vqmovun_s64(vmovl_s32(vget_low_s32(v.val[0]))),
- vqmovun_s64(vmovl_s32(vget_high_s32(v.val[0])))),
- vcombine_u32(vqmovun_s64(vmovl_s32(vget_low_s32(v.val[1]))),
- vqmovun_s64(vmovl_s32(vget_high_s32(v.val[1])))),
- vcombine_u32(vqmovun_s64(vmovl_s32(vget_low_s32(v.val[2]))),
- vqmovun_s64(vmovl_s32(vget_high_s32(v.val[2])))),
- vcombine_u32(vqmovun_s64(vmovl_s32(vget_low_s32(v.val[3]))),
- vqmovun_s64(vmovl_s32(vget_high_s32(v.val[3])))),
- }};
-
- return ret;
-}
-
-template <> inline float32x4x4_t vcast(const int32x4x4_t &v)
-{
- const float32x4x4_t ret = {{
- vcvtq_f32_s32(v.val[0]), vcvtq_f32_s32(v.val[1]), vcvtq_f32_s32(v.val[2]),
- vcvtq_f32_s32(v.val[3]),
- }};
-
- return ret;
-}
-
-template <> inline uint8x16_t vcast(const uint32x4x4_t &v)
-{
- return vcombine_u8(vqmovn_u16(vcombine_u16(vqmovn_u32(v.val[0]), vqmovn_u32(v.val[1]))),
- vqmovn_u16(vcombine_u16(vqmovn_u32(v.val[2]), vqmovn_u32(v.val[3]))));
-}
-
-template <> inline int32x4x4_t vcast(const uint32x4x4_t &v)
-{
- const int32x4x4_t ret = {{
- vcombine_s32(vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_low_u32(v.val[0])))),
- vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_high_u32(v.val[0]))))),
- vcombine_s32(vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_low_u32(v.val[1])))),
- vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_high_u32(v.val[1]))))),
- vcombine_s32(vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_low_u32(v.val[2])))),
- vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_high_u32(v.val[2]))))),
- vcombine_s32(vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_low_u32(v.val[3])))),
- vmovn_s64(vreinterpretq_s64_u64(vmovl_u32(vget_high_u32(v.val[3]))))),
- }};
-
- return ret;
-}
-
-template <> inline float32x4x4_t vcast(const uint32x4x4_t &v)
-{
- const float32x4x4_t ret = {{
- vcvtq_f32_u32(v.val[0]), vcvtq_f32_u32(v.val[1]), vcvtq_f32_u32(v.val[2]),
- vcvtq_f32_u32(v.val[3]),
- }};
-
- return ret;
-}
-
-template <> inline uint8x16_t vcast(const float32x4x4_t &v)
-{
- // Saturate cast
- return vcombine_u8(vqmovn_u16(vcombine_u16(vqmovun_s32(vcvtq_s32_f32(v.val[0])),
- vqmovun_s32(vcvtq_s32_f32(v.val[1])))),
- vqmovn_u16(vcombine_u16(vqmovun_s32(vcvtq_s32_f32(v.val[2])),
- vqmovun_s32(vcvtq_s32_f32(v.val[3])))));
-}
-
-template <> inline uint32x4x4_t vcast(const float32x4x4_t &v)
-{
- const uint32x4x4_t ret = {{
- vcvtq_u32_f32(v.val[0]), vcvtq_u32_f32(v.val[1]), vcvtq_u32_f32(v.val[2]),
- vcvtq_u32_f32(v.val[3]),
- }};
-
- return ret;
-}
-
-template <> inline int32x4x4_t vcast(const float32x4x4_t &v)
-{
- const int32x4x4_t ret = {{
- vcvtq_s32_f32(v.val[0]), vcvtq_s32_f32(v.val[1]), vcvtq_s32_f32(v.val[2]),
- vcvtq_s32_f32(v.val[3]),
- }};
-
- return ret;
-}
-
-template <typename T> struct cast_vector;
-template <> struct cast_vector<bool>
-{
- using type = bool8x16_t;
-};
-template <> struct cast_vector<uint8_t>
-{
- using type = uint8x16_t;
-};
-template <> struct cast_vector<uint32_t>
-{
- using type = uint32x4x4_t;
-};
-template <> struct cast_vector<int32_t>
-{
- using type = int32x4x4_t;
-};
-template <> struct cast_vector<float>
-{
- using type = float32x4x4_t;
-};
-
-template <typename T> inline void store_result(T *ptr, const typename cast_vector<T>::type &v)
-{
- wrapper::vstore(ptr, v.val[0]);
- wrapper::vstore(ptr + 4, v.val[1]);
- wrapper::vstore(ptr + 8, v.val[2]);
- wrapper::vstore(ptr + 12, v.val[3]);
-}
-
-template <> inline void store_result<uint8_t>(uint8_t *ptr, const uint8x16_t &v)
-{
- wrapper::vstore(ptr, v);
-}
-
-inline bool8x16_t vloadq(const bool *ptr)
-{
- bool8x16_t ret;
- ret.val = wrapper::vloadq(reinterpret_cast<const uint8_t *>(ptr));
- return ret;
-}
-
-template <typename T> inline typename cast_vector<T>::type load_input(const T *ptr)
-{
- return wrapper::vloadq(ptr);
-}
-
-template <> inline typename cast_vector<bool>::type load_input(const bool *ptr)
-{
- return vloadq(ptr);
-}
-
-template <> inline typename cast_vector<uint32_t>::type load_input(const uint32_t *ptr)
-{
- return vld4q_u32(ptr);
-}
-
-template <> inline typename cast_vector<int32_t>::type load_input(const int32_t *ptr)
-{
- return vld4q_s32(ptr);
-}
-
-template <> inline typename cast_vector<float>::type load_input(const float *ptr)
-{
- return vld4q_f32(ptr);
-}
-
-template <typename T> inline T get_value(const T *ptr) { return *ptr; }
-
-template <> inline bool get_value(const bool *ptr)
-{
- bool ret = (*ptr != 0);
- return ret;
-}
-
-template <typename FromT> void run_cast(const ITensor *input, ITensor *output, const Window &window)
-{
- const int window_step_x = 16;
- const auto window_start_x = static_cast<int>(window.x().start());
- const auto window_end_x = static_cast<int>(window.x().end());
-
- // Collapse window and reset first dimension to handle tail calculations manually
- Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
- win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- // Create iterators
- Iterator in(input, win_collapsed);
- Iterator out(output, win_collapsed);
-
-#ifdef __aarch64__
- constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN;
-#else //__aarch64__
- constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO;
-#endif //__aarch64__
-
- execute_window_loop(
- win_collapsed,
- [&](const Coordinates &) {
- const auto in_ptr = reinterpret_cast<const FromT *>(in.ptr());
-
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- using from_vector = typename cast_vector<FromT>::type;
- const from_vector vin = load_input(in_ptr + x);
-
- switch (output->info()->data_type())
- {
- case DataType::U8:
- {
- using to_vector = typename cast_vector<uint8_t>::type;
- const to_vector vout = vcast<to_vector, from_vector>(vin);
- store_result<uint8_t>(reinterpret_cast<uint8_t *>(out.ptr()) + x, vout);
- break;
- }
- case DataType::QASYMM8:
- {
- using to_vector = typename cast_vector<float>::type;
- const UniformQuantizationInfo &qinfo_out =
- output->info()->quantization_info().uniform();
- const auto vf = vcast<to_vector, from_vector>(vin);
- const auto vout = vquantize(vf, qinfo_out);
- store_result<qasymm8_t>(reinterpret_cast<qasymm8_t *>(out.ptr()) + x, vout);
- break;
- }
- case DataType::U32:
- {
- using to_vector = typename cast_vector<uint32_t>::type;
- const to_vector vout = vcast<to_vector, from_vector>(vin);
- store_result<uint32_t>(reinterpret_cast<uint32_t *>(out.ptr()) + x, vout);
- break;
- }
- case DataType::S32:
- {
- using to_vector = typename cast_vector<int32_t>::type;
- const to_vector vout = vcast<to_vector, from_vector>(vin);
- store_result<int32_t>(reinterpret_cast<int32_t *>(out.ptr()) + x, vout);
- break;
- }
- case DataType::F32:
- {
- using to_vector = typename cast_vector<float>::type;
- const to_vector vout = vcast<to_vector, from_vector>(vin);
- store_result<float>(reinterpret_cast<float *>(out.ptr()) + x, vout);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Unsupported data type.");
- }
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- FromT val = get_value(in_ptr + x);
- switch (output->info()->data_type())
- {
- case DataType::U8:
- {
- *(reinterpret_cast<uint8_t *>(out.ptr()) + x) = static_cast<uint8_t>(val);
- break;
- }
- case DataType::QASYMM8:
- {
- const QuantizationInfo &qinfo_out = output->info()->quantization_info();
- const auto qval =
- quantize_qasymm8(static_cast<float>(val), qinfo_out, rounding_policy);
- *(reinterpret_cast<qasymm8_t *>(out.ptr()) + x) = qval;
- break;
- }
- case DataType::U32:
- {
- *(reinterpret_cast<uint32_t *>(out.ptr()) + x) = static_cast<uint32_t>(val);
- break;
- }
- case DataType::S32:
- {
- *(reinterpret_cast<int32_t *>(out.ptr()) + x) = static_cast<int32_t>(val);
- break;
- }
- case DataType::F32:
- {
- *(reinterpret_cast<float *>(out.ptr()) + x) = static_cast<float>(val);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Unsupported data type.");
- }
- }
- },
- in, out);
-}
-
-void run_cast_qasymm8(const ITensor *input, ITensor *output, const Window &window)
-{
- const int window_step_x = 16;
- const auto window_start_x = static_cast<int>(window.x().start());
- const auto window_end_x = static_cast<int>(window.x().end());
-
- // Collapse window and reset first dimension to handle tail calculations manually
- Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
- win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- // Create iterators
- Iterator in(input, win_collapsed);
- Iterator out(output, win_collapsed);
-
-#ifdef __aarch64__
- constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN;
-#else //__aarch64__
- constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO;
-#endif //__aarch64__
- const auto &qinfo_in = input->info()->quantization_info().uniform();
- const auto &qinfo_out = output->info()->quantization_info().uniform();
-
- execute_window_loop(
- win_collapsed,
- [&](const Coordinates &) {
- const auto in_ptr = reinterpret_cast<const qasymm8_t *>(in.ptr());
-
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- using from_vector = typename cast_vector<float>::type;
- const auto vf = wrapper::vloadq(in_ptr + x);
- const auto vin = vdequantize(vf, qinfo_in);
- switch (output->info()->data_type())
- {
- case DataType::U8:
- {
- using to_vector = typename cast_vector<uint8_t>::type;
- const to_vector vout = vcast<to_vector, from_vector>(vin);
- store_result<uint8_t>(reinterpret_cast<uint8_t *>(out.ptr()) + x, vout);
- break;
- }
- case DataType::QASYMM8:
- {
- using to_vector = typename cast_vector<float>::type;
- const auto vf = vcast<to_vector, from_vector>(vin);
- const auto vout = vquantize(vf, qinfo_out);
- store_result<qasymm8_t>(reinterpret_cast<qasymm8_t *>(out.ptr()) + x, vout);
- break;
- }
- case DataType::U32:
- {
- using to_vector = typename cast_vector<uint32_t>::type;
- const to_vector vout = vcast<to_vector, from_vector>(vin);
- store_result<uint32_t>(reinterpret_cast<uint32_t *>(out.ptr()) + x, vout);
- break;
- }
- case DataType::S32:
- {
- using to_vector = typename cast_vector<int32_t>::type;
- const to_vector vout = vcast<to_vector, from_vector>(vin);
- store_result<int32_t>(reinterpret_cast<int32_t *>(out.ptr()) + x, vout);
- break;
- }
- case DataType::F32:
- {
- using to_vector = typename cast_vector<float>::type;
- const to_vector vout = vcast<to_vector, from_vector>(vin);
- store_result<float>(reinterpret_cast<float *>(out.ptr()) + x, vout);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Unsupported data type.");
- }
- }
-
- // Compute left-over elements
- for (; x < window_end_x; ++x)
- {
- qasymm8_t qval_in = *(in_ptr + x);
- const auto val = dequantize_qasymm8(qval_in, qinfo_in);
-
- switch (output->info()->data_type())
- {
- case DataType::U8:
- {
- *(reinterpret_cast<uint8_t *>(out.ptr()) + x) = static_cast<uint8_t>(val);
- break;
- }
- case DataType::QASYMM8:
- {
- const auto qval_out = quantize_qasymm8(val, qinfo_out, rounding_policy);
- *(reinterpret_cast<qasymm8_t *>(out.ptr()) + x) = qval_out;
- break;
- }
- case DataType::U32:
- {
- *(reinterpret_cast<uint32_t *>(out.ptr()) + x) = static_cast<uint32_t>(val);
- break;
- }
- case DataType::S32:
- {
- *(reinterpret_cast<int32_t *>(out.ptr()) + x) = static_cast<int32_t>(val);
- break;
- }
- case DataType::F32:
- {
- *(reinterpret_cast<float *>(out.ptr()) + x) = static_cast<float>(val);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Unsupported data type.");
- }
- }
- },
- in, out);
-}
-} // namespace
-
-NECastKernel::NECastKernel() : _input(nullptr), _output(nullptr), _input_subtype(SubDataType::NONE)
-{
-}
-
-void NECastKernel::configure(const ITensor *input, ITensor *output, SubDataType input_subtype)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), input_subtype));
-
- _input = input;
- _output = output;
- _input_subtype = input_subtype;
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), output->info());
-
- ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
-
- INEKernel::configure(std::get<1>(win_config));
-}
-
-Status NECastKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
- SubDataType input_subtype)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, input_subtype));
- ARM_COMPUTE_RETURN_ON_ERROR(
- std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
- return Status{};
-}
-
-void NECastKernel::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
-
- switch (_input->info()->data_type())
- {
- case DataType::U8:
- if (_input_subtype == SubDataType::BOOL)
- {
- run_cast<bool>(_input, _output, window);
- }
- else
- {
- run_cast<uint8_t>(_input, _output, window);
- }
- break;
- case DataType::QASYMM8:
- run_cast_qasymm8(_input, _output, window);
- break;
- case DataType::U32:
- run_cast<uint32_t>(_input, _output, window);
- break;
- case DataType::S32:
- run_cast<int32_t>(_input, _output, window);
- break;
- case DataType::F32:
- run_cast<float>(_input, _output, window);
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported data type.");
- }
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
-#include <arm_neon.h>
-#include <cstdint>
-
-using namespace arm_compute::misc::shape_calculator;
-
-namespace arm_compute
-{
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
- ARM_COMPUTE_RETURN_ERROR_ON(block_shape < 2);
-
- const DataLayout data_layout = input->data_layout();
- const int idx_channel =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_channel] % (block_shape * block_shape) !=
- 0);
- // Validate output if initialized
- if (output->total_size() != 0)
- {
- const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const int idx_height =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_width] !=
- (block_shape * input->tensor_shape()[idx_width]));
- ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_height] !=
- (block_shape * input->tensor_shape()[idx_height]));
- ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 4);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- }
-
- return Status{};
-}
-} // namespace
-
-NEDepthToSpaceLayerKernelEx::NEDepthToSpaceLayerKernelEx()
- : _input(nullptr), _output(nullptr), _block_shape()
-{
-}
-
-void NEDepthToSpaceLayerKernelEx::configure(const ITensor *input, ITensor *output,
- int32_t block_shape)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- TensorShape output_shape = compute_depth_to_space_shape_ex(input->info(), block_shape);
-  // Output auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
-
- // Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), block_shape));
-
- _input = input;
- _output = output;
- _block_shape = block_shape;
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps());
- ICPPKernel::configure(win);
-}
-
-Status NEDepthToSpaceLayerKernelEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- int32_t block_shape)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, block_shape));
- return Status{};
-}
-
-void NEDepthToSpaceLayerKernelEx::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
-
- const int idx_channel =
- get_data_layout_dimension_index(_input->info()->data_layout(), DataLayoutDimension::CHANNEL);
- const int depth_size = _input->info()->dimension(idx_channel);
- const int r = (depth_size / (_block_shape * _block_shape));
- const int element_size = _input->info()->element_size();
-
- Window slice_out = window.first_slice_window_3D();
-
- // The slice_out slice does not move
- slice_out.set(Window::DimX, Window::Dimension(0, 0, 0));
- slice_out.set(Window::DimY, Window::Dimension(0, 0, 0));
- slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
-
- // Main loop for NCHW and NHWC
- if (_input->info()->data_layout() == DataLayout::NCHW)
- {
- Window slice_in = window.first_slice_window_2D();
- do
- {
- Iterator in(_input, slice_in);
- execute_window_loop(slice_in,
- [&](const Coordinates &id) {
- const int x = id.x();
- const int y = id.y();
-
- const int z = id.z() % r;
- const int out_x = x * _block_shape + (id.z() / r) % _block_shape;
- const int out_y = y * _block_shape + (id.z() / r) / _block_shape;
- Coordinates output_coords{out_x, out_y, z, id[3]};
- memcpy(_output->ptr_to_element(output_coords), in.ptr(), element_size);
- },
- in);
- } while (window.slide_window_slice_2D(slice_in));
- }
- else
- {
- Window slice_in = window.first_slice_window_3D();
- do
- {
- Iterator in(_input, slice_in);
- execute_window_loop(slice_in,
- [&](const Coordinates &id) {
- const int x = id.y();
- const int y = id.z();
-
- const int z = id.x() % r;
- const int out_x = x * _block_shape + (id.x() / r) % _block_shape;
- const int out_y = y * _block_shape + (id.x() / r) / _block_shape;
- Coordinates output_coords{z, out_x, out_y, id[3]};
- memcpy(_output->ptr_to_element(output_coords), in.ptr(), element_size);
- },
- in);
- } while (window.slide_window_slice_3D(slice_in));
- }
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h"
-
-#include "arm_compute/core/CPP/Validate.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/NEAsymm.h"
-#include "arm_compute/core/NEON/NEFixedPoint.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-
-#include <algorithm>
-#include <arm_neon.h>
-#include <cstdint>
-#include <map>
-#include <string>
-
-namespace arm_compute
-{
-class Coordinates;
-
-namespace
-{
-template <ElementWiseUnaryEx op, typename ScalarType>
-inline ScalarType elementwise_op_scalar(const ScalarType &a)
-{
- switch (op)
- {
- case ElementWiseUnaryEx::NEG:
- return -a;
- default:
- ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
- }
-}
-
-template <ElementWiseUnaryEx op, typename VectorType>
-inline VectorType elementwise_op(const VectorType &a)
-{
- switch (op)
- {
- case ElementWiseUnaryEx::NEG:
- return wrapper::vneg(a);
- default:
- ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
- }
-}
-
-template <ElementWiseUnaryEx op, typename ScalarType>
-void elementwise_op(const ITensor *in, ITensor *out, const Window &window)
-{
- const int window_step_x = 16 / sizeof(ScalarType);
- const auto window_start_x = static_cast<int>(window.x().start());
- const auto window_end_x = static_cast<int>(window.x().end());
-
- Window win = window;
- win.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- Iterator input(in, win);
- Iterator output(out, win);
-
- execute_window_loop(win,
- [&](const Coordinates &) {
- auto output_ptr = reinterpret_cast<ScalarType *>(output.ptr());
- const auto input_ptr = reinterpret_cast<const ScalarType *>(input.ptr());
-
- int x = window_start_x;
- for (; x <= window_end_x - window_step_x; x += window_step_x)
- {
- wrapper::vstore(output_ptr + x,
- elementwise_op<op>(wrapper::vloadq(input_ptr + x)));
- }
- for (; x < window_end_x; ++x)
- {
- *(output_ptr + x) = elementwise_op_scalar<op>(*(input_ptr + x));
- }
- },
- input, output);
-}
-
-template <ElementWiseUnaryEx op>
-std::function<void(const ITensor *input, ITensor *output, const Window &window)>
-configure_func(const ITensor *input, ITensor *output)
-{
- std::string function_to_call("op_");
- function_to_call += string_from_data_type(input->info()->data_type()) + "_";
- function_to_call += string_from_data_type(output->info()->data_type());
-
- static std::map<std::string, NEElementwiseUnaryKernelEx::ElementwiseUnaryFunction *>
- map_function = {
- {"op_F32_F32", &elementwise_op<op, float>}, {"op_S32_S32", &elementwise_op<op, int32_t>},
- };
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- map_function["op_F16_F16"] = &elementwise_op<op, float16_t>;
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-
- auto it = map_function.find(function_to_call);
-
- if (it != map_function.end())
- {
- auto func = it->second;
- return [func](const ITensor *input, ITensor *output, const Window &window) {
- func(input, output, window);
- };
- }
- return nullptr;
-}
-} // namespace
-
-NEElementwiseUnaryKernelEx::NEElementwiseUnaryKernelEx()
- : _function(nullptr), _input(nullptr), _output(nullptr)
-{
-}
-
-void NEElementwiseUnaryKernelEx::configure(ElementWiseUnaryEx op, const ITensor *input,
- ITensor *output)
-{
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input->info(), *output->info()));
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- // Configure kernel window
- const std::pair<TensorShape, ValidRegion> broadcast_pair =
- ITensorInfo::broadcast_shape_and_valid_region(*input->info());
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
-
- // Auto initialize output if not initialized
- auto_init_if_empty(*output->info(), out_shape, 1, input->info()->data_type());
-
- Window win = calculate_max_window(valid_region);
-
- _input = input;
- _output = output;
-
- INEKernel::configure(win);
-
- switch (op)
- {
- case ElementWiseUnaryEx::NEG:
- _function = configure_func<ElementWiseUnaryEx::NEG>(input, output);
- break;
- default:
- ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
- }
-}
-
-Status NEElementwiseUnaryKernelEx::validate_arguments(const ITensorInfo &input,
- const ITensorInfo &output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::F16, DataType::F32,
- DataType::S32);
-
- // Validate in case of configured output
- if (output.total_size() > 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input, &output);
- }
-
- return Status{};
-}
-
-Status NEElementwiseUnaryKernelEx::validate(ElementWiseUnaryEx op, const ITensorInfo *input,
- const ITensorInfo *output)
-{
- ARM_COMPUTE_UNUSED(op);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input, *output));
- return Status{};
-}
-
-void NEElementwiseUnaryKernelEx::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
- ARM_COMPUTE_ERROR_ON(_function == nullptr);
- _function(_input, _output, window);
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/NEON/kernels/NEPReLUKernel.h"
-
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/NEAsymm.h"
-#include "arm_compute/core/NEON/NEElementwiseOperationFuncs.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Window.h"
-
-#include <arm_neon.h>
-
-using namespace arm_compute;
-namespace
-{
-
-/** Conditional element-wise operations */
-enum class ConditionalOperation
-{
- PRELU, /**< (x * y) for x < 0, x for x >= 0 */
-};
-
-template <ConditionalOperation op, typename ScalarType>
-inline ScalarType elementwise_conditional_op_scalar(const ScalarType &a, const ScalarType &b)
-{
- auto res = ScalarType(0);
-
- switch (op)
- {
- case ConditionalOperation::PRELU:
- res = a < 0 ? a * b : a;
- break;
- default:
- ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
- }
- return res;
-}
-
-template <ConditionalOperation op>
-inline uint8_t elementwise_conditional_op_quantized_scalar(const float &a, const float &b,
- QuantizationInfo qinfo)
-{
- return quantize_qasymm8(elementwise_conditional_op_scalar<op>(a, b), qinfo,
- RoundingPolicy::TO_NEAREST_UP);
-}
-
-template <ConditionalOperation op, typename VectorType>
-inline VectorType elementwise_conditional_op(const VectorType &a, const VectorType &b)
-{
- VectorType res = {0, 0, 0, 0};
- VectorType const_0 = {0, 0, 0, 0};
-
- switch (op)
- {
- case ConditionalOperation::PRELU:
- res = wrapper::vbsl(wrapper::vcgt(a, const_0), a, wrapper::vmul(a, b));
- ;
- break;
- default:
- ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
- }
- return res;
-}
-
-template <ConditionalOperation op>
-inline float32x4x4_t elementwise_conditional_op(const float32x4x4_t &a, const float32x4x4_t &b)
-{
- float32x4x4_t out = {{
- elementwise_conditional_op<op>(a.val[0], b.val[0]),
- elementwise_conditional_op<op>(a.val[1], b.val[1]),
- elementwise_conditional_op<op>(a.val[2], b.val[2]),
- elementwise_conditional_op<op>(a.val[3], b.val[3]),
- }};
- return out;
-}
-
-template <ConditionalOperation op, typename ScalarType, typename VectorType>
-inline VectorType elementwise_conditional_op_broadcast(const VectorType &a,
- const ScalarType &broadcast_value,
- const bool reorder)
-{
- VectorType broadcast_vector = wrapper::vdup_n(broadcast_value, wrapper::traits::vector_128_tag());
- return elementwise_conditional_op<op>(reorder ? broadcast_vector : a,
- reorder ? a : broadcast_vector);
-}
-
-template <ConditionalOperation op, typename ScalarType, typename VectorType>
-inline int elementwise_conditional_op_loop(int window_start_x, int window_end_x, int window_step_x,
- const ScalarType *input1_ptr,
- const ScalarType *input2_ptr, ScalarType *output_ptr)
-{
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const auto a = wrapper::vloadq(input1_ptr + x);
- const auto b = wrapper::vloadq(input2_ptr + x);
- wrapper::vstore(output_ptr + x, elementwise_conditional_op<op>(a, b));
- }
- return x;
-}
-
-template <ConditionalOperation op>
-inline int elementwise_conditional_op_quantized_loop(int window_start_x, int window_end_x,
- int window_step_x, const uint8_t *input1_ptr,
- const uint8_t *input2_ptr, uint8_t *output_ptr,
- int32x4_t voffset1, int32x4_t voffset2,
- float32x4_t vscale1, float32x4_t vscale2,
- float32x4_t voffseto, float32x4_t invvscaleo)
-{
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- // Get inputs and compute output
- const float32x4x4_t af = load_quantized(input1_ptr + x, voffset1, vscale1);
- const float32x4x4_t bf = load_quantized(input2_ptr + x, voffset2, vscale2);
- const float32x4x4_t rf = elementwise_conditional_op<op>(af, bf);
- store_quantized(output_ptr + x, rf, voffseto, invvscaleo);
- }
- return x;
-}
-
-template <ConditionalOperation op, typename ScalarType, typename VectorType>
-inline int elementwise_conditional_op_broadcast_loop(int window_start_x, int window_end_x,
- int window_step_x,
- const ScalarType *non_broadcast_input_ptr,
- const ScalarType &broadcast_value,
- ScalarType *output_ptr, const bool reorder)
-{
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const auto a = wrapper::vloadq((non_broadcast_input_ptr + x));
- wrapper::vstore(output_ptr + x,
- elementwise_conditional_op_broadcast<op>(a, broadcast_value, reorder));
- }
- return x;
-}
-
-template <ConditionalOperation op>
-inline int elementwise_conditional_op_quantized_broadcast_loop(
- int window_start_x, int window_end_x, int window_step_x, const uint8_t *non_broadcast_input_ptr,
- float32x4x4_t broadcast_vector, uint8_t *output_ptr, int32x4_t voffset_non_broadcast,
- float32x4_t vscale_non_broadcast, float32x4_t voffseto, float32x4_t invvscaleo, bool reorder)
-{
- int x = window_start_x;
- for (; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const float32x4x4_t af =
- load_quantized(non_broadcast_input_ptr + x, voffset_non_broadcast, vscale_non_broadcast);
- const float32x4x4_t rf = elementwise_conditional_op<op>(reorder ? broadcast_vector : af,
- reorder ? af : broadcast_vector);
- store_quantized(output_ptr + x, rf, voffseto, invvscaleo);
- }
- return x;
-}
-
-template <ConditionalOperation op, typename ScalarType, typename VectorType>
-void elementwise_conditional_op(const ITensor *in1, const ITensor *in2, ITensor *out,
- const Window &window)
-{
- elementwise_op(in1, in2, out, window, &elementwise_conditional_op_scalar<op, ScalarType>,
- &elementwise_conditional_op_broadcast_loop<op, ScalarType, VectorType>,
- &elementwise_conditional_op_loop<op, ScalarType, VectorType>);
-}
-
-template <ConditionalOperation op>
-void elementwise_conditional_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *out,
- const Window &window)
-{
- elementwise_op_quantized(in1, in2, out, window, &elementwise_conditional_op_quantized_scalar<op>,
- &elementwise_conditional_op_quantized_broadcast_loop<op>,
- &elementwise_conditional_op_quantized_loop<op>);
-}
-} // namespace
-
-NEPReLUKernel::NEPReLUKernel() : _input(nullptr), _alpha(nullptr), _output(nullptr) {}
-
-void NEPReLUKernel::configure(const ITensor *input, const ITensor *alpha, ITensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, alpha, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input->info(), *alpha->info(), *output->info()));
-
- // Configure kernel window
- const std::pair<TensorShape, ValidRegion> broadcast_pair =
- ITensorInfo::broadcast_shape_and_valid_region(*input->info(), *alpha->info());
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
-
- // Auto initialize output if not initialized
- auto_init_if_empty(*output->info(), out_shape, 1, input->info()->data_type());
-
- Window win = calculate_max_window(valid_region);
-
- _input = input;
- _alpha = alpha;
- _output = output;
- INEKernel::configure(win);
-}
-
-void NEPReLUKernel::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- if (_input->info()->data_type() == DataType::F32)
- {
- elementwise_conditional_op<ConditionalOperation::PRELU, float, float32x4_t>(_input, _alpha,
- _output, window);
- }
- else if (_input->info()->data_type() == DataType::QASYMM8)
- {
- elementwise_conditional_op_quantized<ConditionalOperation::PRELU>(_input, _alpha, _output,
- window);
- }
- else
- {
- ARM_COMPUTE_ERROR("Wrong Type");
- }
-}
-
-Status NEPReLUKernel::validate_arguments(const ITensorInfo &input, const ITensorInfo &alpha,
- const ITensorInfo &output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::QASYMM8, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input, &alpha, &output);
-
- const TensorShape out_shape =
- TensorShape::broadcast_shape(input.tensor_shape(), alpha.tensor_shape());
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0,
- "Inputs are not broadcast compatible");
-
- // Checks performed when output is configured
- if (output.total_size() > 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(out_shape, output.tensor_shape(), 0),
- "Wrong shape for output");
- }
-
- return Status{};
-}
-
-Status NEPReLUKernel::validate(const ITensorInfo *input, const ITensorInfo *alpha,
- const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, alpha, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input, *alpha, *output));
-
- return Status{};
-}
ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8_SIGNED);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scale_factor, 1, DataType::F16,
DataType::F32);
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/wrapper/wrapper.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
-#include <arm_neon.h>
-#include <cstdint>
-
-using namespace arm_compute::misc::shape_calculator;
-
-namespace arm_compute
-{
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
-
- ARM_COMPUTE_RETURN_ERROR_ON(block_shape < 1);
-
- // Validate output if initialized
- if (output->total_size() != 0)
- {
- const DataLayout data_layout = input->data_layout();
- const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const int idx_height =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- const int idx_channel =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- const int idx_batch =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
- ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_width] % block_shape != 0);
- ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_height] % block_shape != 0);
- ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_batch] !=
- output->tensor_shape()[idx_batch]);
- ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_channel] % (block_shape * block_shape) !=
- 0);
- ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape().total_size() !=
- output->tensor_shape().total_size());
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- }
-
- return Status{};
-}
-} // namespace
-
-NESpaceToDepthLayerKernelEx::NESpaceToDepthLayerKernelEx()
- : _input(nullptr), _output(nullptr), _block_shape()
-{
-}
-
-void NESpaceToDepthLayerKernelEx::configure(const ITensor *input, ITensor *output,
- int32_t block_shape)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- TensorShape output_shape = compute_space_to_depth_shape_ex(input->info(), block_shape);
- auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
-
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), block_shape));
-
- _input = input;
- _block_shape = block_shape;
- _output = output;
-
- // Configure kernel window
- Window win = calculate_max_window(*output->info(), Steps());
- INEKernel::configure(win);
-}
-
-Status NESpaceToDepthLayerKernelEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- int32_t block_shape)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, block_shape));
- return Status{};
-}
-
-void NESpaceToDepthLayerKernelEx::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
-
- const DataLayout data_layout = _input->info()->data_layout();
- const int channel_idx =
- get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- const int element_size = _input->info()->element_size();
-
- const size_t channel_size = _input->info()->dimension(channel_idx);
-
- Window slice_out = window.first_slice_window_3D();
-
- int batch_id = 0;
-
- // Main loop for NCHW and NHWC
- if (_output->info()->data_layout() == DataLayout::NCHW)
- {
- do
- {
- Iterator out(_output, slice_out);
- execute_window_loop(slice_out,
- [&](const Coordinates &id) {
- const size_t channel_id = id.z();
- const size_t in_x =
- id.x() * _block_shape + (channel_id / channel_size) % _block_shape;
- const size_t in_y =
- id.y() * _block_shape + (channel_id / channel_size) / _block_shape;
- const int z = channel_id % channel_size;
- Coordinates input_coords{in_x, in_y, z, batch_id};
- memcpy(out.ptr(), _input->ptr_to_element(input_coords), element_size);
- },
- out);
- ++batch_id;
- } while (window.slide_window_slice_3D(slice_out));
- }
- else
- {
- do
- {
- Iterator out(_output, slice_out);
- execute_window_loop(slice_out,
- [&](const Coordinates &id) {
- const size_t channel_id = id.x();
- const size_t in_x =
- id.y() * _block_shape + (channel_id / channel_size) % _block_shape;
- const size_t in_y =
- id.z() * _block_shape + (channel_id / channel_size) / _block_shape;
- const int z = channel_id % channel_size;
- Coordinates input_coords{z, in_x, in_y, batch_id};
- memcpy(out.ptr(), _input->ptr_to_element(input_coords), element_size);
- },
- out);
- ++batch_id;
- } while (window.slide_window_slice_3D(slice_out));
- }
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLArgOperation.h"
-
-#include "arm_compute/core/CL/kernels/CLArgOperationKernel.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-
-namespace arm_compute
-{
-
-CLArgOperation::CLArgOperation()
-{
- // DO NOTHING
-}
-
-void CLArgOperation::configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis,
- ArgOperation op)
-{
- ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), axis, output->info(), op));
- _input = input;
- _output = output;
- _axis = axis;
- _arg_op = op;
- // NOTE The argminmax_axis must have no duplication.
- _num_of_kernels = axis.size();
- const size_t num_of_interm_tensors = _num_of_kernels - 1;
-
- _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors);
- _argop_kernels =
- arm_compute::support::cpp14::make_unique<CLArgOperationKernel[]>(_num_of_kernels);
-
- TensorShape shape{input->info()->tensor_shape()};
- for (size_t i = 0; i < num_of_interm_tensors; i++)
- {
- shape.set(_axis[i], 1);
- _interm_tensors[i].allocator()->init(
- TensorInfo(shape, input->info()->num_channels(), input->info()->data_type())
- .set_data_layout(input->info()->data_layout()));
- _interm_tensors[i].allocator()->allocate();
- }
-
- // Set a vector that is ordered ICLTensors sequentially.
- std::vector<ICLTensor *> tensors;
- tensors.emplace_back(input);
- for (size_t i = 0; i < num_of_interm_tensors; i++)
- {
- tensors.emplace_back(_interm_tensors.get() + i);
- }
- tensors.emplace_back(output);
-
- // Apply ArgMinMax on all kernels
- for (size_t i = 0; i < _num_of_kernels; i++)
- {
- _argop_kernels[i].configure(tensors[i], tensors[i + 1], _axis[i], op);
- }
-}
-
-Status CLArgOperation::validate(const ITensorInfo *input, const std::vector<uint32_t> &axis,
- const ITensorInfo *output, ArgOperation op)
-{
- const size_t num_of_kernels = axis.size();
- const size_t num_of_interm_tensors = num_of_kernels - 1;
-
- // Create temporary tensor infos
- auto interm_tensors =
- arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
-
- // Create intermediate tensor info
- TensorShape shape{input->tensor_shape()};
-
- for (size_t i = 0; i < num_of_interm_tensors; i++)
- {
- shape.set(axis[i], 1);
- interm_tensors[i].set_data_type(input->data_type());
- interm_tensors[i].set_tensor_shape(shape);
- interm_tensors[i].set_num_channels(input->num_channels());
- }
-
- // Set a vector that is ordered ITensorInfo sequentially.
- std::vector<const ITensorInfo *> tensors;
- tensors.emplace_back(input);
- for (size_t i = 0; i < num_of_interm_tensors; i++)
- {
- tensors.emplace_back(interm_tensors.get() + i);
- }
- tensors.emplace_back(output);
-
- // Validate argminmax only on all kernels
- for (size_t i = 0; i < num_of_kernels; i++)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(
- CLArgOperationKernel::validate(tensors[i], tensors[i + 1], axis[i], op));
- }
-
- return Status{};
-}
-
-void CLArgOperation::run()
-{
- for (size_t i = 0; i < _num_of_kernels; ++i)
- {
- CLScheduler::get().enqueue(_argop_kernels[i]);
- }
-}
-
-} // namespace arm_compute
void CLBinaryLogicalOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
BinaryLogicalOperation op)
{
- auto k = arm_compute::support::cpp14::make_unique<CLBinaryLogicalOpKernel>();
+ auto k = support::cpp14::make_unique<CLBinaryLogicalOpKernel>();
k->configure(input1, input2, output, op);
_kernel = std::move(k);
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLCast.h"
-
-#include "arm_compute/core/CL/kernels/CLCastKernel.h"
-
-using namespace arm_compute;
-
-void CLCast::configure(ICLTensor *input, ICLTensor *output, SubDataType input_subtype)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLCastKernel>();
- k->configure(input, output, input_subtype);
- _kernel = std::move(k);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLDepthToSpace.h"
-
-#include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h"
-
-using namespace arm_compute;
-
-void CLDepthToSpace::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLDepthToSpaceKernel>();
- k->configure(input, output, block_size);
- _kernel = std::move(k);
-}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLDirectTransposeConvLayer.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/UtilsEx.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+#include <memory>
+#include <tuple>
+
+namespace arm_compute
+{
+using namespace arm_compute::misc::shape_calculator;
+
+CLDirectTransposeConvLayer::CLDirectTransposeConvLayer(
+ std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+ : _memory_group(std::move(memory_manager)),
+ _scale_f(),
+ _conv_f(),
+ _flip_weights(),
+ _scaled_output(),
+ _original_weights(nullptr),
+ _weights_flipped(),
+ _flip_axis(),
+ _is_prepared(false)
+{
+}
+
+Status CLDirectTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *bias, ITensorInfo *output,
+ const PadStrideInfo &info, unsigned int invalid_right,
+ unsigned int invalid_bottom,
+ const WeightsInfo &weights_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
+ input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
+ const DataLayout data_layout = input->data_layout();
+
+ const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) != weights->dimension(idx_h));
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1);
+
+ auto out_dims = transposeconv_output_dimensions(
+ input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w),
+ weights->dimension(idx_h), info, invalid_right, invalid_bottom);
+
+ const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights);
+
+ if (bias != nullptr)
+ {
+ if (is_data_type_quantized_asymmetric(input->data_type()))
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
+ }
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, bias);
+ }
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_w) != output_shape[idx_w],
+ "Output's width is invalid.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_h) != output_shape[idx_h],
+ "Output's height is invalid.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_c) != output_shape[idx_c],
+ "Output's depth is invalid.");
+
+ unsigned int pad_left = 0;
+ unsigned int pad_right = 0;
+ unsigned int pad_top = 0;
+ unsigned int pad_bottom = 0;
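+ // Compute the upsampled (zero-inserted) input shape and the padding needed so a stride-1 convolution reproduces the requested output shape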
+ const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
+ *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top,
+ pad_bottom);
+ TensorInfo scale_out_info(input->clone()
+ ->set_is_resizable(true)
+ .reset_padding()
+ .set_tensor_shape(scale_out_shape)
+ .set_data_layout(data_layout));
+ const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+
+ ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionLayerUpsample::validate(input, &scale_out_info, info));
+ ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayer::validate(&scale_out_info, weights, bias, output,
+ conv_info, weights_info));
+
+ return Status{};
+}
+
+void CLDirectTransposeConvLayer::configure(ICLTensor *input, ICLTensor *weights,
+ const ICLTensor *bias, ICLTensor *output,
+ const PadStrideInfo &info, unsigned int invalid_right,
+ unsigned int invalid_bottom,
+ const WeightsInfo &weights_info)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, weights, bias, output, info,
+ invalid_right, invalid_bottom, weights_info);
+}
+
+void CLDirectTransposeConvLayer::configure(const CLCompileContext &compile_context,
+ ICLTensor *input, ICLTensor *weights,
+ const ICLTensor *bias, ICLTensor *output,
+ const PadStrideInfo &info, unsigned int invalid_right,
+ unsigned int invalid_bottom,
+ const WeightsInfo &weights_info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+
+ unsigned int pad_left = 0;
+ unsigned int pad_right = 0;
+ unsigned int pad_top = 0;
+ unsigned int pad_bottom = 0;
+ const unsigned int stride_x = info.stride().first;
+ const unsigned int stride_y = info.stride().second;
+
+ const DataLayout data_layout = input->info()->data_layout();
+
+ const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+
+ _original_weights = weights;
+ _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
+ _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
+ _flip_weights.configure(compile_context, weights, &_weights_flipped, &_flip_axis);
+
+ auto out_dims = transposeconv_output_dimensions(
+ input->info()->dimension(idx_w), input->info()->dimension(idx_h),
+ weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right,
+ invalid_bottom);
+
+ const TensorShape output_shape =
+ compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
+
+ // Output auto initialization if not yet initialized
+ auto_init_if_empty(
+ *output->info(),
+ input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout));
+
+ // Perform validation step
+ ARM_COMPUTE_ERROR_THROW_ON(CLDirectTransposeConvLayer::validate(
+ input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(),
+ info, invalid_right, invalid_bottom));
+
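+ // If the caller retains internal weights, the flipped weights are reused and prepare() skips flipping them again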
+ _is_prepared = weights_info.retain_internal_weights();
+
+ _memory_group.manage(&_scaled_output);
+
+ // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order
+ // to match output shape
+ const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
+ *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
+ pad_right, pad_top, pad_bottom);
+
+ TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
+ input->info()->quantization_info());
+ scale_out_info.set_data_layout(data_layout);
+ _scaled_output.allocator()->init(scale_out_info);
+
+ // configure scale function
+ const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
+ DimensionRoundingType::FLOOR);
+ _scale_f.configure(input, &_scaled_output, upsample_info);
+
+ // Setup the function to convolve the upscaled output
+ const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+ _conv_f.configure(compile_context, &_scaled_output, &_weights_flipped, bias, output, conv_info,
+ weights_info);
+ _scaled_output.allocator()->allocate();
+
+ // Setup flip axis data
+ _flip_axis.allocator()->allocate();
+ _flip_axis.map(true);
+ auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer());
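+ // Flip the weights along their spatial axes: width/height are dimensions 1 and 2 in NHWC, and 0 and 1 in NCHW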
+ if (weights->info()->data_layout() == DataLayout::NHWC)
+ {
+ axis_data[0] = 1;
+ axis_data[1] = 2;
+ }
+ else
+ {
+ axis_data[0] = 0;
+ axis_data[1] = 1;
+ }
+ _flip_axis.unmap();
+}
+
+void CLDirectTransposeConvLayer::run()
+{
+ prepare();
+
+ MemoryGroupResourceScope scope_mg(_memory_group);
+
+ _scale_f.run();
+ _conv_f.run();
+}
+
+void CLDirectTransposeConvLayer::prepare()
+{
+ if (!_is_prepared)
+ {
+ ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
+ // Run weights flipping and mark original weights tensor as unused
+ _weights_flipped.allocator()->allocate();
+ _flip_weights.run();
+ _original_weights->mark_as_unused();
+
+ // Prepare convolution
+ _conv_f.prepare();
+
+ // Free flipped weights
+ if (!_weights_flipped.is_used())
+ {
+ _weights_flipped.allocator()->free();
+ }
+
+ _is_prepared = true;
+ }
+}
+} // namespace arm_compute
void CLEmbeddingLookup::configure(const ICLTensor *input, ICLTensor *output,
const ICLTensor *lookups)
{
- auto k = arm_compute::support::cpp14::make_unique<CLEmbeddingLookupKernel>();
+ auto k = support::cpp14::make_unique<CLEmbeddingLookupKernel>();
k->configure(input, output, lookups);
_kernel = std::move(k);
}
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/ToolchainSupport.h"
+#include "support/MemorySupport.h"
#include <algorithm>
ARM_COMPUTE_UNUSED(weights);
ARM_COMPUTE_UNUSED(output);
ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMMLowpMatrixMultiplyCoreEx::validate(&input, &weights, nullptr, &output));
+ CLGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
return Status{};
}
void CLFullyConnectedHybridLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output)
{
- auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>();
+ auto k = support::cpp14::make_unique<CLTransposeKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
// Quantize input
_quantized_input.allocator()->init(
- input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8));
+ input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
+ DataType::QASYMM8_SIGNED));
_memory_group.manage(&_quantized_input);
_quant_input_kernel.configure(input, &_scale_factor, &_quantized_input);
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::S8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8_SIGNED);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
ARM_COMPUTE_RETURN_ON_ERROR(CLScaleFactorSymm8Kernel::validate(input, &scale_factor));
// Validate quantization symm8 kernel
- const ITensorInfo &quantized_input = TensorInfo(
- input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8));
+ const ITensorInfo &quantized_input =
+ TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type(
+ DataType::QASYMM8_SIGNED));
ARM_COMPUTE_RETURN_ON_ERROR(
CLQuantizationSymmetricKernel::validate(input, &scale_factor, &quantized_input));
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/ToolchainSupport.h"
+#include "support/MemorySupport.h"
#include <algorithm>
void CLFullyConnectedLayerReshapeWeightsEx::configure(const ICLTensor *input, ICLTensor *output)
{
- auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>();
+ auto k = support::cpp14::make_unique<CLTransposeKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
fc->configure(input_to_use, _weights, _biases, _output);
return std::unique_ptr<arm_compute::IFunction>(fc);
}
- else
+ else if (kernel_type == KernelType::PREPROCESSED_WEIGHTS)
{
- assert(kernel_type == KernelType::PREPROCESSED_WEIGHTS);
-
bool is_hybrid = (input->info()->data_type() == DataType::F32 ||
input->info()->data_type() == DataType::F16) &&
- weights->info()->data_type() == DataType::S8;
+ (weights->info()->data_type() == DataType::S8 ||
+ weights->info()->data_type() == DataType::QASYMM8_SIGNED);
if (is_hybrid)
{
auto fc = new arm_compute::CLFullyConnectedHybridLayer{_memory_manager};
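+ // Temporarily re-tag the weights as QASYMM8_SIGNED so the hybrid layer accepts them, then restore the original data type after configuration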
+ ITensorInfo *weights_info = const_cast<ITensorInfo *>(_weights->info());
+ const auto origin_weights_data_type = weights_info->data_type();
+ weights_info->set_data_type(DataType::QASYMM8_SIGNED);
fc->configure(input_to_use, _weights, _biases, _output);
+ weights_info->set_data_type(origin_weights_data_type);
return std::unique_ptr<arm_compute::IFunction>(fc);
}
else
return std::unique_ptr<arm_compute::IFunction>(fc);
}
}
+ else
+ {
+ throw std::runtime_error("CLFullyConnectedReshapingLayer: Unsupported kernel type");
+ }
+
}();
if (_needs_reshape)
+++ /dev/null
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-
-namespace arm_compute
-{
-using namespace arm_compute::misc::shape_calculator;
-using namespace arm_compute::cl_gemm;
-
-namespace
-{
-inline bool is_gemm_reshaped(bool reshape_b_only_on_first_run, GPUTarget gpu_target)
-{
- return (get_arch_from_target(gpu_target) != GPUTarget::MIDGARD) && (reshape_b_only_on_first_run);
-}
-} // namespace
-
-CLGEMMLowpMatrixMultiplyCoreEx::CLGEMMLowpMatrixMultiplyCoreEx(
- std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _mm_midgard_kernel(), _mtx_a_reduction_kernel(),
- _mtx_b_reduction_kernel(), _vector_sum_col(), _vector_sum_row(), _a_offset(0), _b_offset(0),
- _reshape_b_only_on_first_run(false), _is_prepared(false)
-{
-}
-
-void CLGEMMLowpMatrixMultiplyCoreEx::configure(const ICLTensor *a, const ICLTensor *b,
- const ICLTensor *c, ICLTensor *output,
- const GEMMInfo &gemm_info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
- ARM_COMPUTE_UNUSED(c);
- ARM_COMPUTE_ERROR_THROW_ON(CLGEMMLowpMatrixMultiplyCoreEx::validate(
- a->info(), b->info(), c != nullptr ? c->info() : nullptr, output->info(), gemm_info));
-
- _is_prepared = false;
- _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
- _a_offset = a->info()->quantization_info().uniform().offset;
- _b_offset = b->info()->quantization_info().uniform().offset;
-
- // Get the GPU target
- const GPUTarget gpu_target = CLScheduler::get().target();
-
- // Set the target for the kernels
- _mm_midgard_kernel.set_target(gpu_target);
-
- // GEMMRHSMatrixInfo rhs_info;
- // GEMMLHSMatrixInfo lhs_info;
-
- // Arguments used by GEMMReshapeInfo
- // If we pass the matrix A and matrix B reshaped to CLGEMMMatrixMultiplyKernel, we need to pass m,
- // n, k, mult_transpose1xW_width and mult_interleave4x4_height to CLGEMMReshapeInfo
- // in order to know how the matrices have been reshaped
- bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d
- ? (a->info()->dimension(1) * a->info()->dimension(2))
- : a->info()->dimension(1);
- const unsigned int n = b->info()->dimension(0);
- const unsigned int k = a->info()->dimension(0);
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
-
- const ICLTensor *matrix_b = b;
- // Configure matrix multiply kernel
- _mm_midgard_kernel.configure(
- a, matrix_b, output,
- GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d));
-}
-
-Status CLGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITensorInfo *b,
- const ITensorInfo *c, const ITensorInfo *output,
- const GEMMInfo &gemm_info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::S8);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, b);
- ARM_COMPUTE_UNUSED(c);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(),
- "Matrix A already reshaped is not supported");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(),
- "Matrix B already reshaped is not supported");
-
- const ITensorInfo *matrix_a_info = a;
-
- // Get the GPU target
- const GPUTarget gpu_target = CLScheduler::get().target();
-
- bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m =
- reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
-
- bool reshape_matrix_b = is_gemm_reshaped(gemm_info.reshape_b_only_on_first_run(), gpu_target);
-
- const GEMMReshapeInfo reshape_info =
- GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d);
-
- TensorInfo weights_info(*b);
- const ITensorInfo *matrix_b_info = &weights_info;
- if (reshape_matrix_b)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(false,
- "CLGEMMLowpMatrixMultiplyCoreEx does not support reshape_b");
- }
-
- // Validate matrix multiply
- ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyKernelEx::validate(
- matrix_a_info, matrix_b_info, output, reshape_info));
-
- return Status{};
-}
-
-void CLGEMMLowpMatrixMultiplyCoreEx::run()
-{
- prepare();
-
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- // Run matrix multiply
- CLScheduler::get().enqueue(_mm_midgard_kernel, false);
-}
-
-void CLGEMMLowpMatrixMultiplyCoreEx::prepare()
-{
- if (!_is_prepared)
- {
- _is_prepared = true;
- }
-}
-} // namespace arm_compute
void CLGatherEx::configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output,
int axis)
{
- auto k = arm_compute::support::cpp14::make_unique<CLGatherExKernel>();
+ auto k = support::cpp14::make_unique<CLGatherExKernel>();
k->configure(input, indices, output, axis);
_kernel = std::move(k);
}
void CLHashtableLookup::configure(const ICLTensor *lookups, const ICLTensor *keys,
const ICLTensor *input, ICLTensor *output, ICLTensor *hits)
{
- auto k = arm_compute::support::cpp14::make_unique<CLHashtableLookupKernel>();
+ auto k = support::cpp14::make_unique<CLHashtableLookupKernel>();
k->configure(lookups, keys, input, output, hits);
_kernel = std::move(k);
}
void CLInstanceNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output,
ICLTensor *gamma, ICLTensor *beta, float epsilon)
{
- auto k = arm_compute::support::cpp14::make_unique<CLInstanceNormalizationLayerKernelEx>();
+ auto k = support::cpp14::make_unique<CLInstanceNormalizationLayerKernelEx>();
k->configure(input, output, gamma, beta, epsilon);
_kernel = std::move(k);
}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLPReLU.h"
-
-#include "arm_compute/core/CL/kernels/CLPReLUKernel.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-void CLPReLU::configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLPReLUKernel>();
- k->configure(input, alpha, output);
- _kernel = std::move(k);
-
- if (output->info()->dimension(0) > 1)
- {
- ICLTensor *broadcasted_info = (input->info()->dimension(0) == 1) ? input : alpha;
-
- if (broadcasted_info->info()->dimension(0) == 1)
- {
- _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
- }
- }
-}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLRNNLayerEx.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/ToolchainSupport.h"
-
-#include <utility>
-
-using namespace arm_compute;
-using namespace arm_compute::misc::shape_calculator;
-
-CLRNNLayerEx::CLRNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(),
- _activation_kernel(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(),
- _gemm_output(), _add_output(), _is_prepared(false)
-{
-}
-
-Status CLRNNLayerEx::validate(const ITensorInfo *input, const ITensorInfo *weights,
- const ITensorInfo *recurrent_weights, const ITensorInfo *bias,
- const ITensorInfo *hidden_state, const ITensorInfo *output,
- const ActivationLayerInfo &info)
-{
- const int idx_width = 0;
- const int idx_height = 1;
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state,
- output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_width) != weights->dimension(idx_width));
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_height) !=
- recurrent_weights->dimension(idx_width));
- ARM_COMPUTE_RETURN_ERROR_ON(recurrent_weights->dimension(idx_width) !=
- recurrent_weights->dimension(1));
- ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() != 1);
- ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(idx_width) != weights->dimension(idx_height));
- ARM_COMPUTE_RETURN_ERROR_ON(hidden_state->dimension(idx_width) != weights->dimension(idx_height));
- ARM_COMPUTE_RETURN_ERROR_ON(hidden_state->dimension(idx_height) != input->dimension(idx_height));
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(),
- hidden_state->tensor_shape());
-
- auto shape_info =
- TensorInfo(compute_rnn_shape(recurrent_weights, hidden_state->dimension(idx_height)), 1,
- input->data_type());
-
- ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, weights, bias, &shape_info));
- ARM_COMPUTE_RETURN_ON_ERROR(
- CLGEMM::validate(hidden_state, recurrent_weights, nullptr, &shape_info, 1.f, 0.f));
- ARM_COMPUTE_RETURN_ON_ERROR(CLSaturatedArithmeticOperationKernel::validate(
- ArithmeticOperation::ADD, &shape_info, &shape_info, &shape_info, ConvertPolicy::SATURATE));
- ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayerKernel::validate(&shape_info, &shape_info, info));
-
- return Status{};
-}
-
-void CLRNNLayerEx::configure(const ICLTensor *input, const ICLTensor *weights,
- const ICLTensor *recurrent_weights, const ICLTensor *bias,
- ICLTensor *hidden_state, ICLTensor *output, ActivationLayerInfo &info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, output);
- ARM_COMPUTE_ERROR_THROW_ON(CLRNNLayerEx::validate(input->info(), weights->info(),
- recurrent_weights->info(), bias->info(),
- hidden_state->info(), output->info(), info));
-
- const int idx_height = 1;
- TensorShape shape =
- compute_rnn_shape(recurrent_weights->info(), hidden_state->info()->dimension(idx_height));
-
- _is_prepared = false;
-
- _fully_connected_out.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
- _gemm_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
-
- // Manage intermediate buffers and configure
- _memory_group.manage(&_fully_connected_out);
- _fully_connected_kernel.configure(input, weights, bias, &_fully_connected_out);
-
- _memory_group.manage(&_gemm_output);
- _gemm_state_f.configure(hidden_state, recurrent_weights, nullptr, &_gemm_output, 1.f, 0.f);
-
- _add_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
- _memory_group.manage(&_add_output);
-
- _add_kernel.configure(ArithmeticOperation::ADD, &_fully_connected_out, &_gemm_output,
- &_add_output, ConvertPolicy::SATURATE);
-
- _fully_connected_out.allocator()->allocate();
- _gemm_output.allocator()->allocate();
-
- _activation_kernel.configure(&_add_output, hidden_state, info);
- _add_output.allocator()->allocate();
-
- _copy_kernel.configure(hidden_state, output);
-}
-
-void CLRNNLayerEx::run()
-{
- prepare();
-
- _memory_group.acquire();
-
- _fully_connected_kernel.run();
- _gemm_state_f.run();
- CLScheduler::get().enqueue(_add_kernel);
- CLScheduler::get().enqueue(_activation_kernel);
-
- // copy hidden out to output
- CLScheduler::get().enqueue(_copy_kernel);
-
- _memory_group.release();
-}
-
-void CLRNNLayerEx::prepare()
-{
- if (!_is_prepared)
- {
- _fully_connected_kernel.prepare();
- _gemm_state_f.prepare();
-
- _is_prepared = true;
- }
-}
const size_t num_of_interm_tensors = num_of_kernels - (keep_dims ? 1 : 0);
// Create temporary tensor infos
- auto interm_tensors =
- arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
+ auto interm_tensors = support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
// Create intermediate tensor info
TensorShape shape{input->tensor_shape()};
const size_t num_of_kernels = axis.size();
const size_t num_of_interm_tensors = num_of_kernels - (keep_dims ? 1 : 0);
- _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors);
- _reduce_kernels =
- arm_compute::support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels);
+ _interm_tensors = support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors);
+ _reduce_kernels = support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels);
// Set a vector that is ordered ICLTensors sequentially.
std::vector<ICLTensor *> tensors;
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLSpaceToDepth.h"
-
-#include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h"
-
-using namespace arm_compute;
-
-void CLSpaceToDepth::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLSpaceToDepthKernel>();
- k->configure(input, output, block_size);
- _kernel = std::move(k);
-}
*/
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
#include "arm_compute/runtime/CL/functions/CLTransposeConvLayer.h"
-#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
-#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/UtilsEx.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CPP/CPPScheduler.h"
+#include <cmath>
#include <memory>
#include <tuple>
using namespace arm_compute;
using namespace arm_compute::misc::shape_calculator;
-CLTransposeConvLayer::CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
- : _memory_group(std::move(memory_manager)),
- _scale_f(),
- _conv_f(),
- _flip_weights(),
- _scaled_output(),
- _original_weights(nullptr),
- _weights_flipped(),
- _is_prepared(false)
+CLTransposeConvLayer::CLTransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_manager(std::move(memory_manager)), _function()
+{
+}
+
+void CLTransposeConvLayer::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias,
+ ICLTensor *output, const PadStrideInfo &deconv_info,
+ unsigned int invalid_right, unsigned int invalid_bottom,
+ const WeightsInfo &weights_info)
{
+ configure(CLKernelLibrary::get().get_compile_context(), input, weights, bias, output, deconv_info,
+ invalid_right, invalid_bottom, weights_info);
+}
+
+void CLTransposeConvLayer::configure(const CLCompileContext &compile_context, ICLTensor *input,
+ ICLTensor *weights, const ICLTensor *bias, ICLTensor *output,
+ const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom, const WeightsInfo &weights_info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+
+ switch (CLTransposeConvLayer::get_deconvolution_method(input->info(), weights->info(), nullptr,
+ output->info(), deconv_info, invalid_right,
+ invalid_bottom, weights_info))
+ {
+ case DeconvolutionMethod::DIRECT:
+ {
+ auto f = arm_compute::support::cpp14::make_unique<CLDirectTransposeConvLayer>();
+ f->configure(compile_context, input, weights, bias, output, deconv_info, invalid_right,
+ invalid_bottom, weights_info);
+ _function = std::move(f);
+ break;
+ }
+ case DeconvolutionMethod::GEMM:
+ {
+ auto f = arm_compute::support::cpp14::make_unique<CLGEMMDeconvolutionLayer>(_memory_manager);
+ f->configure(compile_context, input, weights, bias, output, deconv_info);
+ _function = std::move(f);
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Not supported.");
+ break;
+ }
}
Status CLTransposeConvLayer::validate(const ITensorInfo *input, const ITensorInfo *weights,
const ITensorInfo *bias, ITensorInfo *output,
- const PadStrideInfo &info, unsigned int invalid_right,
+ const PadStrideInfo &deconv_info, unsigned int invalid_right,
unsigned int invalid_bottom, const WeightsInfo &weights_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
-
- const DataLayout data_layout = input->data_layout();
-
- const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
-
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) != weights->dimension(idx_h));
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1);
-
- const unsigned int kernel_x = weights->dimension(idx_w);
- const unsigned int kernel_y = weights->dimension(idx_h);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(invalid_right > kernel_x - 1,
- "invalid_right must be smaller than kernel_x");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(invalid_bottom > kernel_y - 1,
- "inner_border_top must be smaller than kernel_y");
-
- // NOTE From the existing CLDeconvolutionLayer, invalid_right and invalid_bottom were added.
- auto out_dims = transposeconv_output_dimensions(
- input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w),
- weights->dimension(idx_h), info, invalid_right, invalid_bottom);
-
- const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights);
-
- if (bias != nullptr)
+ switch (CLTransposeConvLayer::get_deconvolution_method(
+ input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info))
{
- if (is_data_type_quantized_asymmetric(input->data_type()))
+ case DeconvolutionMethod::DIRECT:
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
+ // Validate direct convolution layer
+ ARM_COMPUTE_RETURN_ON_ERROR(CLDirectTransposeConvLayer::validate(
+ input, weights, bias, output, deconv_info, invalid_right, invalid_bottom, weights_info));
+ break;
}
- else
+ case DeconvolutionMethod::GEMM:
{
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
+ // Validate gemm-based convolution layer
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLGEMMDeconvolutionLayer::validate(input, weights, bias, output, deconv_info));
+ break;
}
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, bias);
+ default:
+ ARM_COMPUTE_ERROR("Not supported.");
+ break;
}
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_w) != output_shape[idx_w],
- "Output's width is invalid.");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_h) != output_shape[idx_h],
- "Output's height is invalid.");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_c) != output_shape[idx_c],
- "Output's depth is invalid.");
-
- unsigned int pad_left = 0;
- unsigned int pad_right = 0;
- unsigned int pad_top = 0;
- unsigned int pad_bottom = 0;
- const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top,
- pad_bottom);
- TensorInfo scale_out_info(input->clone()
- ->set_is_resizable(true)
- .reset_padding()
- .set_tensor_shape(scale_out_shape)
- .set_data_layout(data_layout));
- const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
-
- ARM_COMPUTE_RETURN_ON_ERROR(
- CLTransposeConvLayerUpsample::validate(input, &scale_out_info, BorderSize(0, 0), info));
- ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayer::validate(&scale_out_info, weights, bias, output,
- conv_info, weights_info));
-
return Status{};
}
-void CLTransposeConvLayer::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias,
- ICLTensor *output, const PadStrideInfo &info,
- unsigned int invalid_right, unsigned int invalid_bottom,
- const WeightsInfo &weights_info)
+DeconvolutionMethod CLTransposeConvLayer::get_deconvolution_method(
+ const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias,
+ ITensorInfo *output, const PadStrideInfo &deconv_info, unsigned int invalid_right,
+ unsigned int invalid_bottom, const WeightsInfo &weights_info)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
-
- const unsigned int stride_x = info.stride().first;
- const unsigned int stride_y = info.stride().second;
+ ARM_COMPUTE_UNUSED(output, bias, weights_info);
- const DataLayout data_layout = input->info()->data_layout();
+ const DataLayout data_layout = input->data_layout();
const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- _original_weights = weights;
- _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
- _flip_weights.configure(weights, &_weights_flipped);
-
- // NOTE From the existing CLDeconvolutionLayer, invalid_right and invalid_bottom were
- // added.
- auto out_dims = transposeconv_output_dimensions(
- input->info()->dimension(idx_w), input->info()->dimension(idx_h),
- weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info, invalid_right,
- invalid_bottom);
-
- const TensorShape output_shape =
- compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
-
- // Output auto initialization if not yet initialized
- auto_init_if_empty(
- *output->info(),
- input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout));
-
- // Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(CLTransposeConvLayer::validate(
- input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(),
- info, invalid_right, invalid_bottom));
-
- _is_prepared = weights_info.retain_internal_weights();
-
- _memory_group.manage(&_scaled_output);
-
- // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order
- // to match output shape
- unsigned int pad_left = 0;
- unsigned int pad_right = 0;
- unsigned int pad_top = 0;
- unsigned int pad_bottom = 0;
- const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
- pad_right, pad_top, pad_bottom);
-
- TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
- input->info()->quantization_info());
- scale_out_info.set_data_layout(data_layout);
- _scaled_output.allocator()->init(scale_out_info);
-
- // configure scale function
- const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
- DimensionRoundingType::FLOOR);
- _scale_f.configure(input, &_scaled_output, BorderSize(0, 0), upsample_info);
-
- // setup the function to convolve the upscaled output
- const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
- _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info, weights_info);
- _scaled_output.allocator()->allocate();
+ if (weights->dimension(idx_w) != deconv_info.stride().first ||
+ weights->dimension(idx_h) != deconv_info.stride().second || invalid_right != 0 ||
+ invalid_bottom != 0)
+ {
+ return DeconvolutionMethod::DIRECT;
+ }
+
+ return DeconvolutionMethod::GEMM;
}
void CLTransposeConvLayer::run()
{
prepare();
-
- _memory_group.acquire();
-
- _scale_f.run();
- _conv_f.run();
-
- _memory_group.release();
+ _function->run();
}
-void CLTransposeConvLayer::prepare()
-{
- if (!_is_prepared)
- {
- ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
-
- // Run weights flipping and mark original weights tensor as unused
- _weights_flipped.allocator()->allocate();
- _weights_flipped.map(true);
- _original_weights->map(CLScheduler::get().queue(), true);
- CPPScheduler::get().schedule(&_flip_weights, Window::DimZ);
- _weights_flipped.unmap();
- _original_weights->unmap(CLScheduler::get().queue());
- _original_weights->mark_as_unused();
-
- // Prepare convolution
- _conv_f.prepare();
-
- if (!_weights_flipped.is_used())
- {
- _weights_flipped.allocator()->free();
- }
-
- _is_prepared = true;
- }
-}
+void CLTransposeConvLayer::prepare() { _function->prepare(); }
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h"
-
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-#include <cmath>
-#include <memory>
-#include <tuple>
-
-using namespace arm_compute;
-
-CLTransposeConvLayerUpsample::CLTransposeConvLayerUpsample() // NOLINT
- : _upsample(),
- _output(nullptr)
-{
-}
-
-Status CLTransposeConvLayerUpsample::validate(const ITensorInfo *input, const ITensorInfo *output,
- const BorderSize &inner_border,
- const PadStrideInfo &info)
-{
- return CLTransposeConvLayerUpsampleKernel::validate(input, output, inner_border, info);
-}
-
-void CLTransposeConvLayerUpsample::configure(ICLTensor *input, ICLTensor *output,
- const BorderSize &inner_border,
- const PadStrideInfo &info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- _output = output;
- _upsample.configure(input, _output, inner_border, info);
-}
-
-void CLTransposeConvLayerUpsample::run()
-{
- _output->map(CLScheduler::get().queue(), true);
- if (is_data_type_quantized_asymmetric(_output->info()->data_type()))
- {
- const uint8_t quantized_zero = _output->info()->quantization_info().uniform().offset;
- std::fill_n(_output->buffer(), _output->info()->total_size(), quantized_zero);
- }
- else
- {
- memset(_output->buffer(), 0, _output->info()->total_size());
- }
- _output->unmap(CLScheduler::get().queue());
-
- CLScheduler::get().enqueue(_upsample, false);
-}
#include "arm_compute/runtime/CPP/functions/CPPOneHotEx.h"
#include "arm_compute/core/CPP/kernels/CPPOneHotKernelEx.h"
-#include "support/ToolchainSupport.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
void CPPOneHotEx::configure(const ITensor *indices, const ITensor *depth, const ITensor *on_value,
const ITensor *off_value, ITensor *output, const int axis)
{
- auto k = arm_compute::support::cpp14::make_unique<CPPOneHotKernelEx>();
+ auto k = support::cpp14::make_unique<CPPOneHotKernelEx>();
k->configure(indices, depth, on_value, off_value, output, axis);
_kernel = std::move(k);
}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CPP/functions/CPPUpsampleEx.h"
-
-#include "arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h"
-#include "support/ToolchainSupport.h"
-
-using namespace arm_compute;
-
-void CPPUpsampleEx::configure(const ITensor *input, ITensor *output, const PadStrideInfo &info)
-{
- auto k = arm_compute::support::cpp14::make_unique<CPPUpsampleKernelEx>();
- k->configure(input, output, info);
- _kernel = std::move(k);
-}
#include "arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h"
#include "arm_compute/runtime/IRuntimeContext.h"
-#include "support/ToolchainSupport.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
void NEActivationLayerEx::configure(ITensor *input, ITensor *output,
ActivationLayerInfo activation_info)
{
- auto k = arm_compute::support::cpp14::make_unique<NEActivationLayerKernelEx>();
+ auto k = support::cpp14::make_unique<NEActivationLayerKernelEx>();
k->configure(input, output, activation_info);
_kernel = std::move(k);
}
#include <arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h>
#include "arm_compute/core/ITensor.h"
-#include "support/ToolchainSupport.h"
+#include "support/MemorySupport.h"
#include <utility>
void NEBinaryLogicalOperationStatic<COP>::configure(ITensor *input1, ITensor *input2,
ITensor *output)
{
- auto k = arm_compute::support::cpp14::make_unique<NEBinaryLogicalOperationKernel>();
+ auto k = support::cpp14::make_unique<NEBinaryLogicalOperationKernel>();
k->configure(COP, input1, input2, output);
_kernel = std::move(k);
}
void NEBinaryLogicalOperation::configure(ITensor *input1, ITensor *input2, ITensor *output,
BinaryLogicalOperation op)
{
- auto k = arm_compute::support::cpp14::make_unique<NEBinaryLogicalOperationKernel>();
+ auto k = support::cpp14::make_unique<NEBinaryLogicalOperationKernel>();
k->configure(op, input1, input2, output);
_kernel = std::move(k);
}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NECast.h"
-
-#include "arm_compute/core/NEON/kernels/NECastKernel.h"
-#include "support/ToolchainSupport.h"
-
-namespace arm_compute
-{
-void NECast::configure(const ITensor *input, ITensor *output, SubDataType input_subtype)
-{
- auto k = arm_compute::support::cpp14::make_unique<NECastKernel>();
- k->configure(input, output, input_subtype);
- _kernel = std::move(k);
-}
-
-Status NECast::validate(const ITensorInfo *input, const ITensorInfo *output,
- SubDataType input_subtype)
-{
- return NECastKernel::validate(input, output, input_subtype);
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-
-namespace arm_compute
-{
-void NEDepthToSpaceLayerEx::configure(const ITensor *input, ITensor *output, int32_t block_shape)
-{
- auto k = arm_compute::support::cpp14::make_unique<NEDepthToSpaceLayerKernelEx>();
- k->configure(input, output, block_shape);
- _kernel = std::move(k);
-}
-
-Status NEDepthToSpaceLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- int32_t block_shape)
-{
- return NEDepthToSpaceLayerKernelEx::validate(input, output, block_shape);
-}
-} // namespace arm_compute
#include "arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h"
#include "arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h"
-#include "support/ToolchainSupport.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
void NEEmbeddingLookup::configure(const ITensor *input, ITensor *output, const ITensor *lookups)
{
- auto k = arm_compute::support::cpp14::make_unique<NEEmbeddingLookupKernel>();
+ auto k = support::cpp14::make_unique<NEEmbeddingLookupKernel>();
k->configure(input, output, lookups);
_kernel = std::move(k);
}
Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output)
{
ARM_COMPUTE_RETURN_ON_ERROR(
- NEGEMMLowpMatrixMultiplyCoreEx::validate(&input, &weights, nullptr, &output));
+ NEGEMMLowpMatrixMultiplyCore::validate(&input, &weights, nullptr, &output));
return Status{};
}
void NEFullyConnectedHybridLayerReshapeWeights::configure(const ITensor *input, ITensor *output)
{
- auto k = arm_compute::support::cpp14::make_unique<NETransposeKernel>();
+ auto k = support::cpp14::make_unique<NETransposeKernel>();
k->configure(input, output);
_kernel = std::move(k);
}
// Quantize input
_quantized_input.allocator()->init(
- input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8));
+ input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(
+ DataType::QASYMM8_SIGNED));
_scale_factor.allocator()->init(
TensorInfo(TensorShape{output->info()->dimension(1)}, 1, DataType::F32));
_quant_input_kernel.configure(input, &_quantized_input, &_scale_factor);
ARM_COMPUTE_UNUSED(fc_info.retain_internal_weights);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::S8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8_SIGNED);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 2);
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
// Validate quantization kernel
- const ITensorInfo &quantized_input = TensorInfo(
- input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8));
+ const ITensorInfo &quantized_input =
+ TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_data_type(
+ DataType::QASYMM8_SIGNED));
const ITensorInfo &scale_factor = TensorInfo(TensorShape{output->dimension(1)}, 1, DataType::F32);
ARM_COMPUTE_RETURN_ON_ERROR(
NEQuantizationSymmetricKernel::validate(input, &quantized_input, &scale_factor));
assert(kernel_type == KernelType::PREPROCESSED_WEIGHTS);
bool is_hybrid = input->info()->data_type() == DataType::F32 &&
- weights->info()->data_type() == DataType::S8;
+ (weights->info()->data_type() == DataType::S8 ||
+ weights->info()->data_type() == DataType::QASYMM8_SIGNED);
if (is_hybrid)
{
auto fc = new arm_compute::NEFullyConnectedHybridLayer{_memory_manager};
+ ITensorInfo *weights_info = const_cast<ITensorInfo *>(_weights->info());
+ const auto origin_weights_data_type = weights_info->data_type();
+ weights_info->set_data_type(DataType::QASYMM8_SIGNED);
fc->configure(input_to_use, _weights, _biases, _output);
+ weights_info->set_data_type(origin_weights_data_type);
return std::unique_ptr<arm_compute::IFunction>(fc);
}
else
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "support/ToolchainSupport.h"
-
-using namespace arm_compute;
-using namespace arm_compute::misc::shape_calculator;
-
-NEGEMMLowpMatrixMultiplyCoreEx::NEGEMMLowpMatrixMultiplyCoreEx(
- std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(memory_manager), _asm_glue(memory_manager), _mm_kernel(nullptr),
- _mtx_a_reshape_kernel(nullptr), _mtx_b_reshape_kernel(nullptr), _mtx_a_reduction_kernel(),
- _mtx_b_reduction_kernel(), _offset_contribution_kernel(),
- _offset_contribution_output_stage_kernel(), _vector_sum_col(), _vector_sum_row(), _tmp_a(),
- _tmp_b(), _mm_result_s32(), _signed_a(), _signed_output(), _original_b(nullptr), _a_offset(0),
- _b_offset(0), _run_vector_matrix_multiplication(false), _assembly_path(false),
- _fused_assembly_path(false), _reshape_b_only_on_first_run(false), _is_prepared(false),
- _fuse_output_stage(false), _flip_signedness(false)
-{
-}
-
-void NEGEMMLowpMatrixMultiplyCoreEx::configure(const ITensor *a, const ITensor *b, const ITensor *c,
- ITensor *output, const GEMMInfo &gemm_info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
- ARM_COMPUTE_UNUSED(c);
- ARM_COMPUTE_ERROR_THROW_ON(NEGEMMLowpMatrixMultiplyCoreEx::validate(
- a->info(), b->info(), c != nullptr ? c->info() : nullptr, output->info(), gemm_info));
-
- const ITensor *matrix_a = a;
- const ITensor *matrix_b = b;
- GEMMInfo info = gemm_info;
-
- // Clear state
- _mtx_a_reshape_kernel = nullptr;
- _mtx_b_reshape_kernel = nullptr;
-
- // Set internal variables
- _a_offset = a->info()->quantization_info().uniform().offset;
- _b_offset = b->info()->quantization_info().uniform().offset;
- _run_vector_matrix_multiplication = a->info()->dimension(1) < 2;
- _reshape_b_only_on_first_run = info.reshape_b_only_on_first_run();
- _is_prepared = false;
- _fused_assembly_path = false;
- _original_b = b;
-
- const ITensor *a_to_use = a;
-
- // If GEMMLowpOutputStage != NONE, fuse the offset contribution with the output stage
- if (info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE)
- {
- _fuse_output_stage = true;
- _memory_group.manage(&_mm_result_s32);
- TensorInfo info_mm_result_s32(output->info()->tensor_shape(), 1, DataType::S32);
- _mm_result_s32.allocator()->init(info_mm_result_s32);
- }
-
-#ifdef __aarch64__
- switch (a->info()->data_type())
- {
- case DataType::QASYMM8:
- case DataType::QASYMM8_SIGNED:
- case DataType::U8:
- case DataType::S8:
- {
- if (a_to_use->info()->data_type() == DataType::QASYMM8 &&
- info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
- {
- _asm_glue.configure(a_to_use, b, c, output, gemm_info);
- _fused_assembly_path = _asm_glue.is_configured();
- }
- else
- {
- _asm_glue.configure(a_to_use, b, nullptr, _fuse_output_stage ? &_mm_result_s32 : output,
- gemm_info);
- }
- _assembly_path = _asm_glue.is_configured();
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("Datatype not supported");
- break;
- }
- }
-#endif /* __aarch64__ */
- if (!(_assembly_path || _run_vector_matrix_multiplication))
- {
- matrix_a = &_tmp_a;
- matrix_b = &_tmp_b;
-
- // The interleaved output matrix will have the following shape: [ a_height * 4, ceil(a_width /
- // 4.0f) ]
- TensorInfo a_info(compute_interleaved_shape(*a_to_use->info()), 1,
- a_to_use->info()->data_type(), a_to_use->info()->quantization_info());
- // The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width /
- // 16.0f) ]
- TensorInfo b_info(compute_transpose1xW_shape(*b->info()), 1, b->info()->data_type(),
- b->info()->quantization_info());
- _tmp_a.allocator()->init(a_info);
- _tmp_b.allocator()->init(b_info);
- _memory_group.manage(&_tmp_a);
- if (!_reshape_b_only_on_first_run)
- {
- _memory_group.manage(&_tmp_b);
- }
-
- // Configure interleave kernel
- {
- auto k = arm_compute::support::cpp14::make_unique<NEGEMMInterleave4x4Kernel>();
- k->configure(a_to_use, &_tmp_a);
- _mtx_a_reshape_kernel = std::move(k);
- }
-
- // Configure transpose kernel
- {
- auto k = arm_compute::support::cpp14::make_unique<NEGEMMTranspose1xWKernel>();
- k->configure(b, &_tmp_b);
- _mtx_b_reshape_kernel = std::move(k);
- }
- }
-
- if (!_fused_assembly_path)
- {
- // Initialize matrix B reduction kernel only if _a_offset is not equal to 0
- if (_a_offset != 0)
- {
- TensorInfo info_vector_sum_col(compute_reductionA_shape(*b->info()), 1, DataType::S32);
-
- _vector_sum_col.allocator()->init(info_vector_sum_col);
- if (!_reshape_b_only_on_first_run)
- {
- _memory_group.manage(&_vector_sum_col);
- }
-
- // Configure Matrix B reduction kernel
- _mtx_b_reduction_kernel.configure(b, &_vector_sum_col, a_to_use->info()->dimension(0), false);
- }
-
- // Initialize Matrix A reduction kernel only if _b_offset is not equal to 0
- if (_b_offset != 0)
- {
- TensorInfo info_vector_sum_row(compute_reductionB_shape(*a_to_use->info()), 1, DataType::S32);
-
- _vector_sum_row.allocator()->init(info_vector_sum_row);
- _memory_group.manage(&_vector_sum_row);
-
- // Configure matrix A reduction kernel
- _mtx_a_reduction_kernel.configure(a_to_use, &_vector_sum_row, a_to_use->info()->dimension(0),
- false);
- }
-
- if (_fuse_output_stage)
- {
- // Configure matrix multiply kernel
- if (!_assembly_path)
- {
- auto k = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixMultiplyKernel>();
- k->configure(matrix_a, matrix_b, &_mm_result_s32);
- _mm_kernel = std::move(k);
- }
-
- _offset_contribution_output_stage_kernel.configure(
- &_mm_result_s32, _a_offset == 0 ? nullptr : &_vector_sum_col,
- _b_offset == 0 ? nullptr : &_vector_sum_row, c,
- _flip_signedness ? &_signed_output : output, a->info()->dimension(0), _a_offset,
- _b_offset, info.gemmlowp_output_stage());
- }
- else
- {
- // Configure matrix multiply kernel
- if (!_assembly_path)
- {
- auto k = arm_compute::support::cpp14::make_unique<NEGEMMLowpMatrixMultiplyKernel>();
- k->configure(matrix_a, matrix_b, output);
- _mm_kernel = std::move(k);
- }
- // Configure offset contribution kernel
- _offset_contribution_kernel.configure(output, _a_offset == 0 ? nullptr : &_vector_sum_col,
- _b_offset == 0 ? nullptr : &_vector_sum_row,
- a_to_use->info()->dimension(0), _a_offset, _b_offset);
- }
- }
-
- // Allocate tensors
- if (!_assembly_path && !_run_vector_matrix_multiplication)
- {
- _tmp_a.allocator()->allocate();
- if (!_reshape_b_only_on_first_run)
- {
- _tmp_b.allocator()->allocate();
- }
- }
-
- if (!_fused_assembly_path)
- {
- if (_a_offset != 0 && !_reshape_b_only_on_first_run)
- {
- _vector_sum_col.allocator()->allocate();
- }
-
- if (_b_offset != 0)
- {
- _vector_sum_row.allocator()->allocate();
- }
- }
-
- if (_fuse_output_stage)
- {
- _mm_result_s32.allocator()->allocate();
- }
-}
-
-Status NEGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITensorInfo *b,
- const ITensorInfo *c, const ITensorInfo *output,
- const GEMMInfo &gemm_info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::S8);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(b, 1, DataType::S8);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- c != nullptr && gemm_info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::NONE,
- "Bias addition not supported in NEGEMMLowpMatrixMultiplyCoreEx for output S32");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((a)->dimension(0) != (b)->dimension(1),
- "The product AB is defined only if the number of columns in A is "
- "equal to the number of rows in B");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(),
- "Matrix A already reshaped is not supported");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(),
- "Matrix B already reshaped is not supported");
-
- GEMMInfo info = gemm_info;
- const ITensorInfo *matrix_a_info = a;
- const ITensorInfo *matrix_b_info = b;
-
- const ITensorInfo *a_to_use = a;
-
- TensorInfo tmp_a_info{};
- TensorInfo tmp_b_info{};
- TensorInfo mm_result_s32_info{};
-
- int32_t a_offset = a->quantization_info().uniform().offset;
- int32_t b_offset = b->quantization_info().uniform().offset;
-
- bool fuse_output_stage = info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE;
- if (fuse_output_stage)
- {
- auto_init_if_empty(
- mm_result_s32_info,
- a->clone()->set_tensor_shape(output->tensor_shape()).set_data_type(DataType::S32));
- }
-
- // Check if we need to run the optimized assembly kernel
- bool run_optimised = false;
- bool run_optimised_requantized = false;
- if (a_to_use->data_type() == DataType::QASYMM8 &&
- info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
- {
- run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, c, output, gemm_info));
- run_optimised_requantized = run_optimised;
- }
- else
- {
- run_optimised = bool(NEGEMMAssemblyDispatch::validate(
- a_to_use, b, c, fuse_output_stage ? &mm_result_s32_info : output, gemm_info));
- }
-
- if (run_optimised)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(b->dimension(0) != output->dimension(0));
- if (info.depth_output_gemm3d() != 0)
- {
- if (info.reinterpret_input_as_3d())
- {
- ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != output->dimension(1));
- ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(2) != output->dimension(2));
- }
- else
- {
- ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != output->dimension(1) * output->dimension(2));
- }
- }
- else
- {
- ARM_COMPUTE_RETURN_ERROR_ON(a->dimension(1) != output->dimension(1));
- }
- }
- else
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.reinterpret_input_as_3d(),
- "NEGEMM cannot reinterpret the input tensor as 3D");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.depth_output_gemm3d() != 0,
- "NEGEMM cannot reinterpret the output tensor as 3D");
-
- const bool run_vector_matrix_multiplication = a->dimension(1) < 2;
- if (!run_vector_matrix_multiplication)
- {
- matrix_a_info = &tmp_a_info;
- matrix_b_info = &tmp_b_info;
-
- // The interleaved output matrix will have the following shape: [ a_height * 4, ceil(a_width /
- // 4.0f) ]
- TensorShape shape_tmp_a = a->tensor_shape();
- shape_tmp_a.set(0, a->dimension(0) * 4);
- shape_tmp_a.set(1, std::ceil(a->dimension(1) / 4.f));
-
- // The transpose1xW output matrix will have the following shape: [ b_height * 16, ceil(b_width
- // / 16.0f) ]
- TensorShape shape_tmp_b = b->tensor_shape();
- shape_tmp_b.set(0, b->dimension(1) * 16);
- shape_tmp_b.set(1, std::ceil(b->dimension(0) / 16.f));
-
- // Validate interleave kernel
- auto_init_if_empty(tmp_a_info, a_to_use->clone()->set_tensor_shape(shape_tmp_a));
- auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(shape_tmp_b));
-
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMInterleave4x4Kernel::validate(a_to_use, &tmp_a_info));
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMTranspose1xWKernel::validate(b, &tmp_b_info));
- }
- }
-
- if (!run_optimised_requantized)
- {
- TensorInfo info_vector_sum_col{};
- TensorInfo info_vector_sum_row{};
-
- // Validate matrix B reduction kernel only if _a_offset is not equal to 0
- if (a_offset != 0)
- {
- info_vector_sum_col = TensorInfo(compute_reductionA_shape(*b), 1, DataType::S32);
-
- // Configure Matrix B reduction kernel
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixBReductionKernel::validate(
- b, &info_vector_sum_col, a->dimension(0), false));
- }
-
- // Validate Matrix A reduction kernel only if _b_offset is not equal to 0
- if (b_offset != 0)
- {
- info_vector_sum_row = TensorInfo(compute_reductionB_shape(*a), 1, DataType::S32);
-
- // Configure matrix A reduction kernel
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(
- a_to_use, &info_vector_sum_row, a->dimension(0), false));
- }
-
- if (fuse_output_stage)
- {
- if (!run_optimised)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyKernel::validate(
- matrix_a_info, matrix_b_info, &mm_result_s32_info));
- }
-
- // Validate offset contribution kernel
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOffsetContributionOutputStageKernel::validate(
- &mm_result_s32_info, a_offset == 0 ? nullptr : &info_vector_sum_col,
- b_offset == 0 ? nullptr : &info_vector_sum_row, c, output, a_offset, b_offset,
- info.gemmlowp_output_stage()));
- }
- else
- {
- if (!run_optimised)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(
- NEGEMMLowpMatrixMultiplyKernel::validate(matrix_a_info, matrix_b_info, output));
- }
- // Validate offset contribution kernel
- ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOffsetContributionKernel::validate(
- output, a_offset == 0 ? nullptr : &info_vector_sum_col,
- b_offset == 0 ? nullptr : &info_vector_sum_row, a_offset, b_offset));
- }
- }
- return Status{};
-}
-
-void NEGEMMLowpMatrixMultiplyCoreEx::run()
-{
- prepare();
-
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- // Reshape inputs
- if (_mtx_a_reshape_kernel)
- {
- NEScheduler::get().schedule(_mtx_a_reshape_kernel.get(), Window::DimY);
- }
- if (_mtx_b_reshape_kernel && !_reshape_b_only_on_first_run)
- {
- NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY);
- }
-
- // Run GEMM
- if (_asm_glue.is_configured())
- {
- _asm_glue.run();
- }
- else
- {
- NEScheduler::get().schedule(_mm_kernel.get(), Window::DimY);
- }
-
- if (!_fused_assembly_path)
- {
- // Run matrix A reduction kernel only if _b_offset is not equal to 0
- if (_b_offset != 0)
- {
- NEScheduler::get().schedule(&_mtx_a_reduction_kernel, Window::DimX);
- }
-
- // Run matrix B reduction kernel only if _a_offset is not equal to 0
- if (_a_offset != 0 && !_reshape_b_only_on_first_run)
- {
- NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX);
- }
-
- if (_fuse_output_stage)
- {
- // Run offset contribution kernel
- NEScheduler::get().schedule(&_offset_contribution_output_stage_kernel, Window::DimY);
- }
- else
- {
- // Run offset contribution kernel
- NEScheduler::get().schedule(&_offset_contribution_kernel, Window::DimY);
- }
- }
-}
-
-void NEGEMMLowpMatrixMultiplyCoreEx::prepare()
-{
- if (!_is_prepared)
- {
- // Run assembly reshape
- if (_asm_glue.is_configured() && _reshape_b_only_on_first_run)
- {
- ARM_COMPUTE_ERROR_ON(!_original_b->is_used());
-
- _asm_glue.prepare();
- _original_b->mark_as_unused();
- }
- // Run non-assembly reshape
- else if (_mtx_b_reshape_kernel && _reshape_b_only_on_first_run)
- {
- ARM_COMPUTE_ERROR_ON(!_original_b->is_used());
-
- // Run reshape kernel and mark original weights tensor as unused
- _tmp_b.allocator()->allocate();
- NEScheduler::get().schedule(_mtx_b_reshape_kernel.get(), Window::DimY);
- _original_b->mark_as_unused();
- }
-
- // Run matrix B reduction kernel only if _a_offset is not equal to 0
- if (_a_offset != 0 && _reshape_b_only_on_first_run)
- {
- _vector_sum_col.allocator()->allocate();
- NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX);
- }
-
- _is_prepared = true;
- }
-}
#include "arm_compute/runtime/NEON/functions/NEGatherEx.h"
#include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h"
-#include "support/ToolchainSupport.h"
+#include "support/MemorySupport.h"
#include <utility>
{
void NEGatherEx::configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis)
{
- auto k = arm_compute::support::cpp14::make_unique<NEGatherKernelEx>();
+ auto k = support::cpp14::make_unique<NEGatherKernelEx>();
k->configure(input, indices, output, axis);
_kernel = std::move(k);
}
#include "arm_compute/runtime/NEON/functions/NEHashtableLookup.h"
#include "arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h"
-#include "support/ToolchainSupport.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
void NEHashtableLookup::configure(const ITensor *lookups, const ITensor *keys, const ITensor *input,
ITensor *output, ITensor *hits)
{
- auto k = arm_compute::support::cpp14::make_unique<NEHashtableLookupKernel>();
+ auto k = support::cpp14::make_unique<NEHashtableLookupKernel>();
k->configure(lookups, keys, input, output, hits);
_kernel = std::move(k);
}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEPReLU.h"
-
-#include "arm_compute/core/NEON/kernels/NEPReLUKernel.h"
-#include "support/ToolchainSupport.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void NEPReLU::configure(const ITensor *input, const ITensor *alpha, ITensor *output)
-{
- auto k = arm_compute::support::cpp14::make_unique<NEPReLUKernel>();
- k->configure(input, alpha, output);
- _kernel = std::move(k);
-}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NERNNLayerEx.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-
-namespace arm_compute
-{
-NERNNLayerEx::NERNNLayerEx(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(),
- _activation_kernel(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(),
- _gemm_output(), _add_output(), _is_prepared(false)
-{
-}
-
-Status NERNNLayerEx::validate(const ITensorInfo *input, const ITensorInfo *weights,
- const ITensorInfo *recurrent_weights, const ITensorInfo *bias,
- const ITensorInfo *hidden_state, const ITensorInfo *output,
- const ActivationLayerInfo &info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state,
- output);
-
- const int idx_width = 0;
- const int idx_height = 1;
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_width) != weights->dimension(idx_width));
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_height) !=
- recurrent_weights->dimension(idx_width));
- ARM_COMPUTE_RETURN_ERROR_ON(recurrent_weights->dimension(idx_width) !=
- recurrent_weights->dimension(idx_height));
- ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() != 1);
- ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(idx_width) != weights->dimension(idx_height));
- ARM_COMPUTE_RETURN_ERROR_ON(hidden_state->dimension(idx_width) != weights->dimension(idx_height));
- ARM_COMPUTE_RETURN_ERROR_ON(hidden_state->dimension(idx_height) != input->dimension(idx_height));
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(),
- hidden_state->tensor_shape());
-
- auto shape_info = TensorInfo(misc::shape_calculator::compute_rnn_shape(
- recurrent_weights, hidden_state->dimension(idx_height)),
- 1, input->data_type());
-
- ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayer::validate(input, weights, bias, &shape_info));
- ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate(
- &shape_info, &shape_info, &shape_info, ConvertPolicy::SATURATE));
- ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayerKernel::validate(&shape_info, &shape_info, info));
-
- return Status{};
-}
-
-void NERNNLayerEx::configure(const ITensor *input, const ITensor *weights,
- const ITensor *recurrent_weights, const ITensor *bias,
- ITensor *hidden_state, ITensor *output, ActivationLayerInfo &info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, output);
- ARM_COMPUTE_ERROR_THROW_ON(NERNNLayerEx::validate(input->info(), weights->info(),
- recurrent_weights->info(), bias->info(),
- hidden_state->info(), output->info(), info));
-
- const int idx_height = 1;
- TensorShape shape = misc::shape_calculator::compute_rnn_shape(
- recurrent_weights->info(), hidden_state->info()->dimension(idx_height));
-
- _is_prepared = false;
-
- // Manage intermediate buffers and configure
- _fully_connected_out.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
- _gemm_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
-
- // Manage intermediate buffers and configure
- _memory_group.manage(&_fully_connected_out);
- _fully_connected_kernel.configure(input, weights, bias, &_fully_connected_out);
-
- _memory_group.manage(&_gemm_output);
- _gemm_state_f.configure(hidden_state, recurrent_weights, nullptr, &_gemm_output, 1.f, 0.f);
-
- _add_output.allocator()->init(TensorInfo(shape, 1, input->info()->data_type()));
- _memory_group.manage(&_add_output);
-
- _add_kernel.configure(&_fully_connected_out, &_gemm_output, &_add_output,
- ConvertPolicy::SATURATE);
-
- _fully_connected_out.allocator()->allocate();
- _gemm_output.allocator()->allocate();
-
- _activation_kernel.configure(&_add_output, hidden_state, info);
- _add_output.allocator()->allocate();
-
- _copy_kernel.configure(hidden_state, output);
-}
-
-void NERNNLayerEx::run()
-{
- prepare();
-
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- _fully_connected_kernel.run();
-
- _gemm_state_f.run();
-
- NEScheduler::get().schedule(&_add_kernel, Window::DimY);
- NEScheduler::get().schedule(&_activation_kernel, Window::DimY);
-
- // copy hidden out to output
- NEScheduler::get().schedule(&_copy_kernel, Window::DimY);
-}
-
-void NERNNLayerEx::prepare()
-{
- if (!_is_prepared)
- {
- _fully_connected_kernel.prepare();
- _gemm_state_f.prepare();
-
- _is_prepared = true;
- }
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NEReduceMeanEx.h"
-
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-
-using namespace arm_compute;
-
-NEReduceMeanEx::NEReduceMeanEx(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reduction_kernels(), _reduced_outs(), _reshape(),
- _reduction_ops(), _keep_dims()
-{
-}
-
-Status NEReduceMeanEx::validate(const ITensorInfo *input, const Coordinates &reduction_axis,
- bool keep_dims, const ITensorInfo *output)
-{
- ARM_COMPUTE_UNUSED(keep_dims);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- ARM_COMPUTE_RETURN_ERROR_ON(reduction_axis.num_dimensions() > input->num_dimensions());
-
- TensorShape out_shape = input->tensor_shape();
- const unsigned int reduction_ops = reduction_axis.num_dimensions();
- const int input_dims = input->num_dimensions();
- Coordinates axis_local = reduction_axis;
-
- // Convert negative axis
- for (unsigned int i = 0; i < reduction_ops; ++i)
- {
- axis_local[i] = wrap_around(axis_local[i], input_dims);
- }
-
- std::sort(axis_local.begin(), axis_local.begin() + reduction_ops);
- for (unsigned int i = 0; i < reduction_ops; ++i)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(axis_local[i] > 3);
- ARM_COMPUTE_RETURN_ERROR_ON(static_cast<unsigned int>(axis_local[i]) >
- input->num_dimensions() - 1);
- if (output->total_size() > 0 && keep_dims)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(axis_local[i]) != 1);
- }
- if (keep_dims)
- {
- out_shape.set(axis_local[i], 1);
- }
- else
- {
- out_shape.remove_dimension(axis_local[i] - i);
- }
- }
- const TensorInfo out_info = input->clone()->set_tensor_shape(out_shape);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info);
-
- return Status{};
-}
-
-void NEReduceMeanEx::configure(ITensor *input, const Coordinates &reduction_axis, bool keep_dims,
- ITensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input);
-
- _reduction_ops = reduction_axis.num_dimensions();
- _reduction_kernels =
- arm_compute::support::cpp14::make_unique<NEReductionOperation[]>(_reduction_ops);
- _reduced_outs =
- arm_compute::support::cpp14::make_unique<Tensor[]>(_reduction_ops - (keep_dims ? 1 : 0));
- _keep_dims = keep_dims;
-
- Coordinates axis_local = reduction_axis;
- const int input_dims = input->info()->num_dimensions();
- const unsigned int reduction_ops = reduction_axis.num_dimensions();
-
- // Convert negative axis
- for (unsigned int i = 0; i < reduction_ops; ++i)
- {
- axis_local[i] = wrap_around(axis_local[i], input_dims);
- }
-
- // Perform reduction for every axis
- for (unsigned int i = 0; i < _reduction_ops; ++i)
- {
- TensorShape out_shape = i == 0 ? input->info()->tensor_shape()
- : (_reduced_outs.get() + i - 1)->info()->tensor_shape();
- out_shape.set(axis_local[i], 1);
- auto in = (i == 0) ? input : (_reduced_outs.get() + i - 1);
-
- if (i == _reduction_ops - 1 && keep_dims)
- {
- _reduction_kernels[i].configure(in, output, axis_local[i], ReductionOperation::MEAN_SUM);
- }
- else
- {
- _reduced_outs[i].allocator()->init(TensorInfo(out_shape, input->info()->num_channels(),
- input->info()->data_type(),
- input->info()->quantization_info())
- .set_data_layout(output->info()->data_layout()));
- _memory_group.manage(_reduced_outs.get() + i);
- _reduction_kernels[i].configure(in, _reduced_outs.get() + i, axis_local[i],
- ReductionOperation::MEAN_SUM);
- }
- }
-
- // Allocate intermediate tensors
- for (unsigned int i = 0; i < _reduction_ops - (keep_dims ? 1 : 0); ++i)
- {
- _reduced_outs[i].allocator()->allocate();
- }
-
- // Configure reshape layer if we want to drop the dimensions
- if (!keep_dims)
- {
- TensorShape out_shape = input->info()->tensor_shape();
-
- // We have to sort the reduction axis vectors in order for remove_dimension
- // to work properly
- std::sort(axis_local.begin(), axis_local.begin() + _reduction_ops);
- for (unsigned int i = 0; i < _reduction_ops; ++i)
- {
- out_shape.remove_dimension(axis_local[i] - i);
- }
- auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(out_shape));
- _reshape.configure(_reduced_outs.get() + _reduction_ops - 1, output);
- }
-}
-
-void NEReduceMeanEx::run()
-{
- _memory_group.acquire();
-
- for (unsigned int i = 0; i < _reduction_ops; ++i)
- {
- _reduction_kernels[i].run();
- }
-
- if (!_keep_dims)
- {
- _reshape.run();
- }
- _memory_group.release();
-}
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NESpaceToBatchLayerEx.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-
-namespace arm_compute
-{
-NESpaceToBatchLayerEx::NESpaceToBatchLayerEx()
- : _space_to_batch_kernel(), _memset_kernel(), _has_padding(false)
-{
-}
-
-void NESpaceToBatchLayerEx::configure(const ITensor *input, const ITensor *block_shape,
- const ITensor *paddings, ITensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, block_shape, paddings, output);
-
- if (input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size())
- {
- _has_padding = true;
- _memset_kernel.configure(
- output, PixelValue(0, output->info()->data_type(), output->info()->quantization_info()));
- }
- _space_to_batch_kernel.configure(input, block_shape, paddings, output);
-}
-
-void NESpaceToBatchLayerEx::configure(const ITensor *input, const int block_shape_x,
- const int block_shape_y, const Size2D &padding_left,
- const Size2D &padding_right, ITensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- if (input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size())
- {
- _has_padding = true;
- _memset_kernel.configure(
- output, PixelValue(0, output->info()->data_type(), output->info()->quantization_info()));
- }
- _space_to_batch_kernel.configure(input, block_shape_x, block_shape_y, padding_left, padding_right,
- output);
-}
-
-Status NESpaceToBatchLayerEx::validate(const ITensorInfo *input, const ITensorInfo *block_shape,
- const ITensorInfo *paddings, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(
- NESpaceToBatchLayerKernel::validate(input, block_shape, paddings, output));
-
- return Status{};
-}
-
-Status NESpaceToBatchLayerEx::validate(const ITensorInfo *input, const int block_shape_x,
- const int block_shape_y, const Size2D &padding_left,
- const Size2D &padding_right, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(NESpaceToBatchLayerKernel::validate(
- input, block_shape_x, block_shape_y, padding_left, padding_right, output));
-
- return Status{};
-}
-
-void NESpaceToBatchLayerEx::run()
-{
- // Zero out output only if we have paddings
- if (_has_padding)
- {
- NEScheduler::get().schedule(&_memset_kernel, Window::DimY);
- }
- NEScheduler::get().schedule(&_space_to_batch_kernel, Window::DimY);
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/NEON/functions/NESpaceToDepthLayerEx.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-
-namespace arm_compute
-{
-void NESpaceToDepthLayerEx::configure(const ITensor *input, ITensor *output, int32_t block_shape)
-{
- auto k = arm_compute::support::cpp14::make_unique<NESpaceToDepthLayerKernelEx>();
- k->configure(input, output, block_shape);
- _kernel = std::move(k);
-}
-
-Status NESpaceToDepthLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- int32_t block_shape)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(NESpaceToDepthLayerKernelEx::validate(input, output, block_shape));
- return Status{};
-}
-} // namespace arm_compute
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-
#include "arm_compute/runtime/NEON/functions/NETransposeConvLayer.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Utils.h"
#include "arm_compute/core/UtilsEx.h"
#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
namespace arm_compute
{
+
NETransposeConvLayer::NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
: _memory_group(std::move(memory_manager)),
_conv_f(),
_upsample_f(),
_flip_weights(),
- _permute_input(),
- _permute_weights(),
- _permute_output(),
_scaled_output(),
_weights_flipped(),
- _permuted_input(),
- _permuted_weights(),
- _permuted_output(),
- _is_nchw(false),
+ _flip_axis(),
_original_weights(nullptr),
_input(nullptr),
_info(),
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16,
- DataType::QASYMM8);
+ DataType::QASYMM8, DataType::QASYMM8_SIGNED);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input);
const unsigned int width_idx =
weights->dimension(height_idx), info, invalid_right, invalid_bottom);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
- if (is_data_type_quantized_asymmetric(input->data_type()) && bias)
+ if (bias != nullptr)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
- }
- else if (bias)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
+ if (is_data_type_quantized_asymmetric(input->data_type()))
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
+ }
}
if (output->tensor_shape().total_size() > 0)
const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) < output_shape.x(),
- "Output's dim 0 is invalid.");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) < output_shape.y(),
- "Output's dim 1 is invalid.");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) < output_shape.z(),
- "Output's dim 2 is invalid.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) != output_shape.x(),
+ "Output's width is invalid.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) != output_shape.y(),
+ "Output's height is invalid.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) != output_shape.z(),
+ "Output's depth is invalid.");
}
unsigned int pad_left = 0;
pad_bottom);
TensorInfo scale_out_info(
input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape));
- scale_out_info.set_data_layout(input->data_layout());
const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
const unsigned int batches_idx =
ITensor *output, const PadStrideInfo &info,
unsigned int invalid_right, unsigned int invalid_bottom)
{
+ // Perform validation step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+ ARM_COMPUTE_ERROR_THROW_ON(NETransposeConvLayer::validate(
+ input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(),
+ info, invalid_right, invalid_bottom));
const DataLayout data_layout = input->info()->data_layout();
-
- _input = input;
- _original_weights = weights;
- _info = info;
- _is_prepared = false;
- _is_nchw = data_layout == DataLayout::NCHW;
-
- const unsigned int stride_x = info.stride().first;
- const unsigned int stride_y = info.stride().second;
-
const unsigned int width_idx =
get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const unsigned int height_idx =
const TensorShape output_shape =
compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
+
+ _input = input;
+ _original_weights = weights;
+ _info = info;
+ _is_prepared = false;
+
+ unsigned int pad_left = 0;
+ unsigned int pad_right = 0;
+ unsigned int pad_top = 0;
+ unsigned int pad_bottom = 0;
+ const unsigned int stride_x = info.stride().first;
+ const unsigned int stride_y = info.stride().second;
+
// Output auto initialization if not yet initialized
auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(),
input->info()->quantization_info());
- // Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(NETransposeConvLayer::validate(
- input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(),
- info, invalid_right, invalid_bottom));
-
+ _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
_memory_group.manage(&_scaled_output);
- if (!_is_nchw)
- {
- _memory_group.manage(&_permuted_input);
- _memory_group.manage(&_permuted_weights);
- _memory_group.manage(&_permuted_output);
-
- // Configure the function to transform the input tensor from NHWC -> NCHW
- _permuted_input.info()->set_quantization_info(input->info()->quantization_info());
- _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
- _permuted_input.info()->set_data_layout(DataLayout::NCHW);
-
- // Configure the function to transform the weights tensor from NHWC -> NCHW
- _permuted_weights.info()->set_quantization_info(weights->info()->quantization_info());
- _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
- _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
-
- // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in
- // order to match output shape
-
- unsigned int pad_left = 0;
- unsigned int pad_right = 0;
- unsigned int pad_top = 0;
- unsigned int pad_bottom = 0;
- const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *_permuted_input.info(), *_permuted_weights.info(), info, out_dims, invalid_right,
- invalid_bottom, pad_left, pad_right, pad_top, pad_bottom);
-
- TensorInfo scale_out_info(scale_out_shape, 1, _permuted_input.info()->data_type(),
- _permuted_input.info()->quantization_info());
- scale_out_info.set_data_layout(DataLayout::NCHW);
- _scaled_output.allocator()->init(scale_out_info);
-
- const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
- DimensionRoundingType::CEIL);
- _upsample_f.configure(&_permuted_input, &_scaled_output, upsample_info);
-
- _weights_flipped.allocator()->init(*_permuted_weights.info()->clone());
- _weights_flipped.info()->set_quantization_info(weights->info()->quantization_info());
- _flip_weights.configure(&_permuted_weights, &_weights_flipped);
-
- // setup the function to convolve the upscaled output
- const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
-
- const auto out_shape = output->info()->tensor_shape();
- TensorShape permuted_out_shape{out_shape[1], out_shape[2], out_shape[0], out_shape[3]};
- TensorInfo permuted_out_info(permuted_out_shape, 1, output->info()->data_type(),
- output->info()->quantization_info());
- _permuted_output.allocator()->init(permuted_out_info);
- _permuted_output.info()->set_data_layout(DataLayout::NCHW);
- _conv_f.configure(&_scaled_output, &_weights_flipped, bias, &_permuted_output, conv_info);
-
- // Configure the function to transform the convoluted output to NHWC
- _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
-
- _permuted_input.allocator()->allocate();
- _permuted_weights.allocator()->allocate();
- _permuted_output.allocator()->allocate();
- }
- else
- {
- // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in
- // order to match output shape
- unsigned int pad_left = 0;
- unsigned int pad_right = 0;
- unsigned int pad_top = 0;
- unsigned int pad_bottom = 0;
- const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
- *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
- pad_right, pad_top, pad_bottom);
-
- TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
- input->info()->quantization_info());
- _scaled_output.allocator()->init(scale_out_info);
- const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
- DimensionRoundingType::FLOOR);
- _upsample_f.configure(input, &_scaled_output, upsample_info);
-
- _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
- _flip_weights.configure(weights, &_weights_flipped);
-
- // setup the function to convolve the upscaled output
- const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
- _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info);
- }
+ _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
+ _flip_weights.configure(weights, &_weights_flipped, &_flip_axis);
+
+ // setup the function to convolve the upscaled output
+ const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+
+ const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
+ *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
+ pad_right, pad_top, pad_bottom);
+
+ const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
+ DimensionRoundingType::FLOOR);
+
+ TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
+ input->info()->quantization_info());
+ scale_out_info.set_data_layout(data_layout);
+ _scaled_output.allocator()->init(scale_out_info);
+
+ _upsample_f.configure(input, &_scaled_output, upsample_info);
+
+ _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info);
+
+ // Setup flip axis data
+ _flip_axis.allocator()->allocate();
+ auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer());
+ axis_data[0] = static_cast<uint32_t>(width_idx);
+ axis_data[1] = static_cast<uint32_t>(height_idx);
+
_scaled_output.allocator()->allocate();
}
{
prepare();
- // MemoryGroupResourceScope scope_mg(_memory_group);
-
- // Permute input
- if (!_is_nchw)
- {
- _permute_input.run();
- }
+ MemoryGroupResourceScope scope_mg(_memory_group);
_upsample_f.run();
_conv_f.run();
-
- // Permute output
- if (!_is_nchw)
- {
- _permute_output.run();
- }
}
void NETransposeConvLayer::prepare()
// Run weights flipping and mark original weights tensor as unused
_weights_flipped.allocator()->allocate();
- // Permute weights
- if (!_is_nchw)
- {
- _permute_weights.run();
- }
- NEScheduler::get().schedule(&_flip_weights, Window::DimZ);
+ _flip_weights.run();
_original_weights->mark_as_unused();
// Prepare convolution
_conv_f.prepare();
- if (!_weights_flipped.is_used())
- {
- _weights_flipped.allocator()->free();
- }
-
_is_prepared = true;
}
}
target_link_libraries(nnfw_lib_cker INTERFACE ruy)
target_link_libraries(nnfw_lib_cker INTERFACE ruy_instrumentation)
target_compile_definitions(nnfw_lib_cker INTERFACE USE_RUY_GEMV)
+if(EXPERIMENTAL_RUY_FEATURE)
+ target_compile_definitions(nnfw_lib_cker INTERFACE EXPERIMENTAL_RUY_FEATURE)
+endif(EXPERIMENTAL_RUY_FEATURE)
if(PROFILE_RUY)
target_link_libraries(nnfw_lib_cker INTERFACE ruy_profiler)
endif(PROFILE_RUY)
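Since EXPERIMENTAL_RUY_FEATURE is exported as an INTERFACE compile definition, any target linking nnfw_lib_cker can gate code on it once the option is enabled at configure time (e.g. passing -DEXPERIMENTAL_RUY_FEATURE=ON to CMake, assuming that is how the option is surfaced). A minimal sketch; the two kernel functions are hypothetical and only the macro name comes from the change above:

  // Illustrative consumer code. Only EXPERIMENTAL_RUY_FEATURE is defined by the CMake
  // option above; RunRuyKernel/RunReferenceKernel are placeholder names.
  #ifdef EXPERIMENTAL_RUY_FEATURE
    RunRuyKernel(params);        // experimental ruy-based path
  #else
    RunReferenceKernel(params);  // default path
  #endif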
void NeonCpuBackendGemm(const int8_t *input, const int32_t *bias,
const int8_t *input_to_gate_weights, int32_t n_batch, int32_t n_input,
- int32_t n_output, int32_t, int32_t *scratch)
+ int32_t n_output, int32_t, int32_t *scratch, ruy::Context *ruy_context)
{
MatrixParams<int8_t> lhs_params;
lhs_params.order = Order::kRowMajor;
}
// Below code is from tflite::cpu_backend_gemm::detail::GemmImplUsingRuy
- ruy::Context *ruy_context = ruy_support::GetRuyContext();
-
ruy::Matrix<int8_t> ruy_lhs;
ruy::Matrix<int8_t> ruy_rhs;
ruy::Matrix<int32_t> ruy_dst;
const int m_cols, const int8_t *__restrict__ vectors,
const float *scaling_factors, int n_batch,
int32_t *scratch, float *__restrict__ result,
- int result_stride)
+ int result_stride, ruy::Context *ruy_context)
{
if (m_rows % 4 == 0 && result_stride == 1)
{
const int32_t *bias = static_cast<const int32_t *>(nullptr);
NeonCpuBackendGemm(vectors, bias, matrix, n_batch, m_cols, m_rows,
- /*output_zp =*/0, scratch);
+ /*output_zp =*/0, scratch, ruy_context);
// Multiply by float scaling factors and write to result
const int total_size = n_batch * m_rows;
#include "cker/Types.h"
#include "cker/neon/neon_check.h"
+#include <ruy/context.h>
#include <cstring>
#include <cmath>
const int8_t *__restrict__ vector,
const float *scaling_factors, int n_batch,
int32_t *, float *__restrict__ result,
- int result_stride)
+ int result_stride, ruy::Context *)
{
PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector, scaling_factors,
n_batch, result, result_stride);
void MatrixBatchVectorMultiplyAccumulate(const int8_t *matrix, const int m_rows, const int m_cols,
const int8_t *vectors, const float *scaling_factors,
int n_batch, int32_t *scratch, float *result,
- int result_stride)
+ int result_stride, ruy::Context *ruy_context)
{
NEON_OR_PORTABLE(MatrixBatchVectorMultiplyAccumulate, matrix, m_rows, m_cols, vectors,
- scaling_factors, n_batch, scratch, result, result_stride);
+ scaling_factors, n_batch, scratch, result, result_stride, ruy_context);
}
void ZeroVector(float *vector, int v_size) { PortableZeroVector(vector, v_size); }
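With this change the hybrid path no longer fetches a global ruy context; the caller owns the context and passes it down through the new trailing parameter. A minimal sketch of a call site under that assumption (the buffers are assumed to have been quantized and allocated elsewhere; only the added ruy::Context* parameter comes from this diff):

  #include <ruy/context.h>

  ruy::Context ruy_context;  // typically owned per backend/thread (assumption)
  MatrixBatchVectorMultiplyAccumulate(filter_data, num_units, input_size, quantized_input,
                                      scaling_factors, batch_size, scratch, output_data,
                                      /*result_stride=*/1, &ruy_context);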
// FullyConnectedWeightsFormat weights_format;
};
+struct L2NormParams
+{
+ // uint8 inference params.
+ int32_t input_zero_point;
+};
+
struct GatherParams
{
int32_t axis;
float float_activation_max;
};
+struct ResizeBilinearParams
+{
+ int32_t output_height;
+ int32_t output_width;
+ bool align_corners;
+ bool half_pixel_centers;
+};
+
struct TransposeConvParams
{
PaddingType padding_type;
int16_t axis;
};
+struct SplitVParams
+{
+ uint16_t num_split;
+ int16_t axis;
+};
+
struct FusedBatchNormParams
{
bool is_training;
int32_t output_offset;
};
+struct SpaceToDepthParams
+{
+ int32_t block_size;
+};
+
enum class Order
{
kColMajor,
return leading_zeros;
}
+inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift,
+ int32_t *output_inv_sqrt, int *output_shift)
+{
+ assert(input >= 0);
+ if (input <= 1)
+ {
+ // Handle the input value 1 separately to avoid overflow in that case
+ // in the general computation below (b/143972021). Also handle 0 as if it
+ // were a 1. 0 is an invalid input here (divide by zero) and 1 is a valid
+ // but rare/unrealistic input value. We can expect both to occur in some
+ // incompletely trained models, but probably not in fully trained models.
+ *output_inv_sqrt = std::numeric_limits<std::int32_t>::max();
+ *output_shift = 0;
+ return;
+ }
+ assert(input > 1);
+ *output_shift = 11;
+ while (input >= (1 << 29))
+ {
+ input /= 4;
+ ++*output_shift;
+ }
+ const unsigned max_left_shift_bits = CountLeadingZeros(static_cast<uint32_t>(input)) - 1;
+ const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2;
+ const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1;
+ *output_shift -= left_shift_bit_pairs;
+ input <<= 2 * left_shift_bit_pairs;
+ assert(input >= (1 << 27));
+ assert(input < (1 << 29));
+ using gemmlowp::FixedPoint;
+ using gemmlowp::Rescale;
+ using gemmlowp::SaturatingRoundingMultiplyByPOT;
+ // Using 3 integer bits gives us enough room for the internal arithmetic in
+ // this Newton-Raphson iteration.
+ using F3 = FixedPoint<int32_t, 3>;
+ using F0 = FixedPoint<int32_t, 0>;
+ const F3 fixedpoint_input = F3::FromRaw(input >> 1);
+ const F3 fixedpoint_half_input = SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input);
+ const F3 fixedpoint_half_three =
+ GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F3, (1 << 28) + (1 << 27), 1.5);
+ // Newton-Raphson iteration
+ // Naive unoptimized starting guess: x = 1
+ F3 x = F3::One();
+ // Naive unoptimized number of iterations: 5
+ for (int i = 0; i < 5; i++)
+ {
+ const F3 x3 = Rescale<3>(x * x * x);
+ x = Rescale<3>(fixedpoint_half_three * x - fixedpoint_half_input * x3);
+ }
+ const F0 fixedpoint_half_sqrt_2 =
+ GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F0, 1518500250, std::sqrt(2.) / 2.);
+ x = x * fixedpoint_half_sqrt_2;
+ *output_inv_sqrt = x.raw();
+ if (*output_shift < 0)
+ {
+ *output_inv_sqrt <<= -*output_shift;
+ *output_shift = 0;
+ }
+ // Convert right shift (right is positive) to left shift.
+ *output_shift *= reverse_shift;
+}
+
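This is the gemmlowp-style Newton-Raphson inverse square root used by quantized kernels; a quantized L2 normalization path, for example, would consume it roughly as below. A hedged sketch; the variable names are illustrative:

  // Illustrative only: turn 1/sqrt(square_l2_norm) into a fixed-point multiplier plus shift,
  // with reverse_shift = -1 so the returned shift is expressed as a left shift.
  int32_t inv_sqrt_multiplier = 0;
  int inv_sqrt_shift = 0;
  GetInvSqrtQuantizedMultiplierExp(square_l2_norm, /*reverse_shift=*/-1,
                                   &inv_sqrt_multiplier, &inv_sqrt_shift);
  // Each centered input value is then rescaled by (inv_sqrt_multiplier, inv_sqrt_shift)
  // using the usual saturating fixed-point multiply helpers.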
// Comment from tensorflow lite:
//
// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_BATCH_TO_SPACE_ND_H__
+#define __NNFW_CKER_BATCH_TO_SPACE_ND_H__
+
+#include "cker/Shape.h"
+
+#define UNUSED(x) ((void)(x))
+
+namespace nnfw
+{
+namespace cker
+{
+
+// Helper methods for BatchToSpaceND.
+// `spatial_index_dim` specifies post-crop offset index in this spatial
+// dimension, i.e. spatial offset introduced by flattening batch to spatial
+// dimension minus the crop size at the beginning. `block_shape_dim` is the block
+// size in the current dimension. `input_dim` and `output_dim` are the input and
+// output sizes of the BatchToSpaceND operation in the current dimension.

+// Output start index is inclusive and end index is exclusive.
+inline void GetIndexRange(int spatial_index_dim, int block_shape_dim, int input_dim, int output_dim,
+ int *start_index, int *end_index)
+{
+ // (*start_index) * block_shape_dim is effectively rounded up to the next
+ // multiple of block_shape_dim by the integer division.
+ *start_index = std::max(0, (-spatial_index_dim + block_shape_dim - 1) / block_shape_dim);
+ // Similarly, (*end_index) * block_shape_dim is rounded up too (note that
+ // end_index is exclusive).
+ *end_index =
+ std::min(input_dim, (output_dim - spatial_index_dim + block_shape_dim - 1) / block_shape_dim);
+}
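A worked example of the range computation above, with values chosen purely for illustration: block_shape_dim = 2, a crop of 1 at the start (so spatial_index_dim = 0 - 1 = -1), input_dim = 6 and output_dim = 8:

  // start_index = max(0, (1 + 2 - 1) / 2)     = 1
  // end_index   = min(6, (8 + 1 + 2 - 1) / 2) = min(6, 5) = 5
  // Input rows 1..4 therefore map to output rows 2*i - 1 = {1, 3, 5, 7}, all inside [0, 8),
  // while row 0 (which would land at -1) and row 5 (which would land at 9) are skipped.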
+
+template <typename T>
+inline void BatchToSpaceND(const Shape &unextended_input1_shape, const T *input1_data,
+ const int32_t *block_shape_data, const int32_t *crops_data,
+ const Shape &unextended_output_shape, T *output_data)
+{
+ auto input_dim = unextended_input1_shape.DimensionsCount();
+ auto output_dim = unextended_output_shape.DimensionsCount();
+
+ assert(input_dim == 3 || input_dim == 4);
+ assert(input_dim == output_dim);
+
+ UNUSED(input_dim);
+ UNUSED(output_dim);
+
+ // Extends the input/output shape from 3D to 4D if needed, NHC -> NH1C.
+ auto extend_shape = [](const Shape &shape) {
+ if (shape.DimensionsCount() == 4)
+ {
+ return shape;
+ }
+ Shape new_shape(4, 1);
+ new_shape.SetDim(0, shape.Dims(0));
+ new_shape.SetDim(1, shape.Dims(1));
+ new_shape.SetDim(3, shape.Dims(2));
+ return new_shape;
+ };
+ const Shape input1_shape = extend_shape(unextended_input1_shape);
+ const Shape output_shape = extend_shape(unextended_output_shape);
+
+ const int32_t output_width = output_shape.Dims(2);
+ const int32_t output_height = output_shape.Dims(1);
+ const int32_t output_batch_size = output_shape.Dims(0);
+
+ const int32_t depth = input1_shape.Dims(3);
+ const int32_t input_width = input1_shape.Dims(2);
+ const int32_t input_height = input1_shape.Dims(1);
+ const int32_t input_batch_size = input1_shape.Dims(0);
+
+ const int32_t block_shape_height = block_shape_data[0];
+ const int32_t block_shape_width = block_shape_data[1];
+
+ const int32_t crops_top = crops_data[0];
+ const int32_t crops_left = crops_data[2];
+
+ for (int in_batch = 0; in_batch < input_batch_size; ++in_batch)
+ {
+ const int out_batch = in_batch % output_batch_size;
+ const int spatial_offset = in_batch / output_batch_size;
+
+ int in_h_start = 0;
+ int in_h_end = 0;
+ // GetIndexRange ensures start and end indices are in [0, output_height).
+ GetIndexRange(spatial_offset / block_shape_width - crops_top, block_shape_height, input_height,
+ output_height, &in_h_start, &in_h_end);
+
+ for (int in_h = in_h_start; in_h < in_h_end; ++in_h)
+ {
+ const int out_h = in_h * block_shape_height + spatial_offset / block_shape_width - crops_top;
+ assert(out_h >= 0);
+ assert(out_h < output_height);
+
+ int in_w_start = 0;
+ int in_w_end = 0;
+ // GetIndexRange ensures start and end indices are in [0, output_width).
+ GetIndexRange(spatial_offset % block_shape_width - crops_left, block_shape_width, input_width,
+ output_width, &in_w_start, &in_w_end);
+
+ for (int in_w = in_w_start; in_w < in_w_end; ++in_w)
+ {
+ const int out_w =
+ in_w * block_shape_width + spatial_offset % block_shape_width - crops_left;
+ assert(out_w >= 0);
+ assert(out_w < output_width);
+ T *out = output_data + Offset(output_shape, out_batch, out_h, out_w, 0);
+ const T *in = input1_data + Offset(input1_shape, in_batch, in_h, in_w, 0);
+ memcpy(out, in, depth * sizeof(T));
+ }
+ }
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_BATCH_TO_SPACE_ND_H__
#ifndef __NNFW_CKER_FULLY_CONNECTED_H__
#define __NNFW_CKER_FULLY_CONNECTED_H__
+#include <ruy/context.h>
#include "cker/Shape.h"
#include "cker/Types.h"
#include "cker/Utils.h"
MatrixBatchVectorMultiplyAccumulate(weights_data, num_units, input_size, input_data, batch_size,
output_data, /*result_stride=*/1);
- // Apply activation function
- ApplyActivationToVector(output_data, batch_size * num_units, params.activation, output_data);
+ if (params.activation != FusedActivationFunctionType::kNone)
+ {
+ // Apply activation function
+ ApplyActivationToVector(output_data, batch_size * num_units, params.activation, output_data);
+ }
}
inline void FullyConnected(const FullyConnectedParams ¶ms, const Shape &input_shape,
const float *input_data, const Shape &filter_shape,
const int8_t *filter_data, const Shape &, const float *bias_data,
const Shape &output_shape, float *output_data,
- FCTempArena &temp_arena)
+ FCTempArena &temp_arena, ruy::Context *ruy_context)
{
int total_input_size = input_shape.FlatSize();
const int input_size = filter_shape.Dims(1);
int32_t *scratch = temp_arena.accum_scratch.data();
MatrixBatchVectorMultiplyAccumulate(filter_data, num_units, input_size, quant_data,
scaling_factors_ptr, batch_size, scratch, output_data,
- /*result_stride=*/1);
+ /*result_stride=*/1, ruy_context);
#else
MatrixBatchVectorMultiplyAccumulate(filter_data, num_units, input_size, quant_data,
scaling_factors_ptr, batch_size, output_data,
/*result_stride=*/1);
+ UNUSED_RELEASE(ruy_context);
UNUSED_RELEASE(output_shape);
#endif
// Apply activation function to floats.
- ApplyActivationToVector(output_data, batch_size * num_units, params.activation, output_data);
+ if (params.activation != FusedActivationFunctionType::kNone)
+ {
+ // Apply activation function
+ ApplyActivationToVector(output_data, batch_size * num_units, params.activation, output_data);
+ }
return;
}
+inline void FullyConnectedSparseWeight(const FullyConnectedParams ¶ms, const Shape &input_shape,
+ const float *input_data, const Shape &weights_shape,
+ const float *weights_data, const Shape &bias_shape,
+ const float *bias_data, const Shape &output_shape,
+ float *output_data, int w0_size, const uint16_t *w1_segments,
+ const uint16_t *w1_indices)
+{
+ UNUSED_RELEASE(params);
+ UNUSED_RELEASE(input_shape);
+
+ assert(weights_shape.DimensionsCount() == 2);
+ assert(output_shape.DimensionsCount() == 2);
+
+ const int output_dims_count = output_shape.DimensionsCount();
+ const int weights_dims_count = weights_shape.DimensionsCount();
+ const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
+ const int output_depth =
+ MatchingDim(weights_shape, weights_dims_count - 2, output_shape, output_dims_count - 1);
+ const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
+
+ UNUSED_RELEASE(bias_shape);
+ if (bias_data)
+ {
+ VectorBatchVectorAssign(bias_data, output_depth, batches, output_data);
+ }
+ else
+ {
+ ZeroVector(output_data, batches * output_depth);
+ }
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int idx_0 = 0; idx_0 < w0_size; ++idx_0)
+ {
+ for (int pw1 = w1_segments[idx_0]; pw1 < w1_segments[idx_0 + 1]; ++pw1)
+ {
+ int idx_1 = w1_indices[pw1];
+ output_data[b * output_depth + idx_0] +=
+ weights_data[pw1] * input_data[b * accum_depth + idx_1];
+ }
+ }
+ }
+ if (params.activation != FusedActivationFunctionType::kNone)
+ {
+ // Apply activation function
+ ApplyActivationToVector(output_data, batches * output_depth, params.activation, output_data);
+ }
+}
+
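The w1_segments/w1_indices pair above is a CSR-style encoding over output rows: row idx_0 owns the stored values in [w1_segments[idx_0], w1_segments[idx_0 + 1]), and w1_indices[pw1] gives the input (column) index of each stored value. A small hand-built layout, with illustrative values only:

  // Dense weights, 2 output units x 3 inputs:
  //   [ 1  0  2 ]
  //   [ 0  0  3 ]
  // CSR-style arrays consumed by FullyConnectedSparseWeight:
  //   weights_data = { 1, 2, 3 }   // non-zero values, row by row
  //   w1_segments  = { 0, 2, 3 }   // row idx_0 owns entries [w1_segments[idx_0], w1_segments[idx_0+1])
  //   w1_indices   = { 0, 2, 2 }   // input (column) index of each stored value
  //   w0_size      = 2             // number of output rows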
} // namespace cker
} // namespace nnfw
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TENSORFLOW_CORE_LIB_RANDOM_PHILOX_RANDOM_H_
+#define TENSORFLOW_CORE_LIB_RANDOM_PHILOX_RANDOM_H_
+
+#include <stdlib.h>
+
+#include "cker/Types.h"
+#include "cker/Shape.h"
+#include "cker/Utils.h"
+
+// Function qualifiers that need to work on both CPU and GPU.
+#if defined(__CUDACC__) || defined(__HIPCC__)
+// For nvcc.
+#define PHILOX_DEVICE_FUNC __host__ __device__
+#define PHILOX_INLINE __inline__
+#else
+// For non-nvcc.
+#define PHILOX_DEVICE_FUNC
+#define PHILOX_INLINE inline
+#endif
+#define PHILOX_DEVICE_INLINE PHILOX_DEVICE_FUNC PHILOX_INLINE
+
+#include <math.h>
+
+namespace nnfw
+{
+namespace cker
+{
+namespace random
+{
+
+// A class that represents an inline array. It can be used on both CPU and GPU,
+// and is trivially copyable between CPU and GPU.
+// Arguments:
+// T: the array element type;
+// ElementCount: the fixed size of the array;
+template <typename T, int ElementCount> class Array
+{
+public:
+ static constexpr int kElementCount = ElementCount;
+ PHILOX_DEVICE_INLINE Array()
+ {
+ for (int i = 0; i < ElementCount; ++i)
+ {
+ data_[i] = T(0);
+ }
+ }
+
+ PHILOX_DEVICE_INLINE const T &operator[](int index) const { return data_[index]; }
+
+ PHILOX_DEVICE_INLINE T &operator[](int index) { return data_[index]; }
+
+ size_t size() const { return ElementCount; }
+
+private:
+ T data_[ElementCount];
+};
+
+// A class that encapsulates all the states for a random number generator using
+// the philox_4x32_10 algorithm. Each invocation returns 128 random bits
+// in the form of four uint32 values.
+// There are multiple variants of this algorithm, we picked the 4x32_10 version
+// that is most suited for our applications.
+// Since this class is meant to be copied between CPU and GPU, it maintains
+// value semantics.
+//
+// For example: To use this class and populate an array of 1024 randoms on CPU
+// with two threads,
+//
+// void Fill(PhiloxRandom rnd, uint32* output, int start, int limit) {
+// assert(start % 4 == 0);
+// assert(limit % 4 == 0);
+// rnd.Skip(start / 4);
+// for (int i = start; i < limit; i += 4) {
+// auto sample = rnd();
+// ... copy sample[0..3] to output[i..i+3]
+// }
+// }
+//
+// PhiloxRandom rng(seed);
+// PhiloxRandom rng_copy = rng;
+// rng.Skip(1000/4);
+//
+// ... schedule Fill(rng_copy, output, 0, 512) in thread 1;
+// ... schedule Fill(rng_copy, output, 512, 1024) in thread 2;
+// ... wait for thread 1 & 2 to finish executing Fill().
+//
+// NOTE:
+// 1. PhiloxRandom is trivially copyable.
+// 2. PhiloxRandom is compilable by gcc and nvcc.
+class PhiloxRandom
+{
+public:
+ using ResultType = Array<uint32_t, 4>;
+ using ResultElementType = uint32_t;
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = 4;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 10;
+ // The type for the 64-bit key stored in the form of two 32-bit uint
+ // that are used in the diffusion process.
+ using Key = Array<uint32_t, 2>;
+
+ PHILOX_DEVICE_INLINE
+ PhiloxRandom() {}
+
+ PHILOX_DEVICE_INLINE
+ explicit PhiloxRandom(uint64_t seed)
+ {
+ key_[0] = static_cast<uint32_t>(seed);
+ key_[1] = static_cast<uint32_t>(seed >> 32);
+ }
+
+ PHILOX_DEVICE_INLINE
+ explicit PhiloxRandom(uint64_t seed_lo, uint64_t seed_hi)
+ {
+ key_[0] = static_cast<uint32_t>(seed_lo);
+ key_[1] = static_cast<uint32_t>(seed_lo >> 32);
+ counter_[2] = static_cast<uint32_t>(seed_hi);
+ counter_[3] = static_cast<uint32_t>(seed_hi >> 32);
+ }
+
+ PHILOX_DEVICE_INLINE
+ PhiloxRandom(ResultType counter, Key key) : counter_(counter), key_(key) {}
+
+ PHILOX_DEVICE_INLINE
+ ResultType const &counter() const { return counter_; }
+
+ PHILOX_DEVICE_INLINE
+ Key const &key() const { return key_; }
+
+ // Skip the specified number of samples of 128-bits in the current stream.
+ PHILOX_DEVICE_INLINE
+ void Skip(uint64_t count)
+ {
+ const uint32_t count_lo = static_cast<uint32_t>(count);
+ uint32_t count_hi = static_cast<uint32_t>(count >> 32);
+
+ counter_[0] += count_lo;
+ if (counter_[0] < count_lo)
+ {
+ ++count_hi;
+ }
+
+ counter_[1] += count_hi;
+ if (counter_[1] < count_hi)
+ {
+ if (++counter_[2] == 0)
+ {
+ ++counter_[3];
+ }
+ }
+ }
+
+ // Returns a group of four random numbers using the underlying Philox
+ // algorithm.
+ PHILOX_DEVICE_INLINE ResultType operator()()
+ {
+ ResultType counter = counter_;
+ Key key = key_;
+
+ // Run the single rounds for ten times. Manually unrolling the loop
+ // for better performance.
+ counter = ComputeSingleRound(counter, key);
+ RaiseKey(&key);
+ counter = ComputeSingleRound(counter, key);
+ RaiseKey(&key);
+ counter = ComputeSingleRound(counter, key);
+ RaiseKey(&key);
+ counter = ComputeSingleRound(counter, key);
+ RaiseKey(&key);
+ counter = ComputeSingleRound(counter, key);
+ RaiseKey(&key);
+ counter = ComputeSingleRound(counter, key);
+ RaiseKey(&key);
+ counter = ComputeSingleRound(counter, key);
+ RaiseKey(&key);
+ counter = ComputeSingleRound(counter, key);
+ RaiseKey(&key);
+ counter = ComputeSingleRound(counter, key);
+ RaiseKey(&key);
+ counter = ComputeSingleRound(counter, key);
+
+ SkipOne();
+
+ return counter;
+ }
+
+private:
+ // We use the same constants as recommended by the original paper.
+ static constexpr uint32_t kPhiloxW32A = 0x9E3779B9;
+ static constexpr uint32_t kPhiloxW32B = 0xBB67AE85;
+ static constexpr uint32_t kPhiloxM4x32A = 0xD2511F53;
+ static constexpr uint32_t kPhiloxM4x32B = 0xCD9E8D57;
+
+ // Helper function to skip the next sample of 128-bits in the current stream.
+ PHILOX_DEVICE_INLINE void SkipOne()
+ {
+ if (++counter_[0] == 0)
+ {
+ if (++counter_[1] == 0)
+ {
+ if (++counter_[2] == 0)
+ {
+ ++counter_[3];
+ }
+ }
+ }
+ }
+
+ // Helper function to return the lower and higher 32-bits from two 32-bit
+ // integer multiplications.
+ PHILOX_DEVICE_INLINE
+ static void MultiplyHighLow(uint32_t a, uint32_t b, uint32_t *result_low, uint32_t *result_high)
+ {
+#ifndef __CUDA_ARCH__
+ const uint64_t product = static_cast<uint64_t>(a) * b;
+ *result_low = static_cast<uint32_t>(product);
+ *result_high = static_cast<uint32_t>(product >> 32);
+#else
+ *result_low = a * b;
+ *result_high = __umulhi(a, b);
+#endif
+ }
+
+ // Helper function for a single round of the underlying Philox algorithm.
+ PHILOX_DEVICE_INLINE static ResultType ComputeSingleRound(const ResultType &counter,
+ const Key &key)
+ {
+ uint32_t lo0;
+ uint32_t hi0;
+ MultiplyHighLow(kPhiloxM4x32A, counter[0], &lo0, &hi0);
+
+ uint32_t lo1;
+ uint32_t hi1;
+ MultiplyHighLow(kPhiloxM4x32B, counter[2], &lo1, &hi1);
+
+ ResultType result;
+ result[0] = hi1 ^ counter[1] ^ key[0];
+ result[1] = lo1;
+ result[2] = hi0 ^ counter[3] ^ key[1];
+ result[3] = lo0;
+ return result;
+ }
+
+ PHILOX_DEVICE_INLINE void RaiseKey(Key *key)
+ {
+ (*key)[0] += kPhiloxW32A;
+ (*key)[1] += kPhiloxW32B;
+ }
+
+private:
+ ResultType counter_;
+ Key key_;
+};
+
+} // namespace random
+} // namespace cker
+} // namespace nnfw
+#endif // TENSORFLOW_CORE_LIB_RANDOM_PHILOX_RANDOM_H_
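A minimal sketch of driving the generator, mirroring the Fill() example in the class comment; the seed and buffer size are arbitrary:

  nnfw::cker::random::PhiloxRandom gen(/*seed=*/42);
  uint32_t buf[8];
  for (int i = 0; i < 8; i += 4)
  {
    auto sample = gen();  // one invocation yields four uint32 values
    for (int j = 0; j < 4; ++j)
    {
      buf[i + j] = sample[j];
    }
  }
  // A second consumer could copy `gen` and call Skip(n) to jump ahead n 128-bit samples.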
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_HELPER_RANDOM_DISTRIBUTIONS_H__
+#define __NNFW_CKER_HELPER_RANDOM_DISTRIBUTIONS_H__
+
+#include <string.h>
+
+#include <cmath>
+
+#include <algorithm>
+#include <type_traits>
+
+#include "cker/Types.h"
+#include "cker/Shape.h"
+#include "cker/Utils.h"
+
+#include "cker/eigen/EigenSupport.h"
+#include "cker/operation/Helper/PhiloxRandom.h"
+
+namespace nnfw
+{
+namespace cker
+{
+namespace random
+{
+
+// Helper function to convert a 16-bit integer to a half between [0..1).
+PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16_t x);
+// Helper function to convert a 16-bit integer to a bfloat16 between [0..1).
+// PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x);
+// Helper function to convert a 32-bit integer to a float between [0..1).
+PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32_t x);
+// Helper function to convert two 32-bit integers to a double between [0..1).
+PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32_t x0, uint32_t x1);
+
+// Computes a + b. Requires that the result is representable in the destination
+// type and that b is not maximal (i.e. b + 1 is not 0). Notably, the addend b
+// need *not* be representable in that type. (The condition on b excludes the
+// extremal case INT_MIN + UINT_MAX = INT_MAX, which this function cannot
+// compute.)
+template <typename Int>
+PHILOX_DEVICE_INLINE Int SignedAdd(Int a, typename std::make_unsigned<Int>::type b)
+{
+ // Implementation note: both b_div_2 and b - b_div_2 are positive and
+ // representable as Int.
+ auto b_div_2 = b >> 1;
+ return a + static_cast<Int>(b_div_2) + static_cast<Int>(b - b_div_2);
+}
+
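Concretely, the split avoids ever casting the full unsigned addend to the signed type. For example, with a = -5 and b = 7u:

  // b_div_2            = 7u >> 1 = 3
  // a + 3 + (7 - 3)    = -5 + 3 + 4 = 2 == -5 + 7
  // Both partial addends (3 and 4) fit in Int even when b itself exceeds Int's maximum.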
+// A class that generates uniform distribution random numbers from the
+// underlying random integer generator.
+// Arguments:
+// Generator: a generator type that returns a number of uint32 upon each
+// invocation. It needs to define kResultElementCount for the
+// sample count for each invocation, and ResultType for the
+// actual returned sample type.
+// RealType: the data type of the real numbers that will be returned by the
+// distribution. This could be either float or double for now.
+// This class is meant to be implemented through specialization. The default
+// is not defined by design.
+template <class Generator, typename RealType> class UniformDistribution;
+
+template <class Generator> class UniformDistribution<Generator, Eigen::half>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = Generator::kResultElementCount;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 3;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = false;
+ typedef Array<Eigen::half, kResultElementCount> ResultType;
+ typedef Eigen::half ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(Generator *gen)
+ {
+ typename Generator::ResultType sample = (*gen)();
+ ResultType result;
+ for (int i = 0; i < kResultElementCount; ++i)
+ {
+ result[i] = Uint16ToHalf(sample[i]); // Truncate the upper 16 bits.
+ }
+ return result;
+ }
+};
+
+template <class Generator> class UniformDistribution<Generator, float>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = Generator::kResultElementCount;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 3;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = false;
+ typedef Array<float, kResultElementCount> ResultType;
+ typedef float ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(Generator *gen)
+ {
+ typename Generator::ResultType sample = (*gen)();
+ ResultType result;
+ for (int i = 0; i < kResultElementCount; ++i)
+ {
+ result[i] = Uint32ToFloat(sample[i]);
+ }
+ return result;
+ }
+};
+
+template <class Generator> class UniformDistribution<Generator, double>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = Generator::kResultElementCount / 2;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 3;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = false;
+ typedef Array<double, kResultElementCount> ResultType;
+ typedef double ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(Generator *gen)
+ {
+ typename Generator::ResultType sample = (*gen)();
+ ResultType result;
+ for (int i = 0; i < kResultElementCount; ++i)
+ {
+ result[i] = Uint64ToDouble(sample[2 * i], sample[2 * i + 1]);
+ }
+ return result;
+ }
+};
+
+template <class Generator> class UniformDistribution<Generator, int32_t>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = Generator::kResultElementCount;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 3;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = false;
+ typedef Array<int32_t, kResultElementCount> ResultType;
+ typedef int32_t ResultElementType;
+
+ // Must have lo < hi
+ UniformDistribution(int32_t lo, int32_t hi)
+ : lo_(lo), range_(static_cast<uint32_t>(hi) - static_cast<uint32_t>(lo))
+ {
+ }
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(Generator *gen)
+ {
+ typename Generator::ResultType sample = (*gen)();
+ ResultType result;
+ for (int i = 0; i < kResultElementCount; ++i)
+ {
+ result[i] = SignedAdd(lo_, sample[i] % range_);
+ }
+ return result;
+ }
+
+private:
+ // Note that lo_ is intentionally signed while range_ is intentionally
+ // unsigned. This is because hi - lo can overflow signed integers if
+ // lo < 0 < hi, but always fits in unsigned.
+ int32_t lo_;
+ uint32_t range_;
+};
+
+template <class Generator> class UniformDistribution<Generator, int64_t>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = Generator::kResultElementCount / 2;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 3;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = false;
+ typedef Array<int64_t, kResultElementCount> ResultType;
+ typedef int64_t ResultElementType;
+
+ // Must have lo < hi
+ UniformDistribution(int64_t lo, int64_t hi)
+ : lo_(lo), range_(static_cast<uint64_t>(hi) - static_cast<uint64_t>(lo))
+ {
+ }
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(Generator *gen)
+ {
+ typename Generator::ResultType sample = (*gen)();
+ ResultType result;
+ for (int i = 0; i < kResultElementCount; ++i)
+ {
+ auto bits = sample[2 * i] | static_cast<uint64_t>(sample[2 * i + 1]) << 32;
+ result[i] = SignedAdd(lo_, bits % range_);
+ }
+ return result;
+ }
+
+private:
+ // Note that lo_ is intentionally signed while range_ is intentionally
+ // unsigned. This is because hi - lo can overflow signed integers if
+ // lo < 0 < hi, but always fits in unsigned.
+ int64_t lo_;
+ uint64_t range_;
+};
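
To make the lo/range convention concrete, the following standalone sketch (illustration only, not part of the patch) maps one raw 32-bit sample into [lo, hi) the same way the integer specializations above do: the width hi - lo is computed in unsigned arithmetic so it cannot overflow, and the reduced offset is added back onto the signed lower bound.

```cpp
#include <cstdint>
#include <cstdio>

// Standalone sketch of the [lo, hi) mapping: keep lo signed, keep the range
// unsigned so hi - lo cannot overflow, then add (sample % range) back onto lo.
int32_t MapToRange(uint32_t sample, int32_t lo, int32_t hi)
{
  const uint32_t range = static_cast<uint32_t>(hi) - static_cast<uint32_t>(lo);
  const uint32_t offset = sample % range;
  // Add the unsigned offset in two halves, as SignedAdd does above.
  const uint32_t half = offset >> 1;
  return lo + static_cast<int32_t>(half) + static_cast<int32_t>(offset - half);
}

int main()
{
  // With lo < 0 < hi, hi - lo would overflow int32_t, but the unsigned range is fine.
  std::printf("%d\n", MapToRange(0x90000000u, -2000000000, 2000000000)); // value in [lo, hi)
  return 0;
}
```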
+
+// Similar to `UniformDistribution`, except that instead of generating numbers
+// in the range [low, high), it generates numbers covering the whole range of
+// the integer type.
+template <typename Generator, typename IntType> class UniformFullIntDistribution;
+
+template <typename Generator, typename IntType> class UniformFullIntDistribution32
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = Generator::kResultElementCount;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 3;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = false;
+ typedef Array<IntType, kResultElementCount> ResultType;
+ typedef IntType ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(Generator *gen)
+ {
+ typename Generator::ResultType sample = (*gen)();
+ ResultType result;
+ for (int i = 0; i < kResultElementCount; ++i)
+ {
+ result[i] = sample[i];
+ }
+ return result;
+ }
+};
+
+template <typename Generator, typename IntType> class UniformFullIntDistribution64
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = Generator::kResultElementCount / 2;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 3;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = false;
+ typedef Array<IntType, kResultElementCount> ResultType;
+ typedef IntType ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(Generator *gen)
+ {
+ typename Generator::ResultType sample = (*gen)();
+ ResultType result;
+ for (int i = 0; i < kResultElementCount; ++i)
+ {
+ result[i] = sample[2 * i] | static_cast<uint64_t>(sample[2 * i + 1]) << 32;
+ }
+ return result;
+ }
+};
+
+template <typename Generator>
+class UniformFullIntDistribution<Generator, int32_t>
+ : public UniformFullIntDistribution32<Generator, int32_t>
+{
+};
+template <typename Generator>
+class UniformFullIntDistribution<Generator, uint32_t>
+ : public UniformFullIntDistribution32<Generator, uint32_t>
+{
+};
+template <typename Generator>
+class UniformFullIntDistribution<Generator, int64_t>
+ : public UniformFullIntDistribution64<Generator, int64_t>
+{
+};
+template <typename Generator>
+class UniformFullIntDistribution<Generator, uint64_t>
+ : public UniformFullIntDistribution64<Generator, uint64_t>
+{
+};
+
+// A class that adapts a generator that natively returns multiple samples per
+// invocation so that it returns a single sample at a time.
+template <class Generator> class SingleSampleAdapter
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = 1;
+ // The number of elements that will be returned by the underlying generator.
+ static constexpr int kNativeElementCount = Generator::kResultElementCount;
+ typedef typename Generator::ResultElementType ResultType;
+ typedef typename Generator::ResultElementType ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ explicit SingleSampleAdapter(Generator *gen)
+ : generator_(gen), used_result_index_(Generator::kResultElementCount)
+ {
+ }
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()()
+ {
+ if (used_result_index_ == Generator::kResultElementCount)
+ {
+ unused_results_ = (*generator_)();
+ used_result_index_ = 0;
+ }
+
+ return unused_results_[used_result_index_++];
+ }
+
+ PHILOX_DEVICE_INLINE
+ void Skip(uint64_t num_skips)
+ {
+ if (!num_skips)
+ {
+ return;
+ }
+ int num_unused_results = kNativeElementCount - used_result_index_;
+ if (num_skips <= num_unused_results)
+ {
+ used_result_index_ += num_skips;
+ return;
+ }
+ num_skips -= num_unused_results;
+ used_result_index_ = kNativeElementCount;
+ SkipFromGenerator(num_skips / kNativeElementCount);
+ num_skips = num_skips % kNativeElementCount;
+ if (num_skips)
+ {
+ unused_results_ = (*generator_)();
+ used_result_index_ = num_skips;
+ }
+ }
+
+private:
+ // This implementation iteratively skips over `num_skips` samples
+ // from `generator_`. There is an O(1) implementation for PhiloxRandom
+ // in random_distributions.cc.
+ PHILOX_DEVICE_INLINE
+ void SkipFromGenerator(uint64_t num_skips)
+ {
+ while (num_skips--)
+ {
+ (*generator_)();
+ }
+ }
+
+ Generator *generator_;
+ typename Generator::ResultType unused_results_;
+ int used_result_index_;
+};
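
A toy illustration of the adapter pattern (standalone, with a fake counter-based generator instead of PhiloxRandom): the underlying generator produces fixed-size blocks, and the adapter refills its buffer lazily while handing out one element per call.

```cpp
#include <array>
#include <cstdint>
#include <cstdio>

// Toy block generator: each invocation returns a block of 4 "random" words
// (here just a running counter), mimicking the Generator concept in the diff.
struct ToyBlockGenerator
{
  static constexpr int kResultElementCount = 4;
  using ResultElementType = uint32_t;
  using ResultType = std::array<uint32_t, kResultElementCount>;

  uint32_t next = 0;
  ResultType operator()()
  {
    ResultType r;
    for (auto &v : r) v = next++;
    return r;
  }
};

// Minimal adapter sketch: refill the cached block only when it is exhausted.
template <class Generator> struct SingleSampleSketch
{
  explicit SingleSampleSketch(Generator *g) : gen_(g), index_(Generator::kResultElementCount) {}
  typename Generator::ResultElementType operator()()
  {
    if (index_ == Generator::kResultElementCount)
    {
      block_ = (*gen_)();
      index_ = 0;
    }
    return block_[index_++];
  }
  Generator *gen_;
  typename Generator::ResultType block_;
  int index_;
};

int main()
{
  ToyBlockGenerator gen;
  SingleSampleSketch<ToyBlockGenerator> one_at_a_time(&gen);
  for (int i = 0; i < 6; ++i)
    std::printf("%u ", one_at_a_time()); // prints 0 1 2 3 4 5
  std::printf("\n");
  return 0;
}
```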
+
+// A class that generates unit normal distribution random numbers from the
+// underlying random integer generator.
+// Arguments:
+// Generator: a generator type that returns a number of uint32 upon each
+// invocation. It needs to define kResultElementCount for the
+// sample count for each invocation, and ResultType for actual
+// returned sample type.
+// RealType: the data type of the real numbers that will be returned by the
+// distribution. This could be either float or double for now.
+// This class is meant to be implemented through specialization. The default
+// is not defined by design.
+template <class Generator, typename RealType> class NormalDistribution;
+
+PHILOX_DEVICE_INLINE
+void BoxMullerFloat(uint32_t x0, uint32_t x1, float *f0, float *f1);
+
+PHILOX_DEVICE_INLINE
+void BoxMullerDouble(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, double *d0, double *d1);
+
+// Exactly like the float version, except that we convert to half afterwards;
+// since we don't have half-precision sin/cos even on GPUs, there's nothing to
+// gain from working in half internally.
+template <class Generator> class NormalDistribution<Generator, Eigen::half>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = Generator::kResultElementCount;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 70;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = false;
+ typedef Array<Eigen::half, kResultElementCount> ResultType;
+ typedef Eigen::half ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(Generator *gen)
+ {
+ typename Generator::ResultType sample = (*gen)();
+ ResultType result;
+ for (int i = 0; i < kResultElementCount; i += 2)
+ {
+ float f[2];
+ BoxMullerFloat(sample[i], sample[i + 1], &f[0], &f[1]);
+ result[i] = Eigen::half(f[0]);
+ result[i + 1] = Eigen::half(f[1]);
+ }
+ return result;
+ }
+};
+
+template <class Generator> class NormalDistribution<Generator, float>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = Generator::kResultElementCount;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 70;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = false;
+ typedef Array<float, kResultElementCount> ResultType;
+ typedef float ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(Generator *gen)
+ {
+ typename Generator::ResultType sample = (*gen)();
+ ResultType result;
+ for (int i = 0; i < kResultElementCount; i += 2)
+ {
+ BoxMullerFloat(sample[i], sample[i + 1], &result[i], &result[i + 1]);
+ }
+ return result;
+ }
+};
+
+template <class Generator> class NormalDistribution<Generator, double>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = Generator::kResultElementCount / 2;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 70;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = false;
+ typedef Array<double, kResultElementCount> ResultType;
+ typedef double ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(Generator *gen)
+ {
+ typename Generator::ResultType sample = (*gen)();
+ ResultType result;
+ for (int i = 0; i < kResultElementCount; i += 2)
+ {
+ const int i2 = 2 * i;
+ BoxMullerDouble(sample[i2], sample[i2 + 1], sample[i2 + 2], sample[i2 + 3], &result[i],
+ &result[i + 1]);
+ }
+ return result;
+ }
+};
+
+// A class that returns standard normal distribution samples truncated to
+// [-kTruncateValue, kTruncateValue].
+// Arguments:
+// Generator: a generator type that returns a number of uint32 upon each
+// invocation. It needs to define kResultElementCount for the
+// sample count for each invocation, and ResultType for actual
+// returned sample type.
+// RealType: the data type of the real numbers that will be returned by the
+// distribution. This could be either float or double for now.
+// This class is meant to be implemented through specialization. The default
+// is not defined by design.
+template <class SingleSampleGenerator, typename RealType> class TruncatedNormalDistribution;
+
+// Exactly like the float version, except that we convert to half afterwards;
+// since we don't have half-precision sin/cos even on GPUs, there's nothing to
+// gain from working in half internally.
+template <class SingleSampleGenerator>
+class TruncatedNormalDistribution<SingleSampleGenerator, Eigen::half>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = SingleSampleGenerator::kNativeElementCount;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 90;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = true;
+ // The threshold where the normal distribution is truncated.
+ const float kTruncateValue = 2.0f;
+
+ typedef Array<Eigen::half, kResultElementCount> ResultType;
+ typedef Eigen::half ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(SingleSampleGenerator *gen)
+ {
+ ResultType results;
+ int index = 0;
+ while (true)
+ {
+ // Repeatedly take samples from the normal distribution, until we have
+ // the desired number of elements that fall within the pre-defined cutoff
+ // threshold.
+ const uint32_t x0 = (*gen)();
+ const uint32_t x1 = (*gen)();
+ float f[2];
+ BoxMullerFloat(x0, x1, &f[0], &f[1]);
+
+ if (Eigen::numext::abs(f[0]) < kTruncateValue)
+ {
+ results[index++] = Eigen::half(f[0]);
+ if (index >= kResultElementCount)
+ {
+ return results;
+ }
+ }
+ if (Eigen::numext::abs(f[1]) < kTruncateValue)
+ {
+ results[index++] = Eigen::half(f[1]);
+ if (index >= kResultElementCount)
+ {
+ return results;
+ }
+ }
+ }
+ }
+};
+
+// Partial specialization for float.
+template <class SingleSampleGenerator>
+class TruncatedNormalDistribution<SingleSampleGenerator, float>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = SingleSampleGenerator::kNativeElementCount;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 90;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = true;
+ // The threshold where the normal distribution is truncated.
+ const float kTruncateValue = 2.0f;
+
+ typedef Array<float, kResultElementCount> ResultType;
+ typedef float ResultElementType;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(SingleSampleGenerator *gen)
+ {
+ ResultType results;
+ int index = 0;
+ while (true)
+ {
+ // Repeatedly take samples from the normal distribution, until we have
+ // the desired number of elements that fall within the pre-defined cutoff
+ // threshold.
+ const uint32_t x0 = (*gen)();
+ const uint32_t x1 = (*gen)();
+ float f[2];
+ BoxMullerFloat(x0, x1, &f[0], &f[1]);
+
+ if (Eigen::numext::abs(f[0]) < kTruncateValue)
+ {
+ results[index++] = f[0];
+ if (index >= kResultElementCount)
+ {
+ return results;
+ }
+ }
+ if (Eigen::numext::abs(f[1]) < kTruncateValue)
+ {
+ results[index++] = f[1];
+ if (index >= kResultElementCount)
+ {
+ return results;
+ }
+ }
+ }
+ }
+};
+
+// Partial specialization for double.
+template <class SingleSampleGenerator>
+class TruncatedNormalDistribution<SingleSampleGenerator, double>
+{
+public:
+ // The number of elements that will be returned.
+ static constexpr int kResultElementCount = (SingleSampleGenerator::kNativeElementCount > 1)
+ ? SingleSampleGenerator::kNativeElementCount / 2
+ : 1;
+ // Cost of generation of a single element (in cycles).
+ static constexpr int kElementCost = 90;
+ // Indicate that this distribution may take variable number of samples
+ // during the runtime.
+ static constexpr bool kVariableSamplesPerOutput = true;
+ typedef Array<double, kResultElementCount> ResultType;
+ typedef double ResultElementType;
+ const double kTruncateValue = 2.0;
+
+ PHILOX_DEVICE_INLINE
+ ResultType operator()(SingleSampleGenerator *gen)
+ {
+ ResultType results;
+ int index = 0;
+ while (1)
+ {
+ const uint32_t x0 = (*gen)();
+ const uint32_t x1 = (*gen)();
+ const uint32_t x2 = (*gen)();
+ const uint32_t x3 = (*gen)();
+ double d[2];
+ BoxMullerDouble(x0, x1, x2, x3, &d[0], &d[1]);
+
+ if (Eigen::numext::abs(d[0]) < kTruncateValue)
+ {
+ results[index++] = d[0];
+ if (index >= kResultElementCount)
+ {
+ return results;
+ }
+ }
+ if (Eigen::numext::abs(d[1]) < kTruncateValue)
+ {
+ results[index++] = d[1];
+ if (index >= kResultElementCount)
+ {
+ return results;
+ }
+ }
+ }
+ }
+};
+
+// Helper function to convert two 32-bit uniform integers to two floats
+// under the unit normal distribution.
+PHILOX_DEVICE_INLINE
+void BoxMullerFloat(uint32_t x0, uint32_t x1, float *f0, float *f1)
+{
+ // This function implements the Box-Muller transform:
+ // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form
+ // Do not send a really small number to log().
+ // We cannot mark "epsilon" as "static const" because NVCC would complain
+ const float epsilon = 1.0e-7f;
+ float u1 = Uint32ToFloat(x0);
+ if (u1 < epsilon)
+ {
+ u1 = epsilon;
+ }
+ const float v1 = 2.0f * M_PI * Uint32ToFloat(x1);
+ const float u2 = Eigen::numext::sqrt(-2.0f * Eigen::numext::log(u1));
+#if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__)
+ *f0 = Eigen::numext::sin(v1);
+ *f1 = Eigen::numext::cos(v1);
+#else
+ sincosf(v1, f0, f1);
+#endif
+ *f0 *= u2;
+ *f1 *= u2;
+}
+
+// Helper function to convert four 32-bit uniform integers to two doubles
+// under the unit normal distribution.
+PHILOX_DEVICE_INLINE
+void BoxMullerDouble(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, double *d0, double *d1)
+{
+ // This function implements the Box-Muller transform:
+ // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form
+ // Do not send a really small number to log().
+ // We cannot mark "epsilon" as "static const" because NVCC would complain
+ const double epsilon = 1.0e-7;
+ double u1 = Uint64ToDouble(x0, x1);
+ if (u1 < epsilon)
+ {
+ u1 = epsilon;
+ }
+ const double v1 = 2 * M_PI * Uint64ToDouble(x2, x3);
+ const double u2 = Eigen::numext::sqrt(-2.0 * Eigen::numext::log(u1));
+#if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__)
+ *d0 = Eigen::numext::sin(v1);
+ *d1 = Eigen::numext::cos(v1);
+#else
+ sincos(v1, d0, d1);
+#endif
+ *d0 *= u2;
+ *d1 *= u2;
+}
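
For reference, both helpers above implement the basic Box-Muller transform. With $u_1$ and $u_2$ drawn uniformly from $(0, 1]$, two independent standard normal values are

$$ z_0 = \sqrt{-2\ln u_1}\,\sin(2\pi u_2), \qquad z_1 = \sqrt{-2\ln u_1}\,\cos(2\pi u_2), $$

which is why the code clamps $u_1$ to a small epsilon before taking the logarithm.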
+
+// Helper function to convert a 16-bit integer to a half between [0..1).
+PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16_t x)
+{
+ // IEEE754 halfs are formatted as follows (MSB first):
+ // sign(1) exponent(5) mantissa(10)
+ // Conceptually construct the following:
+ // sign == 0
+ // exponent == 15 -- an excess 15 representation of a zero exponent
+ // mantissa == 10 random bits
+ const uint16_t man = x & 0x3ffu; // 10 bit mantissa
+ const uint16_t exp = static_cast<uint16_t>(15);
+ const uint16_t val = (exp << 10) | man;
+
+ Eigen::half result;
+ result.x = val;
+ return result - Eigen::half(1.0);
+}
+
+// Helper function to convert a 32-bit integer to a float between [0..1).
+PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32_t x)
+{
+ // IEEE754 floats are formatted as follows (MSB first):
+ // sign(1) exponent(8) mantissa(23)
+ // Conceptually construct the following:
+ // sign == 0
+ // exponent == 127 -- an excess 127 representation of a zero exponent
+ // mantissa == 23 random bits
+ const uint32_t man = x & 0x7fffffu; // 23 bit mantissa
+ const uint32_t exp = static_cast<uint32_t>(127);
+ const uint32_t val = (exp << 23) | man;
+
+ // Assumes that endian-ness is same for float and uint32.
+ float result;
+ memcpy(&result, &val, sizeof(val));
+ return result - 1.0f;
+}
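
The same bit trick can be checked in isolation. This standalone sketch (illustration only, not part of the patch) rebuilds the float from the bit pattern: 23 random mantissa bits with the exponent forced to 127 give a value in [1, 2), and subtracting 1 lands in [0, 1).

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// Standalone sketch of the Uint32ToFloat bit trick.
float BitsToUnitFloat(uint32_t x)
{
  const uint32_t val = (127u << 23) | (x & 0x7fffffu);
  float result;
  std::memcpy(&result, &val, sizeof(val)); // assumes IEEE754 float, same endianness
  return result - 1.0f;
}

int main()
{
  std::printf("%f %f %f\n",
              BitsToUnitFloat(0u),        // 0.0
              BitsToUnitFloat(0x400000u), // 0.5
              BitsToUnitFloat(0x7fffffu)); // just below 1.0
  return 0;
}
```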
+
+// Helper function to convert two 32-bit integers to a double between [0..1).
+PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32_t x0, uint32_t x1)
+{
+ // IEEE754 doubles are formatted as follows (MSB first):
+ // sign(1) exponent(11) mantissa(52)
+ // Conceptually construct the following:
+ // sign == 0
+ // exponent == 1023 -- an excess 1023 representation of a zero exponent
+ // mantissa == 52 random bits
+ const uint32_t mhi = x0 & 0xfffffu; // upper 20 bits of mantissa
+ const uint32_t mlo = x1; // lower 32 bits of mantissa
+ const uint64_t man = (static_cast<uint64_t>(mhi) << 32) | mlo; // mantissa
+ const uint64_t exp = static_cast<uint64_t>(1023);
+ const uint64_t val = (exp << 52) | man;
+ // Assumes that endian-ness is same for double and uint64.
+ double result;
+ memcpy(&result, &val, sizeof(val));
+ return result - 1.0;
+}
+
+} // namespace random
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_HELPER_RANDOM_DISTRIBUTIONS_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_HELPER_RANDOM_OP_H__
+#define __NNFW_CKER_HELPER_RANDOM_OP_H__
+
+#include "cker/Types.h"
+#include "cker/Shape.h"
+#include "cker/Utils.h"
+
+#include "cker/operation/Helper/RandomDistributions.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+namespace functor
+{
+
+template <typename Device, class Distribution> struct FillPhiloxRandom;
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+// Declares the partially CPU-specialized functor struct.
+//
+// NOTE: Due to inlining done by the compiler, you may need to add
+// explicit instantiation of the functor in random_op.cc. See example
+// functor::FillPhiloxRandom<CPUDevice, random::UniformDistribution>.
+template <class Distribution> struct FillPhiloxRandom<CPUDevice, Distribution>
+{
+ void operator()(random::PhiloxRandom gen, typename Distribution::ResultElementType *data,
+ int64_t size, Distribution dist);
+};
+
+} // namespace functor
+} // namespace cker
+} // namespace nnfw
+#endif // __NNFW_CKER_HELPER_RANDOM_OP_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_HELPER_RANDOM_OP_CPU_H__
+#define __NNFW_CKER_HELPER_RANDOM_OP_CPU_H__
+
+#define EIGEN_USE_THREADS
+
+#include <algorithm>
+#include <cmath>
+#include <memory>
+
+#include "cker/Types.h"
+#include "cker/Shape.h"
+#include "cker/Utils.h"
+
+#include "cker/eigen/EigenSupport.h"
+
+#include "cker/operation/Helper/PhiloxRandom.h"
+#include "cker/operation/Helper/RandomOp.h"
+#include "cker/operation/Helper/RandomDistributions.h"
+
+#if EIGEN_COMP_GNUC && __cplusplus > 199711L
+#define DISABLE_FLOAT_EQUALITY_WARNING \
+ _Pragma("GCC diagnostic push") _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
+#define ENABLE_FLOAT_EQUALITY_WARNING _Pragma("GCC diagnostic pop")
+#else
+#define DISABLE_FLOAT_EQUALITY_WARNING
+#define ENABLE_FLOAT_EQUALITY_WARNING
+#endif
+
+namespace nnfw
+{
+namespace cker
+{
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+namespace functor
+{
+using random::PhiloxRandom;
+using random::SingleSampleAdapter;
+
+// The default implementation of the functor, which should never be invoked.
+// But we still need to provide an implementation for now for the linker to work,
+// since we do not support all the distributions yet.
+template <typename Device, class Distribution> struct FillPhiloxRandom
+{
+ typedef typename Distribution::ResultElementType T;
+ void operator()() {}
+};
+
+// A class to fill a specified range of random groups
+template <class Distribution, bool VariableSamplesPerOutput> struct FillPhiloxRandomTask;
+
+// Specialization for distribution that takes a fixed number of samples for
+// each output.
+template <class Distribution> struct FillPhiloxRandomTask<Distribution, false>
+{
+ typedef typename Distribution::ResultElementType T;
+ static void Run(random::PhiloxRandom gen, T *data, int64_t size, Distribution dist)
+ {
+ const int kGroupSize = Distribution::kResultElementCount;
+ gen.Skip(0);
+ int64_t offset = 0;
+
+ // First fill all the full-size groups
+ int64_t limit_group_full = size / kGroupSize;
+ for (int64_t index = 0; index < limit_group_full; ++index)
+ {
+ auto samples = dist(&gen);
+ std::copy(&samples[0], &samples[0] + kGroupSize, data + offset);
+ offset += kGroupSize;
+ }
+
+ int64_t remaining_size = size - limit_group_full * kGroupSize;
+
+ // If there are any remaining elements that need to be filled, process them
+ if (remaining_size > 0)
+ {
+ auto samples = dist(&gen);
+ std::copy(&samples[0], &samples[0] + remaining_size, data + offset);
+ }
+ }
+};
+
+// Specialization for distribution that takes a variable number of samples for
+// each output. This will be slower due to the generality.
+template <class Distribution> struct FillPhiloxRandomTask<Distribution, true>
+{
+ typedef typename Distribution::ResultElementType T;
+ static constexpr int64_t kReservedSamplesPerOutput = 256;
+
+ static void Run(random::PhiloxRandom base_gen, T *data, int64_t size, Distribution dist)
+ {
+ const int kGroupSize = Distribution::kResultElementCount;
+ static const int kGeneratorSkipPerOutputGroup =
+ kGroupSize * kReservedSamplesPerOutput / PhiloxRandom::kResultElementCount;
+
+ int64_t offset = 0;
+
+ // First fill all the full-size groups
+ int64_t limit_group_full = size / kGroupSize;
+ int64_t group_index;
+ for (group_index = 0; group_index < limit_group_full; ++group_index)
+ {
+ // Reset the generator to the beginning of the output group region
+ // This is necessary if we want the results to be independent of order
+ // of work
+ PhiloxRandom gen = base_gen;
+ gen.Skip(group_index * kGeneratorSkipPerOutputGroup);
+ SingleSampleAdapter<PhiloxRandom> single_samples(&gen);
+
+ auto samples = dist(&single_samples);
+ std::copy(&samples[0], &samples[0] + kGroupSize, data + offset);
+ offset += kGroupSize;
+ }
+
+ int64_t remaining_size = size - limit_group_full * kGroupSize;
+ // If there are any remaining elements that need to be filled, process them
+ if (remaining_size > 0)
+ {
+ PhiloxRandom gen = base_gen;
+ gen.Skip(group_index * kGeneratorSkipPerOutputGroup);
+ SingleSampleAdapter<PhiloxRandom> single_samples(&gen);
+
+ auto samples = dist(&single_samples);
+ std::copy(&samples[0], &samples[0] + remaining_size, data + offset);
+ }
+ }
+};
+
+// Partial specialization for CPU to fill the entire region with randoms
+// It splits the work into several tasks and runs them in parallel
+template <class Distribution>
+void FillPhiloxRandom<CPUDevice, Distribution>::
+operator()(random::PhiloxRandom gen, typename Distribution::ResultElementType *data, int64_t size,
+ Distribution dist)
+{
+ FillPhiloxRandomTask<Distribution, Distribution::kVariableSamplesPerOutput>::Run(gen, data, size,
+ dist);
+}
+
+} // namespace functor
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_HELPER_RANDOM_OP_CPU_H__
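
The fixed-sample-count path above boils down to a "full groups plus a tail" copy loop. A standalone sketch of that pattern with a toy group producer (illustration only, not part of the patch):

```cpp
#include <algorithm>
#include <array>
#include <cstdio>
#include <vector>

// Standalone sketch of the fill pattern used by FillPhiloxRandomTask: a
// producer emits fixed-size groups, full groups are copied directly, and a
// final partial group covers the remaining elements.
int main()
{
  constexpr int kGroupSize = 4;
  int next = 0;
  auto make_group = [&next]() {
    std::array<int, kGroupSize> g;
    for (auto &v : g) v = next++;
    return g;
  };

  std::vector<int> data(10); // size is not a multiple of kGroupSize
  int64_t offset = 0;
  const int64_t full_groups = static_cast<int64_t>(data.size()) / kGroupSize;
  for (int64_t i = 0; i < full_groups; ++i)
  {
    auto g = make_group();
    std::copy(g.begin(), g.end(), data.begin() + offset);
    offset += kGroupSize;
  }
  const int64_t remaining = static_cast<int64_t>(data.size()) - offset;
  if (remaining > 0)
  {
    auto g = make_group();
    std::copy(g.begin(), g.begin() + remaining, data.begin() + offset);
  }
  for (int v : data) std::printf("%d ", v); // 0 1 2 ... 9
  std::printf("\n");
  return 0;
}
```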
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_L2NORMALIZE_H__
+#define __NNFW_CKER_L2NORMALIZE_H__
+
+#include "cker/Shape.h"
+#include "cker/Utils.h"
+#include "cker/Types.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+void L2NormalizeFloat32(const Shape &input_shape, const float *input_data,
+ const Shape &output_shape, float *output_data)
+{
+ float epsilon = 1e-6;
+ const int trailing_dim = input_shape.DimensionsCount() - 1;
+ const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+ const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+ for (int i = 0; i < outer_size; ++i)
+ {
+ float squared_l2_norm = 0;
+ for (int c = 0; c < depth; ++c)
+ {
+ const float val = input_data[c];
+ squared_l2_norm += val * val;
+ }
+ float l2_norm = std::sqrt(squared_l2_norm);
+ l2_norm = std::max(l2_norm, epsilon);
+ for (int c = 0; c < depth; ++c)
+ {
+ *output_data = *input_data / l2_norm;
+ ++output_data;
+ ++input_data;
+ }
+ }
+}
+
+void L2NormalizeQuant8(L2NormParams &params, const Shape &input_shape, const uint8_t *input_data,
+ const Shape &output_shape, uint8_t *output_data)
+{
+ const int trailing_dim = input_shape.DimensionsCount() - 1;
+ const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+ const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+ const int32_t input_zero_point = params.input_zero_point;
+
+ for (int i = 0; i < outer_size; ++i)
+ {
+ int32_t square_l2_norm = 0;
+ for (int c = 0; c < depth; c++)
+ {
+ // Note that input_data advances by depth in the second pass below.
+ int32_t diff = input_data[c] - input_zero_point;
+ square_l2_norm += diff * diff;
+ }
+ int32_t inv_l2norm_multiplier;
+ int inv_l2norm_shift;
+ GetInvSqrtQuantizedMultiplierExp(square_l2_norm, -1, &inv_l2norm_multiplier, &inv_l2norm_shift);
+ for (int c = 0; c < depth; c++)
+ {
+ int32_t diff = *input_data - input_zero_point;
+ int32_t rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
+ int32_t unclamped_output_val = 128 + rescaled_diff;
+ int32_t output_val = std::min(static_cast<int32_t>(255),
+ std::max(static_cast<int32_t>(0), unclamped_output_val));
+ *output_data = static_cast<uint8_t>(output_val);
+ ++input_data;
+ ++output_data;
+ }
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_L2NORMALIZE_H__
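
The float path above normalizes each innermost row by max(||row||_2, epsilon). A small standalone numeric sketch (illustration only, not part of the patch):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Standalone sketch of per-row L2 normalization: divide each element of the
// innermost dimension by the clamped Euclidean norm of that row.
int main()
{
  std::vector<float> row = {3.0f, 4.0f};
  float squared = 0.0f;
  for (float v : row) squared += v * v;
  const float norm = std::max(std::sqrt(squared), 1e-6f);
  for (float &v : row) v /= norm;
  std::printf("%f %f\n", row[0], row[1]); // 0.6 0.8
  return 0;
}
```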
inline void Logistic(const Shape &input_shape, const float *input_data, const Shape &output_shape,
float *output_data)
{
-#ifdef __aarch64__
auto input_map = MapAsVector(input_data, input_shape);
auto output_map = MapAsVector(output_data, output_shape);
output_map.array() = input_map.array().unaryExpr(Eigen::internal::scalar_logistic_op<float>());
-#else
- // Note, this can be done using TANH: (1/2) + (1/2) * TANH(x/2)
- const int size = MatchingFlatSize(input_shape, output_shape);
- for (int i = 0; i < size; i++)
- {
- output_data[i] = 1.f / (1.f + std::exp(-input_data[i]));
- }
-#endif
}
} // namespace cker
{
auto last_dim = input_shape.DimensionsCount() - 1;
- T batch_num = 0;
- for (int dim = 0; dim < last_dim - 2; dim++)
+ T batch_num = 1;
+ for (int dim = 0; dim < input_shape.DimensionsCount() - 2; dim++)
{
- batch_num += input_shape.Dims(dim);
+ batch_num *= input_shape.Dims(dim);
}
const T row_num = input_shape.Dims(last_dim - 1);
{
namespace cker
{
+template <typename T>
inline void Pad(const int32_t *padding_data, int32_t pad_rank, const Shape &input_shape,
- const float *input_data, const Shape &output_shape, float *output_data,
- const float *constant_value_data)
+ const T *input_data, const Shape &output_shape, T *output_data,
+ const T *constant_value_data)
{
// Note, this is pad with mode=`CONSTANT`: it doesn't support `REFLECT` and `SYMMETRIC`
// TODO: come up with more subtle solution that uses subtensors like arm compute
/** List of padding information */
using PaddingList = std::vector<PaddingInfo>;
- auto constant_value = constant_value_data ? *constant_value_data : 0;
+ const T constant_value = constant_value_data ? *constant_value_data : 0;
assert(output_shape.DimensionsCount() == input_shape.DimensionsCount());
PaddingList padding_list(pad_rank);
{
const int32_t in_row_len = input_shape.Dims(0);
std::fill_n(output_data, padding_list[0].first, constant_value);
- std::memcpy(output_data + padding_list[0].first, input_data, in_row_len * sizeof(float));
+ std::memcpy(output_data + padding_list[0].first, input_data, in_row_len * sizeof(T));
std::fill_n(output_data + padding_list[0].first + in_row_len, padding_list[0].second,
constant_value);
break;
out_offset += padding_list[1].first;
// copy a row of input data
- memcpy(output_data + out_offset, input_data + in_offset, in_row_len * sizeof(float));
+ memcpy(output_data + out_offset, input_data + in_offset, in_row_len * sizeof(T));
out_offset += in_row_len;
out_offset += padding_list[2].first;
// copy a row of input data
- memcpy(output_data + out_offset, input_data + in_offset, in_row_len * sizeof(float));
+ memcpy(output_data + out_offset, input_data + in_offset, in_row_len * sizeof(T));
out_offset += in_row_len;
out_c_offset += padding_list[3].first;
// copy a row of input data
- memcpy(output_data + out_c_offset, input_data + in_offset, in_row_len * sizeof(float));
+ memcpy(output_data + out_c_offset, input_data + in_offset, in_row_len * sizeof(T));
out_c_offset += in_row_len;
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_QUANTIZE_H__
+#define __NNFW_CKER_QUANTIZE_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+#include <algorithm>
+#include <cmath>
+#include <limits>
+namespace nnfw
+{
+namespace cker
+{
+template <typename InputT, typename OutputT>
+inline void Quantize(const Shape &input_shape, const InputT *input_data, const Shape &output_shape,
+ OutputT *output_data, const float output_scale, const int32_t output_offset)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ int min_val = std::numeric_limits<OutputT>::min();
+ int max_val = std::numeric_limits<OutputT>::max();
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ int32_t unclamped = static_cast<int32_t>(std::round(input_data[i] / output_scale)) + output_offset;
+ int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
+ output_data[i] = clamped;
+ }
+}
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_QUANTIZE_H__
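
The affine quantization above is q = clamp(round(x / scale) + zero_point, qmin, qmax). A standalone sketch with made-up scale and zero-point values (illustration only, not part of the patch):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Standalone sketch of the affine quantization step for a uint8 output range.
int main()
{
  const float scale = 0.5f;
  const int32_t zero_point = 10;
  const float inputs[] = {-7.3f, 0.0f, 200.0f};
  for (float x : inputs)
  {
    int32_t unclamped = static_cast<int32_t>(std::round(x / scale)) + zero_point;
    int32_t clamped = std::min(std::max(unclamped, 0), 255);
    std::printf("%.1f -> %d\n", x, clamped); // -7.3 -> 0, 0.0 -> 10, 200.0 -> 255
  }
  return 0;
}
```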
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_RELU6_H__
+#define __NNFW_CKER_RELU6_H__
+
+#include "cker/Shape.h"
+#include "cker/eigen/Utils.h"
+
+#include <cmath>
+#include <Eigen/Core>
+
+namespace nnfw
+{
+namespace cker
+{
+
+inline void ReLU6(const Shape &input_shape, const float *input_data, float *output_data)
+{
+ int size = input_shape.FlatSize();
+
+ for (int i = 0; i < size; ++i)
+ {
+ if (input_data[i] <= 0)
+ {
+ output_data[i] = 0;
+ }
+ else if (input_data[i] > 6.0)
+ {
+ output_data[i] = 6.0;
+ }
+ else
+ {
+ output_data[i] = input_data[i];
+ }
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_RELU6_H__
num_resolved_axis, temp_index_data(), reducer, output_data);
}
+ // Computes the mean of elements across dimensions given in axis.
+ // It does so in two stages: first it computes the sum of elements along the axis,
+ // then it divides the sum by the number of elements in the axis, for quantized values.
+ template <typename T, typename U>
+ inline bool QuantizedMeanOrSum(const T *input_data, int32_t input_zero_point, float input_scale,
+ const Shape &input_shape, T *output_data,
+ int32_t output_zero_point, float output_scale,
+ const Shape &output_shape, const std::vector<int> &axes,
+ bool /*keep_dims*/, U *temp_sum, bool compute_sum,
+ U reducer(const U current, const T in))
+ {
+ // Reset output data.
+ size_t num_outputs = 1;
+ for (int idx = 0; idx < output_shape.DimensionsCount(); ++idx)
+ {
+ size_t current = static_cast<size_t>(output_shape.Dims(idx));
+ // Overflow prevention.
+ if (num_outputs > std::numeric_limits<size_t>::max() / current)
+ {
+ return false;
+ }
+ num_outputs *= current;
+ }
+ for (size_t idx = 0; idx < num_outputs; ++idx)
+ {
+ output_data[idx] = T();
+ temp_sum[idx] = U();
+ }
+
+ // Resolve axis.
+ int num_resolved_axis = 0;
+ if (!ResolveAxis(input_shape.DimensionsCount(), axes, resolved_axis_data(), &num_resolved_axis))
+ {
+ return false;
+ }
+
+ if (!ReduceImpl<T, U>(input_data, input_shape, output_shape, resolved_axis_data(),
+ num_resolved_axis, temp_index_data(), reducer, temp_sum))
+ {
+ return false;
+ }
+
+ // Calculate mean by dividing output_data by num of aggregated element.
+ U num_elements_in_axis = 1;
+ for (int idx = 0; idx < num_resolved_axis; ++idx)
+ {
+ size_t current = static_cast<size_t>(input_shape.Dims(resolved_axis_data()[idx]));
+ // Overflow prevention.
+ if (current > static_cast<size_t>(std::numeric_limits<U>::max() / num_elements_in_axis))
+ {
+ return false;
+ }
+ num_elements_in_axis *= current;
+ }
+
+ if (num_elements_in_axis > 0)
+ {
+ const float scale = input_scale / output_scale;
+ if (compute_sum)
+ {
+ // TODO(b/116341117): Eliminate float and do this completely in 8bit.
+ const float bias = -input_zero_point * scale * num_elements_in_axis + 0.5f;
+ for (size_t idx = 0; idx < num_outputs; ++idx)
+ {
+ const U value =
+ static_cast<U>(std::round(temp_sum[idx] * scale + bias)) + output_zero_point;
+ output_data[idx] = static_cast<T>(value);
+ }
+ }
+ else
+ {
+ const float bias = -input_zero_point * scale + 0.5f;
+ for (size_t idx = 0; idx < num_outputs; ++idx)
+ {
+ float float_mean =
+ static_cast<float>(temp_sum[idx]) / static_cast<float>(num_elements_in_axis);
+ float result = std::min(std::round(float_mean * scale + bias) + output_zero_point,
+ static_cast<float>(std::numeric_limits<T>::max()));
+ result = std::max(result, static_cast<float>(std::numeric_limits<T>::min()));
+ output_data[idx] = static_cast<T>(result);
+ }
+ }
+ }
+ return true;
+ }
+
inline int32_t *resolved_axis_data(void)
{
return _resolved_axis.size() ? _resolved_axis.data() : _resolved_axis_small;
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_RESIZEBILINEAR_H__
+#define __NNFW_CKER_RESIZEBILINEAR_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include <cmath>
+
+namespace nnfw
+{
+namespace cker
+{
+
+inline void ResizeBilinearKernel2x2(int32_t x0, int32_t x1, int32_t y0, int32_t y1, int32_t x,
+ int32_t y, int32_t depth, int32_t batch,
+ const Shape &input_shape, const float *input_data,
+ const Shape &output_shape, float *output_data)
+{
+ const int32_t input_width = input_shape.Dims(2);
+ const int32_t output_width = output_shape.Dims(2);
+
+ const int32_t input_x_offset = (x1 - x0) * depth;
+ const int32_t input_y_offset = (y1 - y0) * depth * input_width;
+ const int32_t output_x_offset = depth;
+ const int32_t output_y_offset = depth * output_width;
+
+ for (int ch = 0; ch < depth; ch++)
+ {
+ const int32_t input_offset = Offset(input_shape, batch, y0, x0, ch);
+
+ float x0y0 = input_data[input_offset];
+ float x1y0 = input_data[input_offset + input_x_offset];
+ float x0y1 = input_data[input_offset + input_y_offset];
+ float x1y1 = input_data[input_offset + input_x_offset + input_y_offset];
+
+ // Top left corner.
+ const int32_t output_offset = Offset(output_shape, batch, y, x, ch);
+ output_data[output_offset] = x0y0;
+
+ // Top right corner.
+ output_data[output_offset + output_x_offset] = (x0y0 + x1y0) / 2;
+
+ // Bottom left corner.
+ float output = (x0y0 + x0y1) / 2;
+ output_data[output_offset + output_y_offset] = output;
+
+ // Bottom right corner.
+ output_data[output_offset + output_x_offset + output_y_offset] =
+ (output + ((x1y0 + x1y1) / 2)) / 2;
+ }
+}
+
+inline void ResizeBilinear2x2(int32_t batches, int32_t input_height, int32_t input_width,
+ int32_t depth, int32_t output_height, int32_t output_width,
+ const Shape &input_shape, const float *input_data,
+ const Shape &output_shape, float *output_data)
+{
+ for (int b = 0; b < batches; b++)
+ {
+ for (int y0 = 0, y = 0; y <= output_height - 2; y += 2, y0++)
+ {
+ for (int x0 = 0, x = 0; x <= output_width - 2; x += 2, x0++)
+ {
+ int32_t x1 = std::min(x0 + 1, input_width - 1);
+ int32_t y1 = std::min(y0 + 1, input_height - 1);
+ ResizeBilinearKernel2x2(x0, x1, y0, y1, x, y, depth, b, input_shape, input_data,
+ output_shape, output_data);
+ }
+ }
+ }
+}
+
+inline void ResizeBilinearKernel(const float *input_ptr, int32_t depth, float scale,
+ float *output_ptr)
+{
+ for (int32_t i = 0; i < depth; i++)
+ {
+ *output_ptr += *input_ptr * scale;
+ output_ptr++;
+ input_ptr++;
+ }
+}
+
+inline void ComputeInterpolationValues(const float value, const float scale,
+ const bool half_pixel_centers, int32_t input_size,
+ float *scaled_value, int32_t *lower_bound,
+ int32_t *upper_bound)
+{
+ if (half_pixel_centers)
+ {
+ *scaled_value = (value + 0.5f) * scale - 0.5f;
+ }
+ else
+ {
+ *scaled_value = value * scale;
+ }
+ float scaled_value_floor = std::floor(*scaled_value);
+ *lower_bound = std::max(static_cast<int32_t>(scaled_value_floor), static_cast<int32_t>(0));
+ *upper_bound = std::min(static_cast<int32_t>(std::ceil(*scaled_value)), input_size - 1);
+}
+
+inline void ResizeBilinearGeneric(int32_t batches, int32_t input_height, int32_t input_width,
+ int32_t depth, int32_t output_height, int32_t output_width,
+ float height_scale, float width_scale, const Shape &input_shape,
+ const float *input_data, float *output_data,
+ const bool half_pixel_centers)
+{
+ memset(output_data, 0, batches * output_height * output_width * depth * sizeof(float));
+
+ int32_t output_offset = 0;
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int y = 0; y < output_height; ++y)
+ {
+ float input_y;
+ int32_t y0, y1;
+ ComputeInterpolationValues(y, height_scale, half_pixel_centers, input_height, &input_y, &y0,
+ &y1);
+ for (int x = 0; x < output_width; ++x)
+ {
+ float input_x;
+ int32_t x0, x1;
+ ComputeInterpolationValues(x, width_scale, half_pixel_centers, input_width, &input_x, &x0,
+ &x1);
+ float *output_ptr = &output_data[output_offset];
+
+ // Run kernel on the 4 corners of the bilinear resize algorithm.
+ int32_t input_offset = Offset(input_shape, b, y0, x0, 0);
+ float scale = (1 - (input_y - y0)) * (1 - (input_x - x0));
+ const float *input_ptr = &input_data[input_offset];
+ ResizeBilinearKernel(input_ptr, depth, scale, output_ptr);
+
+ input_offset = Offset(input_shape, b, y0, x1, 0);
+ scale = (1 - (input_y - y0)) * (input_x - x0);
+ input_ptr = &input_data[input_offset];
+ ResizeBilinearKernel(input_ptr, depth, scale, output_ptr);
+
+ input_offset = Offset(input_shape, b, y1, x0, 0);
+ scale = (input_y - y0) * (1 - (input_x - x0));
+ input_ptr = &input_data[input_offset];
+ ResizeBilinearKernel(input_ptr, depth, scale, output_ptr);
+
+ input_offset = Offset(input_shape, b, y1, x1, 0);
+ scale = (input_y - y0) * (input_x - x0);
+ input_ptr = &input_data[input_offset];
+ ResizeBilinearKernel(input_ptr, depth, scale, output_ptr);
+
+ output_offset += depth;
+ }
+ }
+ }
+}
+
+template <typename T>
+inline void ResizeBilinearGenericSmallChannel(int32_t batches, int32_t input_height,
+ int32_t input_width, int32_t depth,
+ int32_t output_height, int32_t output_width,
+ float height_scale, float width_scale,
+ const Shape &input_shape, const T *input_data,
+ T *output_data, const bool half_pixel_centers)
+{
+ T *output_ptr = &output_data[0];
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int y = 0; y < output_height; ++y)
+ {
+ float input_y;
+ int32_t y0, y1;
+ ComputeInterpolationValues(y, height_scale, half_pixel_centers, input_height, &input_y, &y0,
+ &y1);
+ for (int x = 0; x < output_width; ++x)
+ {
+ float input_x;
+ int32_t x0, x1;
+ ComputeInterpolationValues(x, width_scale, half_pixel_centers, input_width, &input_x, &x0,
+ &x1);
+
+ int32_t input_offset[4] = {
+ Offset(input_shape, b, y0, x0, 0), Offset(input_shape, b, y0, x1, 0),
+ Offset(input_shape, b, y1, x0, 0), Offset(input_shape, b, y1, x1, 0)};
+ float scale[4] = {(1 - (input_y - y0)) * (1 - (input_x - x0)),
+ (1 - (input_y - y0)) * (input_x - x0),
+ (input_y - y0) * (1 - (input_x - x0)), (input_y - y0) * (input_x - x0)};
+
+ for (int d = 0; d < depth; d++)
+ {
+ const T *input_ptr = &input_data[d];
+ *output_ptr++ = static_cast<T>(
+ input_ptr[input_offset[0]] * scale[0] + input_ptr[input_offset[1]] * scale[1] +
+ input_ptr[input_offset[2]] * scale[2] + input_ptr[input_offset[3]] * scale[3]);
+ }
+ }
+ }
+ }
+}
+
+void ResizeBilinear(ResizeBilinearParams &params, const Shape &input_shape, const float *input_data,
+ const Shape &output_shape, float *output_data)
+{
+ int32_t batches = static_cast<int32_t>(MatchingDim(input_shape, 0, output_shape, 0));
+ int32_t input_height = input_shape.Dims(1);
+ int32_t input_width = input_shape.Dims(2);
+ int32_t depth = static_cast<int32_t>(MatchingDim(input_shape, 3, output_shape, 3));
+
+ // Specialize for 2x2 upsample.
+ if (!params.align_corners && !params.half_pixel_centers &&
+ params.output_height == 2 * input_height && params.output_width == 2 * input_width)
+ {
+ ResizeBilinear2x2(batches, input_height, input_width, depth, params.output_height,
+ params.output_width, input_shape, input_data, output_shape, output_data);
+ }
+ else
+ {
+ float height_scale = static_cast<float>(input_height) / params.output_height;
+ float width_scale = static_cast<float>(input_width) / params.output_width;
+ if (params.align_corners && params.output_height > 1)
+ {
+ height_scale = static_cast<float>(input_height - 1) / (params.output_height - 1);
+ }
+ if (params.align_corners && params.output_width > 1)
+ {
+ width_scale = static_cast<float>(input_width - 1) / (params.output_width - 1);
+ }
+
+ ResizeBilinearGeneric(batches, input_height, input_width, depth, params.output_height,
+ params.output_width, height_scale, width_scale, input_shape, input_data,
+ output_data, params.half_pixel_centers);
+ }
+}
+
+void ResizeBilinear(ResizeBilinearParams &params, const Shape &input_shape,
+ const uint8_t *input_data, const Shape &output_shape, uint8_t *output_data)
+{
+ int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
+ int32_t input_height = input_shape.Dims(1);
+ int32_t input_width = input_shape.Dims(2);
+ int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
+
+ float height_scale = (params.align_corners && params.output_height > 1)
+ ? (static_cast<float>(input_height - 1) / (params.output_height - 1))
+ : (static_cast<float>(input_height) / params.output_height);
+
+ float width_scale = (params.align_corners && params.output_width > 1)
+ ? (static_cast<float>(input_width - 1) / (params.output_width - 1))
+ : (static_cast<float>(input_width) / params.output_width);
+
+ ResizeBilinearGenericSmallChannel<uint8_t>(
+ batches, input_height, input_width, depth, params.output_height, params.output_width,
+ height_scale, width_scale, input_shape, input_data, output_data, params.half_pixel_centers);
+}
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_RESIZEBILINEAR_H__
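
ComputeInterpolationValues above reduces each output coordinate to a pair of neighbouring input indices and a fractional weight; the four corner scales in the generic kernels are products of these per-axis weights. A standalone sketch of the per-axis computation, using made-up sizes (illustration only, not part of the patch):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>

// Standalone sketch of the per-axis bilinear weights: map an output coordinate
// to a (possibly half-pixel-centered) input coordinate, take the two
// neighbouring input indices, and blend them with weights that sum to 1.
int main()
{
  const bool half_pixel_centers = false;
  const float scale = 0.75f; // input_size / output_size along this axis
  const int input_size = 6;
  const float out_coord = 3.0f;

  float in = half_pixel_centers ? (out_coord + 0.5f) * scale - 0.5f : out_coord * scale;
  int lower = std::max(static_cast<int>(std::floor(in)), 0);
  int upper = std::min(static_cast<int>(std::ceil(in)), input_size - 1);
  float frac = in - lower;

  // in=2.25 lower=2 upper=3 weights=(0.75, 0.25)
  std::printf("in=%f lower=%d upper=%d weights=(%f, %f)\n", in, lower, upper, 1.0f - frac, frac);
  return 0;
}
```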
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_SPACE_TO_DEPTH_H__
+#define __NNFW_CKER_SPACE_TO_DEPTH_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+template <typename T>
+inline void SpaceToDepth(const SpaceToDepthParams &params, const Shape &unextended_input_shape,
+ const T *input_data, const Shape &unextended_output_shape, T *output_data)
+{
+ assert(unextended_input_shape.DimensionsCount() <= 4);
+ assert(unextended_output_shape.DimensionsCount() <= 4);
+ const Shape input_shape = Shape::ExtendedShape(4, unextended_input_shape);
+ const Shape output_shape = Shape::ExtendedShape(4, unextended_output_shape);
+
+ const int output_depth = output_shape.Dims(3);
+ const int output_width = output_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+
+ const int input_depth = input_shape.Dims(3);
+ const int batch_size = input_shape.Dims(0);
+
+ // Number of contiguous values that we can copy in one iteration.
+ const int stride = params.block_size * input_depth;
+
+ for (int batch = 0; batch < batch_size; ++batch)
+ {
+ for (int out_h = 0; out_h < output_height; ++out_h)
+ {
+ T *output_ptr = output_data + Offset(output_shape, batch, out_h, 0, 0);
+ for (int offset_h = 0; offset_h < params.block_size; ++offset_h)
+ {
+ T *dst = output_ptr;
+ for (int out_w = 0; out_w < output_width; ++out_w)
+ {
+ memcpy(dst, input_data, stride * sizeof(T));
+ input_data += stride;
+ dst += output_depth;
+ }
+ output_ptr += stride;
+ }
+ }
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_SPACE_TO_DEPTH_H__
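
SpaceToDepth folds each block_size x block_size spatial patch into channels, which is why the copy stride above is block_size * input_depth. A standalone sketch of the shape relation (illustration only, not part of the patch):

```cpp
#include <cstdio>

// Standalone sketch of the SpaceToDepth shape relation for an NHWC tensor.
int main()
{
  const int block = 2;
  const int in_h = 4, in_w = 4, in_d = 3;
  const int out_h = in_h / block;
  const int out_w = in_w / block;
  const int out_d = in_d * block * block;
  std::printf("[1,%d,%d,%d] -> [1,%d,%d,%d]\n", in_h, in_w, in_d, out_h, out_w, out_d);
  return 0;
}
```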
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_SPLIT_V_H__
+#define __NNFW_CKER_SPLIT_V_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+template <typename Scalar>
+void SplitV(const SplitVParams &params, const Shape &input_shape, const Scalar *input_data,
+ std::vector<nnfw::cker::Shape> &output_shapes, Scalar *const *output_data)
+{
+ const int split_dimensions = input_shape.DimensionsCount();
+ int axis = params.axis < 0 ? params.axis + split_dimensions : params.axis;
+ int outputs_count = params.num_split;
+
+ int64_t split_size = 0;
+
+ for (int i = 0; i < outputs_count; i++)
+ {
+ // TFLITE_DCHECK_EQ(output_shapes[i]->DimensionsCount(), split_dimensions);
+ for (int j = 0; j < split_dimensions; j++)
+ {
+ if (j != axis)
+ {
+ MatchingDim(output_shapes[i], j, input_shape, j);
+ }
+ }
+ split_size += output_shapes[i].Dims(axis);
+ }
+
+ int64_t outer_size = 1;
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+ // For all output arrays,
+ // FlatSize() = outer_size * Dims(axis) * base_inner_size;
+ int64_t base_inner_size = 1;
+ for (int i = axis + 1; i < split_dimensions; ++i)
+ {
+ base_inner_size *= input_shape.Dims(i);
+ }
+
+ const Scalar *input_ptr = input_data;
+ int copy_size = 0;
+ for (int k = 0; k < outer_size; k++)
+ {
+ for (int i = 0; i < outputs_count; ++i)
+ {
+ copy_size = output_shapes[i].Dims(axis) * base_inner_size;
+ memcpy(output_data[i] + k * copy_size, input_ptr, copy_size * sizeof(Scalar));
+ input_ptr += copy_size;
+ }
+ }
+}
+
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_SPLIT_V_H__
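
The copy loop above walks the input once, handing each output its own Dims(axis) * base_inner_size chunk per outer slice. A standalone sketch on a tiny 2x5 tensor split along axis 1 into sizes {2, 3} (illustration only, not part of the patch):

```cpp
#include <cstdio>
#include <cstring>
#include <vector>

// Standalone sketch of the SplitV copy pattern: for every outer slice, each
// output receives its own contiguous chunk of elements, in order.
int main()
{
  const int outer_size = 2;
  const std::vector<int> split_sizes = {2, 3};
  const int input[2][5] = {{0, 1, 2, 3, 4}, {5, 6, 7, 8, 9}};

  std::vector<std::vector<int>> outputs;
  for (int s : split_sizes) outputs.emplace_back(outer_size * s);

  const int *input_ptr = &input[0][0];
  for (int k = 0; k < outer_size; ++k)
  {
    for (size_t i = 0; i < split_sizes.size(); ++i)
    {
      const int copy_size = split_sizes[i]; // base_inner_size == 1 here
      std::memcpy(outputs[i].data() + k * copy_size, input_ptr, copy_size * sizeof(int));
      input_ptr += copy_size;
    }
  }
  // outputs[0] = {0, 1, 5, 6}, outputs[1] = {2, 3, 4, 7, 8, 9}
  std::printf("%d %d %d %d\n", outputs[0][0], outputs[0][1], outputs[0][2], outputs[0][3]);
  return 0;
}
```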
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_STATELESS_RANDOM_UNIFORM_H__
+#define __NNFW_CKER_STATELESS_RANDOM_UNIFORM_H__
+
+#include "cker/Types.h"
+#include "cker/Shape.h"
+#include "cker/Utils.h"
+
+#include "cker/eigen/EigenSupport.h"
+
+#include "cker/operation/Helper/Tensor.h"
+#include "cker/operation/Helper/PhiloxRandom.h"
+#include "cker/operation/Helper/RandomOpCpu.h"
+#include "cker/operation/Helper/RandomDistributions.h"
+
+namespace nnfw
+{
+namespace cker
+{
+
+void GenerateKey(Tensor seed, random::PhiloxRandom::Key *out_key,
+ random::PhiloxRandom::ResultType *out_counter)
+{
+ // Grab the two seeds
+ uint32_t seed0;
+ uint32_t seed1;
+
+ const auto seed_vals = seed.flat<int32_t>();
+
+ seed0 = seed_vals(0);
+ seed1 = seed_vals(1);
+ // Scramble the seeds so that the user doesn't need to worry about which
+ // part of the seed needs to be strong.
+ (*out_key)[0] = 0x3ec8f720;
+ (*out_key)[1] = 0x02461e29;
+ (*out_counter)[0] = static_cast<uint32_t>(seed0);
+ (*out_counter)[1] = (*out_counter)[3] = 0;
+ (*out_counter)[2] = static_cast<uint32_t>(seed1);
+ const auto mix = random::PhiloxRandom(*out_counter, *out_key)();
+ (*out_key)[0] = mix[0];
+ (*out_key)[1] = mix[1];
+ (*out_counter)[0] = (*out_counter)[1] = 0;
+ (*out_counter)[2] = mix[2];
+ (*out_counter)[3] = mix[3];
+}
+
+template <typename Device, class Distribution>
+void Fill(random::PhiloxRandom random, Tensor *output)
+{
+ // Build distribution
+ typedef typename Distribution::ResultElementType T;
+
+ auto flat = output->flat<T>();
+ // Reuse the compute kernels from the stateful random ops
+ functor::FillPhiloxRandom<Device, Distribution>()(random, flat.data(), flat.size(),
+ Distribution());
+}
+
+inline void StatelessRandomUniform(const Shape &shape_shape, const int *shape_data,
+ const Shape &seed_shape, const int *seed_data,
+ const Shape &output_shape, float *output_data)
+{
+ Tensor shape_t;
+ Tensor seed_t;
+
+ shape_t.shape.ReplaceWith(shape_shape.DimensionsCount(), shape_shape.DimsData());
+ shape_t.buffer = (void *)shape_data;
+
+ seed_t.shape.ReplaceWith(seed_shape.DimensionsCount(), seed_shape.DimsData());
+ seed_t.buffer = (void *)seed_data;
+
+ Tensor output_t;
+ output_t.shape.ReplaceWith(output_shape.DimensionsCount(), output_shape.DimsData());
+ output_t.buffer = output_data;
+
+ random::PhiloxRandom::Key key;
+ random::PhiloxRandom::ResultType counter;
+
+ GenerateKey(seed_t, &key, &counter);
+
+ Fill<Eigen::ThreadPoolDevice, random::UniformDistribution<random::PhiloxRandom, float>>(
+ random::PhiloxRandom(counter, key), &output_t);
+}
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_STATELESS_RANDOM_UNIFORM_H__
#include <ruy/context.h>
#include "cker/Types.h"
-namespace
-{
-const int kDefaultNumThreadpoolThreads = 4;
-}
-
namespace nnfw
{
namespace cker
namespace ruy_support
{
-struct RuyContext
-{
-public:
- RuyContext() : ruy_context_(new ruy::Context)
- {
- SetMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS));
-#ifdef USE_RUY_GEMV
- ruy_context_->cache_policy = ruy::kCacheLHSOnNarrowMul;
-#endif
- };
-
- ruy::Context *ruy_context() const { return ruy_context_.get(); }
-
- static inline RuyContext &GetRuyContext()
- {
- static thread_local RuyContext instance;
- return instance;
- }
-
- void SetMaxNumThreads(int max_num_threads)
- {
- const int target_num_threads =
- max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
- ruy_context_->max_num_threads = target_num_threads;
- }
-
-private:
- const std::unique_ptr<ruy::Context> ruy_context_;
-};
-
-inline ruy::Context *GetRuyContext()
-{
- auto &ctx = RuyContext::GetRuyContext();
- return ctx.ruy_context();
-}
-
template <typename Scalar, typename DataPointer>
void MakeRuyMatrix(const MatrixParams<Scalar> ¶ms, DataPointer data_ptr,
ruy::Matrix<Scalar> *dst)
author = 'Samsung Research & contributors'
# The full version, including alpha/beta/rc tags
-release = '1.7.0'
+release = '1.8.0'
# -- General configuration ---------------------------------------------------
```
$ sudo apt-get install cmake libboost-all-dev
-```
+```
If your Linux system does not have the basic development configuration, you will need to install more packages. A list of all the packages needed to configure the development environment can be found in the https://github.com/Samsung/ONE/blob/master/infra/docker/Dockerfile.1804 file.
scons \
software-properties-common \
unzip \
-wget
+wget
$ mkdir /tmp/gtest
$ cd /tmp/gtest
```
$ git clone https://github.com/Samsung/ONE.git one
$ cd one
-$ cp -n Makefile.template Makefile; make install
+$ make -f Makefile.template install
```
Unfortunately, the debug build on the x86_64 architecture currently fails with an error. To solve the problem, you must use gcc version 9 or higher. Another workaround is to do a release build rather than a debug build. This is not suitable for debugging during development, but it is enough to check that the runtime works. To build the runtime in release mode, add the environment variable `BUILD_TYPE=release` to the build command as follows.
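
For example, a release build and install can be started like this (a sketch based on the `Makefile.template` command above; adjust it to your own build flow):

```
$ BUILD_TYPE=release make -f Makefile.template install
```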
│ │ ├── NeuralNetworksEx.h
│ │ ├── NeuralNetworksExtensions.h
│ │ ├── NeuralNetworks.h
-│ │ ├── nnfw_dev.h
+│ │ ├── nnfw_experimental.h
│ │ └── nnfw.h
│ └── onert
│ ├── backend
```
3) (Optional) Assign a specific backend to operations
``` c
- // Use acl_neon backend for CONV_2D and acl_cl for otherwise.
- // Note that defalut backend is acl_cl
+ // Use the 'acl_neon' backend for CONV_2D and the 'cpu' backend otherwise.
+ // Note that the default backend is 'cpu'.
nnfw_set_op_backend(session, "CONV_2D", "acl_neon");
```
Here is an example of using Makefile.
```bash
-cp -n Makefile.template Makefile
-
TARGET_OS=android \
CROSS_BUILD=1 \
NDK_DIR=/path/android-tools/r20/ndk \
EXT_ACL_FOLDER=/path/arm_compute-v19.11.1-bin-android/lib/android-arm64-v8a-neon-cl \
-make install
+make -f Makefile.template install
```
SHAPE | O | O | O
SIN | O | O | O
SKIP_GRAM | O | |
-SLICE | O | O |
+SLICE | O | O | O
SOFTMAX | O | O | O
SPACE_TO_BATCH_ND | O | O | O
SPACE_TO_DEPTH | O | O | O
+++ /dev/null
-## Feature Highlights
-
-- **ONE** Compiler
- - Compiler supports more operations
- - New command line interface for user interface consistancy
-- **ONE** Runtime
- - Runtime CPU backend supports more operations
- - Runtime CPU backend supports more quant8 operations
- - API changes
- - New optimization
-
-## ONE Compiler
-
-### Compiler supports more operations
-
-- MatrixDiag, MatrixSetDiag, ReverseSequence, ReverseV2, SegmentSum, SelectV2, SparseToDense, Where
-
-### New command line interface for user interface consistancy
-
-- one-import: imports conventional model files to circle
- - one-import-tf: imports TensorFlow model to circle
- - one-import-tflite: imports TensorFlow lite model to circle
-- one-optimize: circle optimize command
-- one-quantize: circle quantize command
- - supports float32 to uint8, layer wise (for Conv series)
-- one-pack: package command
-- one-prepare-venv: prepares python virtual environment for importing TensorFlow model
-- one-codegen: backend(if available) code generator
-
-## ONE Runtime
-
-### Runtime CPU backend supports more operations
-
-- LogSoftmax, SpaceToBatchND
-
-### Runtime CPU backend supports more quant8 operations
-
-- Logistic, Mul, Tanh, SpaceToBatchND, Transpose, Sub, Max, Min, Less, Greater, GreaterEqual, LessEqual, Equal, NotEqual
-
-### API changes
-
-- Introduce basic asynchronous execution API
-
-### New optimization
-
-- Remove dynamic tensor overhead from static models
--- /dev/null
+# Release Note 1.8.0
+
+## Feature Highlights
+
+- **ONE** Compiler
+ - Support new command line interface
+
+- **ONE** Runtime
+ - CPU backend supports 7 more operations
+ - CPU backend supports 9 more quant8 operations
+
+## ONE Compiler
+
+### New command line interface for user interface consistency
+
+- `one-import-bcq` : import BCQ(Binary coding quantized) TensorFlow model
+- Commands now support `--version` option to show version number
+
+### Changes
+
+- Experimental support for TensorFlow 2.x has been updated to 2.3.0 (TensorFlow 1.13.2 is our officially supported version)
+- Support more operators in luci-interpreter
+- Enhancing one-quantizer
+
+## ONE Runtime
+
+### Rename headers
+
+- Rename `nnfw_dev.h` to `nnfw_experimental.h`
+
+### Optimization
+
+- Remove copies for model input/outputs whenever possible
+
+### Support CPU backend operations
+
+- BatchToSpaceND, L2Normalization, ReLU6, ResizeBilinear, SpaceToDepth, SplitV, StatelessRandomUniform
+
+### Support CPU backend quant8 operations
+
+- BatchToSpaceND, L2Normalization, Pad, PadV2, ResizeBilinear, Slice, Quantize, SpaceToDepth, Sum
+
# API
+
+## Runtime Layered Architecture
+
+Here is a figure of the runtime layered architecture.
+
+![Layered Architecture](api-layered-arch.png)
+
+There are three parts - Frontend, Core and Backend. Core works with the Frontend and Backend API. Frontend takes user inputs (neural network models) and Backend does the actual computation.
+
+## Frontend API
+
+Frontend API is about creating/loading a model and running it.
+
+Runtime supports two (frontend) APIs - NN API and NNFW API.
+
+### NN API
+
+NN API stands for Android Neural Networks API. It is part of the Android Open Source Project, and we provide a binding between NN API and One Runtime.
+
+For usage, refer to [Howto : NN API](../howto/how-to-use-nnapi-binding.md).
+
+### NNFW API
+
+NNFW API is ONE's own API. It supports loading models from NN Packages. As it is our own API, it can expose most of the functionality that One Runtime offers. For example, it provides functions for execution with multiple backends.
+
+For usage, refer to [Howto : NNFW API](../howto/how-to-use-nnfw-api.md).
+
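+For a rough idea of the call flow, here is a minimal sketch of running one inference through NNFW API (error-status checks are omitted; the nnpackage path, tensor sizes and tensor types below are placeholders you would replace for a real model):
+
+```c
+#include <nnfw.h>
+
+#define INPUT_SIZE 128  /* placeholder: use your model's input element count */
+#define OUTPUT_SIZE 10  /* placeholder: use your model's output element count */
+
+int main(void)
+{
+  nnfw_session *session = NULL;
+  nnfw_create_session(&session);
+
+  /* Load a model from an NN Package directory (placeholder path) */
+  nnfw_load_model_from_file(session, "path/to/nnpackage");
+
+  /* Optionally choose backends, then compile the model */
+  nnfw_set_available_backends(session, "cpu");
+  nnfw_prepare(session);
+
+  /* Bind user buffers and run one inference */
+  float input[INPUT_SIZE] = {0};
+  float output[OUTPUT_SIZE] = {0};
+  nnfw_set_input(session, 0, NNFW_TYPE_TENSOR_FLOAT32, input, sizeof(input));
+  nnfw_set_output(session, 0, NNFW_TYPE_TENSOR_FLOAT32, output, sizeof(output));
+  nnfw_run(session);
+
+  nnfw_close_session(session);
+  return 0;
+}
+```
+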
+## Backend API
+
+Backend API is defined by One Runtime.
+
+Backend API is about the actual computation of operations and memory management for operands. In order to allow different kinds of computation units or computation libraries, One Runtime defines Backend API to support user-defined operation kernels and memory managers. It consists of a number of C++ headers, which are subject to change.
+
+For detailed descriptions, refer to [Backend API](../runtime/backend-api.md).
With generated tensors and kernels, the compiler creates executor objects. There are 3 types of executors supported - Linear, Dataflow, and Parallel. Linear executor is the default executor, and Dataflow Executor and Parallel Executor are experimental.
-For more about executors, please refer to [Executors](./executors.md) document.
+For more about executors, please refer to the [Executors](executors.md) document.
### Module `exec`
Backends are plugins and they are loaded dynamically (via `dlopen`). So this module is a set of interface classes for backend implementation. `compiler` can compile with a variety of backends without knowing the specific backend implementation.
-Backend interface classes are mostly about memory management and kernel generation. For more, please refer to [Backend API](./backend-api.md) document.
+Backend interface classes are mostly about memory management and kernel generation. For more, please refer to the [Backend API](backend-api.md) document.
![Add-3Conv model](heterogeneous-execution-add-3-conv-model.png)
-Say we have 3 backends that are based on CPU, GPU and NPU(Neural Processing Unit) respectively. After executing Add, 3 Conv2D operations are ready to run. We may utilize those backends with [Parallel Executor (experimental)](./executors.md#parallel-executor-experimental). For this case we may get performance gain regardless of kernels' speed as those are run in parallel independently.
+Say we have 3 backends that are based on CPU, GPU and NPU (Neural Processing Unit) respectively. After executing Add, 3 Conv2D operations are ready to run. We may utilize those backends with [Parallel Executor (experimental)](executors.md#parallel-executor-experimental). In this case we may get a performance gain regardless of the kernels' speed, as they run independently in parallel.
## Graph Transformation
-Unfortunately it is not that simple to get performance gain. As each backend has its own memory management module, a copy must be done between backend boundaries. Plus, it may require layout changes so "Permute" operations are added from `PermutationInsertionPass`. This process is done from [Lowering](./core.md#1-lowering) phase of compilation.
+Unfortunately it is not that simple to get a performance gain. As each backend has its own memory management module, a copy must be done across backend boundaries. Plus, layout changes may be required, so "Permute" operations are added by `PermutationInsertionPass`. This process is done in the [Lowering](core.md#1-lowering) phase of compilation.
Here is an example of that. Let's say we have assigned different backends for Add and Conv2D. So a Permute operation is inserted between them.
nnas_include(OptionTools)
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- set(ARMCOMPUTE_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/ComputeLibrary/archive/v19.11.1.tar.gz)
+ set(ARMCOMPUTE_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/ComputeLibrary/archive/v20.05.tar.gz)
ExternalSource_Download(ARMCOMPUTE ${ARMCOMPUTE_URL})
set(ARMComputeSource_DIR ${ARMCOMPUTE_SOURCE_DIR} PARENT_SCOPE)
# NOTE TensorFlow 1.12 downloads farmhash from the following URL
# TensorFlow 1.13.1 downloads farmhash from the following URL
- # TensorFlow 2.3-rc0 downloads farmhash from the following URL
+ # TensorFlow 2.3.0 downloads farmhash from the following URL
envoption(FARMHASH_1_12_URL https://github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz)
ExternalSource_Download(FARMHASH ${FARMHASH_1_12_URL})
BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS/build
INSTALL_DIR ${EXT_OVERLAY_DIR}
BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
- IDENTIFIER "1.10-fix1"
+ IDENTIFIER "1.10-fix2"
+ EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF"
PKG_NAME "FLATBUFFERS")
endfunction(_FlatBuffers_build)
find_path(HDF5_CONFIG_DIR "hdf5-config.cmake"
PATHS ${EXT_OVERLAY_DIR}
PATH_SUFFIXES
+ cmake
share/cmake
share/cmake/hdf5
cmake/hdf5
--- /dev/null
+function(_Pybind11_import)
+ nnas_find_package(Pybind11Source QUIET)
+
+ if(NOT Pybind11Source_FOUND)
+ set(Pybind11_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT Pybind11Source_FOUND)
+
+ nnas_include(ExternalBuildTools)
+ ExternalBuild_CMake(CMAKE_DIR ${Pybind11Source_DIR}
+ BUILD_DIR ${CMAKE_BINARY_DIR}/externals/PYBIND11/build
+ INSTALL_DIR ${EXT_OVERLAY_DIR}
+ IDENTIFIER "2.5.0"
+ PKG_NAME "PYBIND11"
+ EXTRA_OPTS "-DPYBIND11_TEST:BOOL=OFF")
+
+ find_path(Pybind11_INCLUDE_DIRS NAMES pybind11.h PATHS ${EXT_OVERLAY_DIR} PATH_SUFFIXES include/pybind11)
+
+ set(Pybind11_FOUND TRUE PARENT_SCOPE)
+endfunction(_Pybind11_import)
+
+_Pybind11_import()
--- /dev/null
+function(_Pybind11Source_import)
+ if(NOT DOWNLOAD_PYBIND11)
+ set(Pybind11Source_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_PYBIND11)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(PYBIND11_URL https://github.com/pybind/pybind11/archive/v2.5.0.tar.gz)
+
+ ExternalSource_Download(PYBIND11 ${PYBIND11_URL})
+
+ set(Pybind11Source_DIR ${PYBIND11_SOURCE_DIR} PARENT_SCOPE)
+ set(Pybind11Source_FOUND TRUE PARENT_SCOPE)
+endfunction(_Pybind11Source_import)
+
+_Pybind11Source_import()
--- /dev/null
+function(_TensorFlowEigenSource_import)
+ if(NOT DOWNLOAD_EIGEN)
+ set(TensorFlowEigenSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_EIGEN)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Exact version used by TensorFlow v2.3.0.
+ # See tensorflow/tensorflow/workspace.bzl.
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://gitlab.com")
+ envoption(TENSORFLOW_2_3_0_EIGEN_URL ${EXTERNAL_DOWNLOAD_SERVER}/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz)
+
+ ExternalSource_Download(EIGEN DIRNAME TENSORFLOW-2.3.0-EIGEN ${TENSORFLOW_2_3_0_EIGEN_URL})
+
+ set(TensorFlowEigenSource_DIR ${EIGEN_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowEigenSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowEigenSource_import)
+
+_TensorFlowEigenSource_import()
--- /dev/null
+set(PACKAGE_VERSION "2.3.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
--- /dev/null
+function(_TensorFlowSource_import)
+ if(NOT DOWNLOAD_TENSORFLOW)
+ set(TensorFlowSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_TENSORFLOW)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(TENSORFLOW_2_3_0_URL https://github.com/tensorflow/tensorflow/archive/v2.3.0.tar.gz)
+
+ ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.3.0 ${TENSORFLOW_2_3_0_URL})
+
+ set(TensorFlowSource_DIR ${TENSORFLOW_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowSource_import)
+
+_TensorFlowSource_import()
--- /dev/null
+set(PACKAGE_VERSION "2.3.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
FROM ubuntu:16.04
ARG UBUNTU_MIRROR
-ENV http_proxy $http_proxy
-ENV https_proxy $https_proxy
RUN if [ -n "$http_proxy" ] ; then echo "Acquire::http::proxy \"${http_proxy}\";" >> /etc/apt/apt.conf ; fi
RUN if [ -n "$https_proxy" ] ; then echo "Acquire::https::proxy \"${https_proxy}\";" >> /etc/apt/apt.conf ; fi
# Additional tools
RUN apt-get update && apt-get -qqy install doxygen graphviz wget unzip clang-format-3.9 python3 python3-pip python3-venv hdf5-tools pylint
+RUN pip3 install --upgrade pip
RUN pip3 install yapf==0.22.0 numpy
# Install google test (source)
FROM ubuntu:18.04
ARG UBUNTU_MIRROR
-ENV http_proxy $http_proxy
-ENV https_proxy $https_proxy
-
-RUN if [ -n "$http_proxy" ] ; then echo "Acquire::http::proxy \"${http_proxy}\";" >> /etc/apt/apt.conf ; fi
-RUN if [ -n "$https_proxy" ] ; then echo "Acquire::https::proxy \"${https_proxy}\";" >> /etc/apt/apt.conf ; fi
-RUN if [ -n "$UBUNTU_MIRROR" ] ; then sed "s/archive.ubuntu.com/${UBUNTU_MIRROR}/g" -i /etc/apt/sources.list ; fi
# Install 'add-apt-repository'
RUN apt-get update && apt-get -qqy install software-properties-common
# Additional tools
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qqy install doxygen graphviz wget unzip clang-format-3.9 python3 python3-pip python3-venv hdf5-tools pylint
+RUN pip3 install --upgrade pip
RUN pip3 install yapf==0.22.0 numpy
# Install google test (source)
option(DOWNLOAD_PYTORCH "Download Pytorch source" ON)
option(DOWNLOAD_ONNX "Download ONNX source" ON)
option(DOWNLOAD_ABSEIL "Download Abseil-cpp source" ON)
+option(DOWNLOAD_PYBIND11 "Download Pybind11 source" ON)
option(DOWNLOAD_GTEST "Download Google Test source" ON)
option(BUILD_GTEST "Build Google Test from the downloaded source" ON)
oops pepper-assert \
hermes hermes-std \
loco locop locomotiv logo-core logo \
-foder souschef arser \
+foder souschef arser vconone \
safemain mio-circle mio-tflite \
tflite2circle \
luci \
OFF)
option(BUILD_RUNTIME_NNFW_API_TEST "Build Runtime NNFW API Tests" ON)
option(BUILD_TFLITE_RUN "Build tflite-run" ON)
-option(BUILD_TFLITE_RUN_2_2_0 "Build tflite-run 2.2.0" OFF)
+option(BUILD_TFLITE_VANILLA_RUN "Build tflite-vanilla-run" OFF)
option(BUILD_TFLITE_BENCHMARK_MODEL "Build tflite benchmark model" OFF)
option(BUILD_NNAPI_TEST "Build nnapi_test" ON)
option(BUILD_NNPACKAGE_RUN "Build nnpackge_run" ON)
option(DOWNLOAD_RUY "Download ruy source" ON)
option(BUILD_BOOST "Build boost source" OFF)
option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" ON)
-option(BUILD_TENSORFLOW_LITE_2_2_0 "Build TensorFlow Lite from the downloaded source" OFF)
+option(BUILD_TENSORFLOW_LITE_2_3_0 "Build TensorFlow Lite 2.3.0 from the downloaded source" OFF)
option(BUILD_GTEST "Download and build Google Test" ON)
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" ON)
option(BUILD_RUY "Build ruy library from the downloaded source" ON)
function(_Eigen_import)
- nnas_find_package(TensorFlowEigenSource-2.3.0-rc0 QUIET)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.3.0 QUIET)
if(NOT TensorFlowEigenSource_FOUND)
set(Eigen_FOUND FALSE PARENT_SCOPE)
+++ /dev/null
-if(BUILD_TENSORFLOW_LITE_2_2_0)
- macro(return_unless VAR)
- if(NOT ${VAR})
- message("${VAR} NOT TRUE")
- set(TensorFlowLite_2_2_0_FOUND PARENT_SCOPE)
- return()
- endif(NOT ${VAR})
- endmacro(return_unless)
-
- nnas_include(ExternalSourceTools)
- nnas_include(OptionTools)
-
- # Below urls come from https://github.com/tensorflow/tensorflow/blob/v2.2.0/tensorflow/lite/tools/make/Makefile
-
- set(absl_url "https://github.com/abseil/abseil-cpp/archive/43ef2148c0936ebf7cb4be6b19927a9d9d145b8f.tar.gz")
- ExternalSource_Download("tflite220_Absl" ${absl_url})
- set(TFLite220AbslSource_DIR "${tflite220_Absl_SOURCE_DIR}")
- if (NOT TFLite220AbslSource_DIR STREQUAL "")
- set(TFLite220AbslSource_FOUND TRUE)
- endif()
- return_unless(TFLite220AbslSource_FOUND)
-
- set(eigen_url "https://gitlab.com/libeigen/eigen/-/archive/52a2fbbb008a47c5e3fb8ac1c65c2feecb0c511c/eigen-52a2fbbb008a47c5e3fb8ac1c65c2feecb0c511c.tar.gz")
- ExternalSource_Download("tflite220_Eigen" ${eigen_url})
- set(TFLite220EigenSource_DIR "${tflite220_Eigen_SOURCE_DIR}")
- if (NOT TFLite220EigenSource_DIR STREQUAL "")
- set(TFLite220EigenSource_FOUND TRUE)
- endif()
- return_unless(TFLite220EigenSource_FOUND)
-
- set(farmhash_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz")
- ExternalSource_Download("tflite220_Farmhash" ${farmhash_url})
- set(TFLite220FarmhashSource_DIR "${tflite220_Farmhash_SOURCE_DIR}")
- if (NOT TFLite220FarmhashSource_DIR STREQUAL "")
- set(TFLite220FarmhashSource_FOUND TRUE)
- endif()
- return_unless(TFLite220FarmhashSource_FOUND)
-
- set(fft2d_url "https://storage.googleapis.com/mirror.tensorflow.org/www.kurims.kyoto-u.ac.jp/~ooura/fft2d.tgz")
- ExternalSource_Download("tflite220_FFT2D" ${fft2d_url})
- set(TFLite220FFT2DSource_DIR "${tflite220_FFT2D_SOURCE_DIR}")
- if (NOT TFLite220FFT2DSource_DIR STREQUAL "")
- set(TFLite220FFT2DSource_FOUND TRUE)
- endif()
- return_unless(TFLite220FFT2DSource_FOUND)
-
- set(flatbuffers_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/flatbuffers/archive/v1.11.0.tar.gz")
- ExternalSource_Download("tflite220_FlatBuffers" ${flatbuffers_url})
- set(TFLite220FlatBuffersSource_DIR "${tflite220_FlatBuffers_SOURCE_DIR}")
- if (NOT TFLite220FlatBuffersSource_DIR STREQUAL "")
- set(TFLite220FlatBuffersSource_FOUND TRUE)
- endif()
- return_unless(TFLite220FlatBuffersSource_FOUND)
-
- set(fp16_url "https://github.com/Maratyszcza/FP16/archive/febbb1c163726b5db24bed55cc9dc42529068997.zip")
- ExternalSource_Download("tflite220_FP16" ${fp16_url})
- set(TFLite220FP16Source_DIR "${tflite220_FP16_SOURCE_DIR}")
- if (NOT TFLite220FP16Source_DIR STREQUAL "")
- set(TFLite220FP16Source_FOUND TRUE)
- endif()
- return_unless(TFLite220FP16Source_FOUND)
-
- set(gemmlowp_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/gemmlowp/archive/12fed0cd7cfcd9e169bf1925bc3a7a58725fdcc3.zip")
- ExternalSource_Download("tflite220_GEMMLowp" ${gemmlowp_url})
- set(TFLite220GEMMLowpSource_DIR "${tflite220_GEMMLowp_SOURCE_DIR}")
- if (NOT TFLite220GEMMLowpSource_DIR STREQUAL "")
- set(TFLite220GEMMLowpSource_FOUND TRUE)
- endif()
- return_unless(TFLite220GEMMLowpSource_FOUND)
-
- set(neon2sse_url "https://github.com/intel/ARM_NEON_2_x86_SSE/archive/master.zip")
- ExternalSource_Download("tflite220_NEON2SSE" ${neon2sse_url})
- set(TFLite220NEON2SSESource_DIR "${tflite220_NEON2SSE_SOURCE_DIR}")
- if (NOT TFLite220NEON2SSESource_DIR STREQUAL "")
- set(TFLite220NEON2SSESource_FOUND TRUE)
- endif()
- return_unless(TFLite220NEON2SSESource_FOUND)
-
- set(tensorflow_url "https://github.com/tensorflow/tensorflow/archive/v2.2.0.tar.gz")
- ExternalSource_Download("tflite220_TensorFlow" ${tensorflow_url})
- set(TFLite220TensorFlowSource_DIR "${tflite220_TensorFlow_SOURCE_DIR}")
- if (NOT TFLite220TensorFlowSource_DIR STREQUAL "")
- set(TFLite220TensorFlowSource_FOUND TRUE)
- endif()
- return_unless(TFLite220TensorFlowSource_FOUND)
-
- nnas_include(ExternalProjectTools)
- add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite-2.2.0" tflite-2.2.0)
-
- set(TensorFlowLite_2_2_0_FOUND TRUE)
- return()
-endif()
-# Reference: https://github.com/tensorflow/tensorflow/blob/v2.2.0/tensorflow/lite/tools/make/Makefile
+# Reference: https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/tools/make/Makefile
#
-# Tensorflow Lite library 2.2.0
+# Tensorflow Lite library 2.3.0
#
-set(TENSORFLOW_LITE_BASE ${TFLite220TensorFlowSource_DIR}/tensorflow/lite)
+set(TENSORFLOW_LITE_BASE ${TFLiteVanillaTensorFlowSource_DIR}/tensorflow/lite)
file(GLOB TFLITE_CORE_SRCS "${TENSORFLOW_LITE_BASE}/*.c"
"${TENSORFLOW_LITE_BASE}/*.cc"
list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/memory_info.cc")
list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/time.cc")
-file(GLOB TFLITE_EXPERIMENTAL_SRCS "${TENSORFLOW_LITE_BASE}/experimental/resource/*.cc"
- "${TENSORFLOW_LITE_BASE}/experimental/ruy/*.cc")
+file(GLOB TFLITE_EXPERIMENTAL_SRCS "${TENSORFLOW_LITE_BASE}/experimental/resource/*.cc")
file(GLOB TFLITE_SPARSITY_SRCS "${TENSORFLOW_LITE_BASE}/tools/optimize/sparsity/*.cc")
list(APPEND TFLITE_SRCS ${TFLITE_SPARSITY_SRCS})
# externals
-list(APPEND TFLITE_SRCS "${TFLite220FarmhashSource_DIR}/src/farmhash.cc")
-list(APPEND TFLITE_SRCS "${TFLite220FFT2DSource_DIR}/fftsg.c")
-list(APPEND TFLITE_SRCS "${TFLite220FFT2DSource_DIR}/fftsg2d.c")
-list(APPEND TFLITE_SRCS "${TFLite220FlatBuffersSource_DIR}/src/util.cpp")
+list(APPEND TFLITE_SRCS "${TFLiteVanillaFarmhashSource_DIR}/src/farmhash.cc")
+list(APPEND TFLITE_SRCS "${TFLiteVanillaFFT2DSource_DIR}/fftsg.c")
+list(APPEND TFLITE_SRCS "${TFLiteVanillaFFT2DSource_DIR}/fftsg2d.c")
+list(APPEND TFLITE_SRCS "${TFLiteVanillaFlatBuffersSource_DIR}/src/util.cpp")
# externals - absl
-file(GLOB_RECURSE ABSL_SRCS "${TFLite220AbslSource_DIR}/absl/*.cc")
-file(GLOB_RECURSE ABSL_EXCLS "${TFLite220AbslSource_DIR}/absl/*test*.cc"
- "${TFLite220AbslSource_DIR}/absl/*benchmark*.cc"
- "${TFLite220AbslSource_DIR}/absl/synchronization/*.cc"
- "${TFLite220AbslSource_DIR}/absl/debugging/*.cc"
- "${TFLite220AbslSource_DIR}/absl/hash/*.cc"
- "${TFLite220AbslSource_DIR}/absl/flags/*.cc")
+file(GLOB_RECURSE ABSL_SRCS "${TFLiteVanillaAbslSource_DIR}/absl/*.cc")
+file(GLOB_RECURSE ABSL_EXCLS "${TFLiteVanillaAbslSource_DIR}/absl/*test*.cc"
+ "${TFLiteVanillaAbslSource_DIR}/absl/*benchmark*.cc"
+ "${TFLiteVanillaAbslSource_DIR}/absl/synchronization/*.cc"
+ "${TFLiteVanillaAbslSource_DIR}/absl/debugging/*.cc"
+ "${TFLiteVanillaAbslSource_DIR}/absl/hash/*.cc"
+ "${TFLiteVanillaAbslSource_DIR}/absl/flags/*.cc"
+ "${TFLiteVanillaAbslSource_DIR}/absl/random/*.cc")
list(REMOVE_ITEM ABSL_SRCS ${ABSL_EXCLS})
list(APPEND TFLITE_SRCS ${ABSL_SRCS})
+# externals - ruy
+file(GLOB RUY_SRCS "${TFLiteVanillaRuySource_DIR}/ruy/*.cc")
+file(GLOB_RECURSE RUY_EXCLS "${TFLiteVanillaRuySource_DIR}/ruy/*test*.cc"
+ "${TFLiteVanillaRuySource_DIR}/ruy/*benchmark*.cc"
+ "${TFLiteVanillaRuySource_DIR}/ruy/*example*.cc")
+list(REMOVE_ITEM RUY_SRCS ${RUY_EXCLS})
+# Temporary fix for ruy compilation error.
+# TODO(b/158800055): Remove this hack once the ruy version is correctly bumped.
+list(REMOVE_ITEM RUY_SRCS "${TFLiteVanillaRuySource_DIR}/ruy/prepare_packed_matrices.cc")
+list(APPEND TFLITE_SRCS ${RUY_SRCS})
+
+
# Build with mmap? true
-# caution: v2.2.0's Makefile has wrong code on this part. This is fixed on master branch.
+# caution: v2.3.0's Makefile has wrong code on this part. This is fixed on master branch.
set(BUILD_WITH_MMAP TRUE)
if(${BUILD_WITH_MMAP})
list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation_disabled.cc")
list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_EXCLS})
# include headers
-list(APPEND TFLITE_INCLUDES "${TFLite220TensorFlowSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLite220EigenSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLite220AbslSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLite220GEMMLowpSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLite220NEON2SSESource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TFLite220FarmhashSource_DIR}/src")
-list(APPEND TFLITE_INCLUDES "${TFLite220FlatBuffersSource_DIR}/include")
-list(APPEND TFLITE_INCLUDES "${TFLite220FP16Source_DIR}/include")
-
-add_library(tensorflow-lite-2.2.0 STATIC ${TFLITE_SRCS})
-target_include_directories(tensorflow-lite-2.2.0 SYSTEM PUBLIC ${TFLITE_INCLUDES})
-target_compile_definitions(tensorflow-lite-2.2.0 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -DTFLITE_WITH_RUY -DTFLITE_WITH_RUY_GEMV")
-set_property(TARGET tensorflow-lite-2.2.0 PROPERTY POSITION_INDEPENDENT_CODE ON)
-target_link_libraries(tensorflow-lite-2.2.0 eigen ${LIB_PTHREAD} dl)
-if(${BUILD_WITH_NNAPI})
- target_link_libraries(tensorflow-lite-2.2.0 rt)
+list(APPEND TFLITE_INCLUDES "${TFLiteVanillaTensorFlowSource_DIR}")
+list(APPEND TFLITE_INCLUDES "${TFLiteVanillaEigenSource_DIR}")
+list(APPEND TFLITE_INCLUDES "${TFLiteVanillaAbslSource_DIR}")
+list(APPEND TFLITE_INCLUDES "${TFLiteVanillaGEMMLowpSource_DIR}")
+list(APPEND TFLITE_INCLUDES "${TFLiteVanillaNEON2SSESource_DIR}")
+list(APPEND TFLITE_INCLUDES "${TFLiteVanillaFarmhashSource_DIR}/src")
+list(APPEND TFLITE_INCLUDES "${TFLiteVanillaFlatBuffersSource_DIR}/include")
+list(APPEND TFLITE_INCLUDES "${TFLiteVanillaFP16Source_DIR}/include")
+list(APPEND TFLITE_INCLUDES "${TFLiteVanillaRuySource_DIR}")
+
+add_library(tensorflow-lite-2.3.0 STATIC ${TFLITE_SRCS})
+target_include_directories(tensorflow-lite-2.3.0 SYSTEM PUBLIC ${TFLITE_INCLUDES})
+target_compile_definitions(tensorflow-lite-2.3.0 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -DTFLITE_WITH_RUY -DTFLITE_WITH_RUY_GEMV")
+set_property(TARGET tensorflow-lite-2.3.0 PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_link_libraries(tensorflow-lite-2.3.0 eigen ${LIB_PTHREAD} dl)
+if(NOT ANDROID AND ${BUILD_WITH_NNAPI})
+ target_link_libraries(tensorflow-lite-2.3.0 rt)
endif()
if(ANDROID)
- target_link_libraries(tensorflow-lite-2.2.0 log)
- target_include_directories(tensorflow-lite-2.2.0 PUBLIC "${NDK_DIR}/..")
+ target_link_libraries(tensorflow-lite-2.3.0 log)
+ target_include_directories(tensorflow-lite-2.3.0 PUBLIC "${NDK_DIR}/..")
endif()
--- /dev/null
+if(BUILD_TENSORFLOW_LITE_2_3_0)
+ macro(return_unless VAR)
+ if(NOT ${VAR})
+ message("${VAR} NOT TRUE")
+ set(TensorFlowLite_2_3_0_FOUND PARENT_SCOPE)
+ return()
+ endif(NOT ${VAR})
+ endmacro(return_unless)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Below urls come from https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/tools/make/Makefile
+
+ set(absl_url "https://github.com/abseil/abseil-cpp/archive/df3ea785d8c30a9503321a3d35ee7d35808f190d.tar.gz")
+ ExternalSource_Download("TFLiteVanilla_Absl" ${absl_url})
+ set(TFLiteVanillaAbslSource_DIR "${TFLiteVanilla_Absl_SOURCE_DIR}")
+ if (NOT TFLiteVanillaAbslSource_DIR STREQUAL "")
+ set(TFLiteVanillaAbslSource_FOUND TRUE)
+ endif()
+ return_unless(TFLiteVanillaAbslSource_FOUND)
+
+ set(eigen_url "https://gitlab.com/libeigen/eigen/-/archive/386d809bde475c65b7940f290efe80e6a05878c4/eigen-386d809bde475c65b7940f290efe80e6a05878c4.tar.gz")
+ ExternalSource_Download("TFLiteVanilla_Eigen" ${eigen_url})
+ set(TFLiteVanillaEigenSource_DIR "${TFLiteVanilla_Eigen_SOURCE_DIR}")
+ if (NOT TFLiteVanillaEigenSource_DIR STREQUAL "")
+ set(TFLiteVanillaEigenSource_FOUND TRUE)
+ endif()
+ return_unless(TFLiteVanillaEigenSource_FOUND)
+
+ set(farmhash_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz")
+ ExternalSource_Download("TFLiteVanilla_Farmhash" ${farmhash_url})
+ set(TFLiteVanillaFarmhashSource_DIR "${TFLiteVanilla_Farmhash_SOURCE_DIR}")
+ if (NOT TFLiteVanillaFarmhashSource_DIR STREQUAL "")
+ set(TFLiteVanillaFarmhashSource_FOUND TRUE)
+ endif()
+ return_unless(TFLiteVanillaFarmhashSource_FOUND)
+
+ set(fft2d_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/petewarden/OouraFFT/archive/v1.0.tar.gz")
+ ExternalSource_Download("TFLiteVanilla_FFT2D" ${fft2d_url})
+ set(TFLiteVanillaFFT2DSource_DIR "${TFLiteVanilla_FFT2D_SOURCE_DIR}")
+ if (NOT TFLiteVanillaFFT2DSource_DIR STREQUAL "")
+ set(TFLiteVanillaFFT2DSource_FOUND TRUE)
+ endif()
+ return_unless(TFLiteVanillaFFT2DSource_FOUND)
+
+ set(flatbuffers_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/flatbuffers/archive/v1.12.0.tar.gz")
+ ExternalSource_Download("TFLiteVanilla_FlatBuffers" ${flatbuffers_url})
+ set(TFLiteVanillaFlatBuffersSource_DIR "${TFLiteVanilla_FlatBuffers_SOURCE_DIR}")
+ if (NOT TFLiteVanillaFlatBuffersSource_DIR STREQUAL "")
+ set(TFLiteVanillaFlatBuffersSource_FOUND TRUE)
+ endif()
+ return_unless(TFLiteVanillaFlatBuffersSource_FOUND)
+
+ set(fp16_url "https://github.com/Maratyszcza/FP16/archive/4dfe081cf6bcd15db339cf2680b9281b8451eeb3.zip")
+ ExternalSource_Download("TFLiteVanilla_FP16" ${fp16_url})
+ set(TFLiteVanillaFP16Source_DIR "${TFLiteVanilla_FP16_SOURCE_DIR}")
+ if (NOT TFLiteVanillaFP16Source_DIR STREQUAL "")
+ set(TFLiteVanillaFP16Source_FOUND TRUE)
+ endif()
+ return_unless(TFLiteVanillaFP16Source_FOUND)
+
+ set(gemmlowp_url "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip")
+ ExternalSource_Download("TFLiteVanilla_GEMMLowp" ${gemmlowp_url})
+ set(TFLiteVanillaGEMMLowpSource_DIR "${TFLiteVanilla_GEMMLowp_SOURCE_DIR}")
+ if (NOT TFLiteVanillaGEMMLowpSource_DIR STREQUAL "")
+ set(TFLiteVanillaGEMMLowpSource_FOUND TRUE)
+ endif()
+ return_unless(TFLiteVanillaGEMMLowpSource_FOUND)
+
+ set(neon2sse_url "https://github.com/intel/ARM_NEON_2_x86_SSE/archive/1200fe90bb174a6224a525ee60148671a786a71f.tar.gz")
+ ExternalSource_Download("TFLiteVanilla_NEON2SSE" ${neon2sse_url})
+ set(TFLiteVanillaNEON2SSESource_DIR "${TFLiteVanilla_NEON2SSE_SOURCE_DIR}")
+ if (NOT TFLiteVanillaNEON2SSESource_DIR STREQUAL "")
+ set(TFLiteVanillaNEON2SSESource_FOUND TRUE)
+ endif()
+ return_unless(TFLiteVanillaNEON2SSESource_FOUND)
+
+ set(tensorflow_url "https://github.com/tensorflow/tensorflow/archive/v2.3.0.tar.gz")
+ ExternalSource_Download("TFLiteVanilla_TensorFlow" ${tensorflow_url})
+ set(TFLiteVanillaTensorFlowSource_DIR "${TFLiteVanilla_TensorFlow_SOURCE_DIR}")
+ if (NOT TFLiteVanillaTensorFlowSource_DIR STREQUAL "")
+ set(TFLiteVanillaTensorFlowSource_FOUND TRUE)
+ endif()
+ return_unless(TFLiteVanillaTensorFlowSource_FOUND)
+
+ set(ruy_url "https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip")
+ ExternalSource_Download("TFLiteVanilla_Ruy" ${ruy_url})
+ set(TFLiteVanillaRuySource_DIR "${TFLiteVanilla_Ruy_SOURCE_DIR}")
+ if (NOT TFLiteVanillaRuySource_DIR STREQUAL "")
+ set(TFLiteVanillaRuySource_FOUND TRUE)
+ endif()
+ return_unless(TFLiteVanillaRuySource_FOUND)
+
+ nnas_include(ExternalProjectTools)
+ add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite-2.3.0" tflite-2.3.0)
+
+ set(TensorFlowLite_2_3_0_FOUND TRUE)
+ return()
+endif()
[profile.tizen]
user=obs_viewer
obs = obs.tizen
-repos = repo.tizen_base,repo.tizen_mobile
+repos = repo.tizen_one,repo.tizen_base,repo.tizen_mobile
buildroot = /home/GBS-ROOT/
[obs.tizen]
url = http://download.tizen.org/snapshots/tizen/unified/latest/repos/standard/packages/
[repo.tizen_base]
-url = http://download.tizen.org/snapshots/tizen/base/latest/repos/standard/packages/
+url = http://download.tizen.org/snapshots/tizen/base/latest/repos/standard/packages/
+[repo.tizen_one]
+url = http://nnfw.mooo.com/archive/tizen/
# Invoke "preset_configure" function that the preset provides
preset_configure
-NPROC=$(cat /proc/cpuinfo | grep -c processor)
+NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
+echo "[BUILD] \"make\" with -j${NPROC} option. You can specify the number of jobs by defining NPROC"
cmake --build . -- -j$((NPROC/2)) all
cmake --build . -- install
# Install NN Package tools
REQUIRED_UNITS+=("souschef")
REQUIRED_UNITS+=("safemain")
REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
# Hermes Logging Framework
REQUIRED_UNITS+=("hermes" "hermes-std")
# loco IR and related utilities
REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
REQUIRED_UNITS+=("record-minmax" "circle-quantizer")
REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+
+ NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
# TODO Use "nncc configure" and "nncc build"
cmake \
-DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
-DCMAKE_BUILD_TYPE=release \
-DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
${EXTRA_OPTIONS[@]} \
"${NNAS_PROJECT_PATH}/infra/nncc"
}
# Install tf2nnpkg
install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
-
- # Create python virtual enviornment
- python3 -m venv "${NNAS_INSTALL_PREFIX}/bin/venv"
-
- # Install tensorflow
- source "${NNAS_INSTALL_PREFIX}/bin/venv/bin/activate"
- python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
- install -U pip setuptools
- python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
- install tensorflow-cpu==2.3.0rc0
}
--- /dev/null
+#!/bin/bash
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp" "stdex")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "foder")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite" "mio-circle")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer")
+ REQUIRED_UNITS+=("one-cmds")
+
+ NPROC=$(cat /proc/cpuinfo | grep -c processor)
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -G "MSYS Makefiles" \
+ -DTF2NNPKG_FOR_WINDOWS=ON \
+ -DUSE_PROTOBUF_LEGACY_IMPORT=ON \
+ -DCMAKE_EXE_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DCMAKE_SHARED_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DENABLE_TEST=OFF \
+ -DDOWNLOAD_GTEST=OFF \
+ -DBUILD_GTEST=OFF \
+ -DCMAKE_C_COMPILER=gcc \
+ -DCMAKE_CXX_COMPILER=g++ \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ # Install libraries to bin/ for Windows release
+ mv ${NNCC_INSTALL_PREFIX}/lib/*.dll ${NNCC_INSTALL_PREFIX}/bin
+ rm -rf ${NNCC_INSTALL_PREFIX}/lib
+
+ install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+ "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.20200630" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+
+ # Though you have to install tensorflow to run 'tf2tfliteV2',
+ # tensorflow can't be installed in MinGW. Instead, install tensorflow
+ # from a Windows native CMD (run as administrator) into a python virtual environment,
+ # and then copy that environment to "${NNAS_INSTALL_PREFIX}/bin/venv"
+}
usage()
{
echo "Convert TensorFlow model to nnpackage."
- echo "Usage: tf2nnpkg --info <path/to/info> --graphdef <path/to/pb> [OPTION] -o <path/to/nnpkg/directory>"
- exit 0
+ echo "Usage: tf2nnpkg"
+ echo " --info <path/to/info>"
+ echo " --graphdef <path/to/pb>"
+ echo " -o <path/to/nnpkg/directory>"
+ echo " --v2 (optional) Use TF 2.x interface"
+ exit 255
}
+TF_INTERFACE="--v1"
+
# Parse command-line arguments
#
while [ "$#" -ne 0 ]; do
export OUTPUT_DIR="$2"
shift 2
;;
+ '--v2')
+ TF_INTERFACE="--v2"
+ shift
+ ;;
*)
echo "${CUR}"
shift
INPUT_SHAPES=$(grep ^input ${INFO_FILE} | cut -d "[" -f2 | cut -d "]" -f1 | tr -d ' ' | xargs | tr ' ' ':')
# generate tflite file
-python "${ROOT}/bin/tf2tfliteV2.py" --v2 --input_path ${GRAPHDEF_FILE} \
---output_path "${TMPDIR}/${MODEL_NAME}.tflite" \
---input_arrays ${INPUT} --output_arrays ${OUTPUT} || \
-python "${ROOT}/bin/tf2tfliteV2.py" --v1 --input_path ${GRAPHDEF_FILE} \
+python "${ROOT}/bin/tf2tfliteV2.py" ${TF_INTERFACE} --input_path ${GRAPHDEF_FILE} \
--output_path "${TMPDIR}/${MODEL_NAME}.tflite" \
--input_arrays ${INPUT} --input_shapes ${INPUT_SHAPES} \
--output_arrays ${OUTPUT}
--- /dev/null
+#!/bin/bash
+#
+# STEP 1
+# Download latest TCM tool from
+# https://github.sec.samsung.net/RS-TCM/tca-standalone/releases/download/v0.0.8/tca-standalone-0.0.8.jar
+#
+# STEP 2
+# Create symbolic link `./src` for source directory to be analyzed which has `.ahub` configuration.
+#
+# STEP 3
+# run this `build-tcm.sh` script.
+#
+# See the following link for additional details.
+# https://github.sec.samsung.net/RS-TCM/tca-standalone/wiki/Tutorials-CPP-Gtest
+#
+
+echo ${PROJECT_DIR:=${PWD}}
+
+java -jar $PROJECT_DIR/tca-standalone-0.0.8.jar \
+ --outdir=$PROJECT_DIR/tcm-output \
+ --config=$PROJECT_DIR/.ahub/tcchecker-tca/config.yaml \
+ --local=$PROJECT_DIR/src \
+ --logfile=$PROJECT_DIR/tcm-output/tcm.log \
+ --debug
# TFLiteModelVerification $1 $2 $3
# Run ./tests/scripts/test-driver.sh script verification test
#
-# Unittests $1 $2 $3
-# Run ./tests/scripts/test-driver.sh script unittest
+# NNAPIGTest $1 $2 $3
+# Run [INSTALL_PATH]/test/onert-test unittest command for nnapi gtest
#
# NNPackageTest $1 $2
-# Run ./tests/scripts/nnpkg_test.sh script nnpackage test
+# Run [INSTALL_PATH]/test/onert-test nnpkg-test command
CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_PATH="$(cd ${CURRENT_PATH}/../../ && pwd)"
+# Install path on CI
+INSTALL_PATH=$ROOT_PATH/Product/out
+
function CheckTestPrepared()
{
# Model download server setting
export BACKENDS=$1
if [[ "$2" == "" ]]; then
- ./tests/scripts/test-driver.sh \
- --reportdir=$ROOT_PATH/$3 \
- --verification \
- .
+ $INSTALL_PATH/test/onert-test verify-tflite --api=nnapi \
+ --reportdir=$ROOT_PATH/$3
else
- ./tests/scripts/test-driver.sh \
- --frameworktest_list_file=$2 \
- --reportdir=$ROOT_PATH/$3 \
- --verification \
- .
+ $INSTALL_PATH/test/onert-test verify-tflite --api=nnapi \
+ --list=$2 \
+ --reportdir=$ROOT_PATH/$3
fi
unset BACKENDS
}
# $1: (required) backend
-# $2: (required) unittest skiplist file relative path from nnfw root directory
+# $2: (required) nnapi gtest skiplist file relative path from nnfw root directory
# pass empty string if there is no test list
# $3: (required) relative path for report from nnfw root directory
-function Unittests()
+function NNAPIGTest()
{
[[ $# -ne 3 ]] && echo "Invalid function argument setting" && exit 1
# Backup original nnapi_gtest.skip
# TODO Pass skiplist to test-driver.sh
- SKIPLIST_FILE="${ROOT_PATH}/Product/out/unittest/nnapi_gtest.skip"
+ SKIPLIST_FILE="${INSTALL_PATH}/unittest/nnapi_gtest.skip"
BACKUP_FILE="${SKIPLIST_FILE}.backup"
if [[ "$2" != "" ]]; then
cp ${SKIPLIST_FILE} ${BACKUP_FILE}
fi
export BACKENDS=$1
- ./tests/scripts/test-driver.sh \
+ $INSTALL_PATH/test/onert-test unittest \
--reportdir=$ROOT_PATH/$3 \
- --unittest \
- .
+ --unittestdir=$INSTALL_PATH/unittest
unset BACKENDS
# TODO Pass skiplist to test-driver.sh
do
for entry in "nnpkg-tcs"/$f; do
if [ -e $entry ]; then
- BACKENDS="$1" tests/scripts/nnpkg_test.sh -d -i nnpkg-tcs $(basename "$entry")
+ BACKENDS="$1" $INSTALL_PATH/test/onert-test nnpkg-test -d -i nnpkg-tcs $(basename "$entry")
fi
done
EXITCODE_F=$?
export BACKENDS=$1
if [[ "$2" == "" ]]; then
- ./tests/scripts/test-driver.sh \
- --frameworktest \
- --framework_driverbin="$ROOT_PATH/Product/out/bin/tflite_loader_test_tool" \
+ $INSTALL_PATH/test/onert-test verify-tflite --api=loader \
--reportdir=$ROOT_PATH/$3
- .
else
- ./tests/scripts/test-driver.sh \
- --frameworktest \
- --framework_driverbin="$ROOT_PATH/Product/out/bin/tflite_loader_test_tool" \
- --frameworktest_list_file=tests/scripts/list/tflite_loader_list.${TEST_ARCH}.txt \
+ $INSTALL_PATH/test/onert-test verify-tflite --api=loader \
+ --list=$2 \
--reportdir=$ROOT_PATH/$3
fi
unset BACKENDS
DEBUG_BUILD_ITEMS+=";oops;pepper-assert"
DEBUG_BUILD_ITEMS+=";hermes;hermes-std"
DEBUG_BUILD_ITEMS+=";loco;locop;locomotiv;logo-core;logo"
-DEBUG_BUILD_ITEMS+=";foder;souschef;arser"
+DEBUG_BUILD_ITEMS+=";foder;souschef;arser;vconone"
DEBUG_BUILD_ITEMS+=";safemain;mio-circle;mio-tflite"
DEBUG_BUILD_ITEMS+=";tflite2circle"
DEBUG_BUILD_ITEMS+=";luci"
ROOT_PATH="$CURRENT_PATH/../../"
# prepare rootfs
-if [ ! -d $ROOTFS_DIR ]; then
+if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
echo "It will use default rootfs path"
else
DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
ROOT_PATH="$CURRENT_PATH/../../"
# prepare rootfs
-if [ ! -d $ROOTFS_DIR ]; then
+if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
echo "It will use default rootfs path"
else
DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
ROOT_PATH="$CURRENT_PATH/../../"
# prepare rootfs
-if [ ! -d $ROOTFS_DIR ]; then
+if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
echo "It will use default rootfs path"
else
DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
ROOT_PATH="$CURRENT_PATH/../../"
# prepare rootfs
-if [ ! -d $ROOTFS_DIR ]; then
+if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
echo "It will use default rootfs path"
else
DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
mkdir -p ${NNCC_INSTALL_PREFIX}
./nncc docker-run ./nnas create-package --prefix "${PWD}/${NNCC_INSTALL_PREFIX}" -- "${CONFIG_OPTIONS}"
+# create python virtual environment
+./nncc docker-run python3 -m venv "${NNCC_INSTALL_PREFIX}/bin/venv"
+
+./nncc docker-run "${NNCC_INSTALL_PREFIX}/bin/venv/bin/python" \
+ -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhosted.org \
+ install -U pip setuptools
+./nncc docker-run "${NNCC_INSTALL_PREFIX}/bin/venv/bin/python" \
+ -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhosted.org \
+ install tensorflow-cpu==2.3.0
+
mkdir -p ${ARCHIVE_PATH}
-tar -zcf ${ARCHIVE_PATH}/nncc-package.tar.gz -C ${NNCC_INSTALL_PREFIX} ./
+tar -zcf ${ARCHIVE_PATH}/nncc-package.tar.gz -C ${NNCC_INSTALL_PREFIX} --exclude "bin/venv" ./
+tar -zcf ${ARCHIVE_PATH}/nncc-venv-package.tar.gz -C ${NNCC_INSTALL_PREFIX} bin/venv
popd > /dev/null
ROOT_PATH="$CURRENT_PATH/../../"
# prepare rootfs
-if [ ! -d $ROOTFS_DIR ]; then
+if [ -z "$ROOTFS_DIR" ] || [ ! -d $ROOTFS_DIR ]; then
echo "It will use default rootfs path"
else
DOCKER_VOLUMES+=" -v $ROOTFS_DIR:/opt/rootfs"
REQUIRED_UNITS=()
# Common Libraries
REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp" "stdex")
-REQUIRED_UNITS+=("oops" "safemain" "foder" "arser" "oops")
+REQUIRED_UNITS+=("oops" "safemain" "foder" "arser" "vconone")
# Hermes Logging Framework
REQUIRED_UNITS+=("hermes" "hermes-std")
# loco IR and related utilities
for BACKEND in "${BACKENDS[@]}";
do
- NNPackageTest ${BACKEND} "tests/scripts/list/nnpkg_test_list.armv7l-linux.${BACKEND}"
+ NNPackageTest ${BACKEND} "Product/out/test/list/nnpkg_test_list.armv7l-linux.${BACKEND}"
done
# Interpreter test
export DISABLE_COMPILE=1
-NNPackageTest "interp" "tests/scripts/list/nnpkg_test_list.noarch.interp"
+NNPackageTest "interp" "Product/out/test/list/nnpkg_test_list.noarch.interp"
unset DISABLE_COMPILE
TENSOR_LOGGING=trace_log.txt ONERT_LOG_ENABLE=1 GRAPH_DOT_DUMP=1 ./infra/scripts/test_ubuntu_runtime_mixed.sh
# Enable trace event (acl_cl default backend)
export TRACE_FILEPATH=trace.json
-TFLiteModelVerification "acl_cl" "tests/scripts/list/frameworktest_list.armv7l.acl_cl.txt" "report/acl_cl/trace"
+TFLiteModelVerification "acl_cl" "Product/out/test/list/frameworktest_list.armv7l.acl_cl.txt" "report/acl_cl/trace"
unset TRACE_FILEPATH
# Interpreter
fi
UNITTEST_SKIPLIST="Product/out/unittest/nnapi_gtest.skip.${TEST_PLATFORM}.${BACKEND}"
-FRAMEWORK_TESTLIST="tests/scripts/list/frameworktest_list.${TEST_ARCH}.${BACKEND}.txt"
+FRAMEWORK_TESTLIST="Product/out/test/list/frameworktest_list.${TEST_ARCH}.${BACKEND}.txt"
REPORT_BASE="report/${BACKEND}"
EXECUTORS=("Linear" "Dataflow" "Parallel")
export EXECUTOR="${EXECUTOR}"
fi
- Unittests "${BACKEND}" "${UNITTEST_SKIPLIST}" "${REPORT_PATH}"
+ NNAPIGTest "${BACKEND}" "${UNITTEST_SKIPLIST}" "${REPORT_PATH}"
TFLiteModelVerification "${BACKEND}" "${FRAMEWORK_TESTLIST}" "${REPORT_PATH}"
if [ $EXECUTOR = "Interpreter" ]; then
# Current support acl_cl backend testlist only
# TODO Support more backends
-TFLITE_LOADER_TESTLIST="tests/scripts/list/tflite_loader_list.${TEST_ARCH}.txt"
+TFLITE_LOADER_TESTLIST="Product/out/test/list/tflite_loader_list.${TEST_ARCH}.txt"
if [[ $TFLITE_LOADER = "1" ]]; then
TFLiteLoaderTest "${BACKEND}" "${TFLITE_LOADER_TESTLIST}" "${REPORT_BASE}/loader/${EXECUTOR}"
-
- # Test custom op
- pushd ${ROOT_PATH} > /dev/null
- ./Product/out/tests/FillFrom_runner
- popd > /dev/null
fi
# This test requires test model installation
pushd ${ROOT_PATH} > /dev/null
-echo
-echo "==== Run nnfw_api_gtest begin ===="
-echo
-NNFW_API_TEST_MODEL_INSTALLER=tests/scripts/nnfw_api_gtest/install_nnfw_api_gtest_nnpackages.sh
-TEST_BIN=Product/out/unittest_standalone/nnfw_api_gtest
-$NNFW_API_TEST_MODEL_INSTALLER --install-dir ${TEST_BIN}_models
-${TEST_BIN}
-echo
-echo "==== Run nnfw_api_gtest end ===="
-echo
+echo ""
+echo "==== Run standalone unittest begin ===="
+echo ""
+Product/out/test/onert-test prepare-model --model=nnpackage
+Product/out/test/onert-test unittest --unittestdir=Product/out/unittest_standalone
+echo ""
+echo "==== Run standalone unittest end ===="
+echo ""
+
+# Test custom op
+pushd ${ROOT_PATH} > /dev/null
+./Product/out/test/FillFrom_runner
popd > /dev/null
-Product/out/unittest_standalone/test_compute
-Product/out/unittest_standalone/test_onert
-Product/out/unittest_standalone/test_onert_backend_cpu_common
-Product/out/unittest_standalone/test_onert_frontend_nnapi
-Product/out/unittest_standalone/tflite_test
-
-pushd ${ROOT_PATH}
-
# NOTE Fixed backend assignment by type of operation
# TODO Enhance this with randomized test
BACKENDS=(acl_cl acl_neon cpu)
# Get the intersect of framework test list files
-TESTLIST_PREFIX="tests/scripts/list/frameworktest_list.${TEST_ARCH}"
+TESTLIST_PREFIX="Product/out/test/list/frameworktest_list.${TEST_ARCH}"
SKIPLIST_PREFIX="Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}"
sort $TESTLIST_PREFIX.${BACKENDS[0]}.txt > $TESTLIST_PREFIX.intersect.txt
sort $SKIPLIST_PREFIX.${BACKENDS[0]} > $SKIPLIST_PREFIX.union
export OP_BACKEND_MaxPool2D="acl_cl"
export OP_BACKEND_AvgPool2D="acl_neon"
export ACL_LAYOUT="NCHW"
-Unittests "acl_cl;acl_neon;cpu" "Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}.union" "report/mixed"
+NNAPIGTest "acl_cl;acl_neon;cpu" "Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}.union" "report/mixed"
TFLiteModelVerification "acl_cl;acl_neon;cpu" "${TESTLIST_PREFIX}.intersect.txt" "report/mixed"
{
# download tflite model files
pushd $HOST_HOME
- tests/scripts/framework/run_test.sh --download=on
+ tests/scripts/models/run_test.sh --download=on --run=off
# TODO Since this command removes model file(.zip),
# We must always download the file unlike model file(.tflite).
# Because caching applies only to tflite file.
find tests -name "*.zip" -exec rm {} \;
- tar -zcf cache.tar.gz tests/scripts/framework/cache
+ tar -zcf cache.tar.gz -C tests/scripts/models cache
$SDB_CMD push cache.tar.gz $TEST_ROOT/.
rm -rf cache.tar.gz
- $SDB_CMD shell tar -zxf $TEST_ROOT/cache.tar.gz -C $TEST_ROOT
+ $SDB_CMD shell tar -zxf $TEST_ROOT/cache.tar.gz -C $TEST_ROOT/Product/out/test/models
# download api test model file for nnfw_api_gtest
MODEL_CACHE_DIR=$(mktemp -d)
- tests/scripts/nnfw_api_gtest/install_nnfw_api_gtest_nnpackages.sh --install-dir $MODEL_CACHE_DIR
+ tests/scripts/models/run_test.sh --download=on --run=off \
+ --configdir=test/scripts/nnfw_api_gtest/models \
+ --cachedir=$MODEL_CACHE_DIR
tar -zcf $MODEL_CACHE_DIR/api_model_test.tar.gz -C $MODEL_CACHE_DIR .
$SDB_CMD push $MODEL_CACHE_DIR/api_model_test.tar.gz $TEST_ROOT/Product/out/unittest_standalone/nnfw_api_gtest_models/
$SDB_CMD shell tar -zxf $TEST_ROOT/Product/out/unittest_standalone/nnfw_api_gtest_models/api_model_test.tar.gz \
rm -rf ${GCOV_DIR}/*
pushd ${GCOV_DIR}
- sdb pull ${TEST_ROOT}/tests/scripts/build_path.txt
+ sdb pull ${TEST_ROOT}/Product/out/test/build_path.txt
SRC_PREFIX=`cat build_path.txt`
GCOV_PREFIX_STRIP=`echo "${SRC_PREFIX}" | grep -o '/' | wc -l`
GCOV_DATA_PATH="/opt/usr/nnfw-gcov"
```
The structures and relevant APIs are defined in nnfw APIs.
-Please see `nnfw_dev.h` for detail.
+Please see `nnfw_experimental.h` for details.
You can find example in `nnfw` repository.
Name: nnfw
Summary: nnfw
-Version: 1.7.0
+Version: 1.8.0
Release: 1
Group: Development
License: Apache-2.0 and MIT and BSD-2-Clause
%ifarch %{arm} aarch64
# Require python for acl-ex library build pre-process
BuildRequires: python
-BuildRequires: libarmcl-devel
+BuildRequires: libarmcl-devel >= v20.05
%endif
Requires(post): /sbin/ldconfig
%description plugin-devel
NNFW development package for backend plugin developer
+%package minimal-app
+Summary: Minimal test binary for VD manual test
+
+%description minimal-app
+Minimal test binary for VD manual test
+
%if %{test_build} == 1
%package test
Summary: NNFW Test
%define install_dir %{_prefix}
%define install_path %{buildroot}%{install_dir}
%define build_env NNFW_WORKSPACE=build
-%define build_options -DCMAKE_BUILD_TYPE=%{build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen -DENABLE_TEST=off
+%define build_options -DCMAKE_BUILD_TYPE=%{build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen -DENABLE_TEST=off -DBUILD_MINIMAL_SAMPLE=on
# Set option for test build (and coverage test build)
%define test_install_home /opt/usr/nnfw-test
%if %{coverage_build} == 1
pwd > tests/scripts/build_path.txt
%endif # coverage_build
-tar -zcf test-suite.tar.gz infra/scripts tests/scripts
+tar -zcf test-suite.tar.gz infra/scripts
%endif # test_build
%endif # arm armv7l aarch64
%ifarch arm armv7l aarch64
mkdir -p %{buildroot}%{_libdir}
+mkdir -p %{buildroot}%{_bindir}
mkdir -p %{buildroot}%{_includedir}
install -m 644 build/out/lib/*.so %{buildroot}%{_libdir}
+install -m 755 build/out/bin/onert-minimal-app %{buildroot}%{_bindir}
cp -r build/out/include/* %{buildroot}%{_includedir}/
# For developer
%if %{test_build} == 1
%{test_build_env} ./nnfw install
# Share test script with ubuntu (ignore error if there is no list for target)
-cp tests/nnapi/nnapi_gtest.skip.* %{buildroot}%{test_install_dir}/unittest/.
+cp tests/nnapi/nnapi_gtest.skip.%{target_arch}-* %{buildroot}%{test_install_dir}/unittest/.
cp %{buildroot}%{test_install_dir}/unittest/nnapi_gtest.skip.%{target_arch}-linux.cpu %{buildroot}%{test_install_dir}/unittest/nnapi_gtest.skip
tar -zxf test-suite.tar.gz -C %{buildroot}%{test_install_home}
%if %{coverage_build} == 1
mkdir -p %{buildroot}%{test_install_home}/gcov
find . -name "*.gcno" -exec xargs cp {} %{buildroot}%{test_install_home}/gcov/. \;
+install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/test/build_path.txt
%endif # coverage_build
%endif # test_build
%manifest %{name}.manifest
%defattr(-,root,root,-)
%ifarch arm armv7l aarch64
-%dir %{_includedir}/nnfw
+%dir %{_includedir}/onert
%{_includedir}/onert/*
%{_libdir}/pkgconfig/nnfw-plugin.pc
%endif
+%files minimal-app
+%manifest %{name}.manifest
+%defattr(-,root,root,-)
+%{_bindir}/onert-minimal-app
+
%if %{test_build} == 1
%files test
%manifest %{name}.manifest
--- /dev/null
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 7 dim: 7 dim: 1 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "AveragePool2D"
+ averagepool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_width: 2
+ filter_height: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 5 dim: 5 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 2 dim: 25 }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 25 }
+ filler {
+ tag: "constant"
+ arg: "1.1"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 25 }
+}
+operation {
+ type: "DepthwiseConv2D"
+ version: 2
+ depthwiseconv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ dilation_w_factor: 2
+ dilation_h_factor: 1
+ depth_multiplier: 5
+ activation : RELU6
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+input: "ker"
+output: "ofm"
--- /dev/null
+# To check if DEPTHWISE_CONV_2D version is 2
+
+RULE "OP_VERSION_CHECK" $(op_version DEPTHWISE_CONV_2D) '=' 2
--- /dev/null
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 112 dim: 112 dim: 4 }
+ quant { min: 0 max: 6 scale: 0.0235294 zero_point: 0 }
+}
+operand {
+ name: "ker"
+ type: UINT8
+ shape { dim: 1 dim: 3 dim: 3 dim: 4 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+ quant {
+ min: -30.3175 min: -0.779597 min: -10.2751 min: -10.8594
+ max: 4.35049 max: 2.70807 max: 11.0269 max: 20.97
+ scale:0.135953 scale: 0.0136771 scale: 0.0835375 scale: 0.124821
+ zero_point:223 zero_point: 57 zero_point: 123 zero_point: 87
+ quantized_dimension: 3
+ }
+}
+operand {
+ name: "bias"
+ type: INT32
+ shape { dim: 4 }
+ filler {
+ tag: "gaussian"
+ arg: "0"
+ arg: "1.0"
+ }
+ quant {
+ scale: 1.4758e-16 scale: 3.15185e-05 scale: 2.20685e-05 scale: 1.72205e-16
+ zero_point: 0 zero_point: 0 zero_point: 0 zero_point: 0
+ }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 112 dim: 112 dim: 4 }
+ quant { min: 0 max: 6 scale: 0.0235294 zero_point: 0 }
+}
+operation {
+ type: "DepthwiseConv2D"
+ depthwiseconv2d_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ depth_multiplier: 1
+ activation : RELU6
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+input: "ker"
+output: "ofm"
--- /dev/null
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 }
+}
+operand {
+ name: "weight"
+ type: FLOAT32
+ shape { dim: 4 dim: 16 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "-2" arg: "-3" arg: "4"
+ }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ }
+ input: "in"
+ input: "weight"
+ input: "bias"
+ output: "out"
+}
+input: "in"
+output: "out"
--- /dev/null
+operand {
+ name: "ifm1"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 2 scale: 0.0078125 zero_point: 128}
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 2 scale: 0.0078125 zero_point: 128}
+}
+operation {
+ type: "L2Normalize"
+ l2norm_options {
+ activation: NONE
+ }
+ input: "ifm1"
+ output: "ofm"
+}
+input: "ifm1"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ quant { min: 0 max: 1 scale: 0.00390625 zero_point: -128 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ quant { min: 0 max: 1 scale: 0.00390625 zero_point: -128 }
+}
+operation {
+ type: "Logistic"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "Const_transposed"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 3
+ dim: 1
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+}
+operand {
+ name: "FusedBatchNormV3"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+ quant {
+ quantized_dimension: 0
+ }
+}
+operand {
+ name: "FusedBatchNormV3_add_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "-2.04724"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+ quant {
+ quantized_dimension: 0
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "2.00834"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+}
+operand {
+ name: "Hole"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 2
+ dim: 2
+ dim: 1
+ }
+ quant {
+ min: 0
+ max: 255
+ quantized_dimension: 0
+ }
+}
+operand {
+ name: "conv2d_transpose"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+ quant {
+ quantized_dimension: 0
+ }
+}
+operand {
+ name: "conv2d_transpose/input_sizes"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "4"
+ arg: "4"
+ arg: "1"
+ }
+ quant {
+ quantized_dimension: 0
+ }
+}
+operation {
+ type: "TransposeConv"
+ input: "conv2d_transpose/input_sizes"
+ input: "Const_transposed"
+ input: "Hole"
+ output: "conv2d_transpose"
+ transpose_conv_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+}
+operation {
+ type: "Mul"
+ input: "conv2d_transpose"
+ input: "FusedBatchNormV3_mul_0_param"
+ output: "FusedBatchNormV3_mul_0"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "FusedBatchNormV3_mul_0"
+ input: "FusedBatchNormV3_add_param"
+ output: "FusedBatchNormV3"
+ add_options {
+ activation: NONE
+ }
+}
+input: "Hole"
+output: "FusedBatchNormV3"
--- /dev/null
+operand {
+ name: "ifm1"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 2 }
+ filler {
+ tag: "constant" arg: "16" arg: "16"
+ }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "ResizeBilinear"
+ input: "ifm1"
+ input: "size"
+ output: "ofm"
+ resize_bilinear_options {
+ align_corners: false
+ half_pixel_centers: false
+ }
+}
+input: "ifm1"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { dim: 1 dim: 2 dim: 2 dim: 12 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+ type: "SpaceToDepth"
+ space_to_depth_options {
+ block_size: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
operand {
name: "ker"
type: FLOAT32
- shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+ shape { dim: 3 dim: 1 dim: 1 dim: 3 }
filler {
tag: "gaussian"
arg: "0.0"
--- /dev/null
+operand {
+ name: "out_shape"
+ type: INT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "4" arg: "4" arg: "1"
+ }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "-3" arg: "-4" arg: "5" arg: "-6"
+ arg: "7" arg: "8" arg: "-9" arg: "-10" arg: "11" arg: "-12"
+ arg: "13" arg: "14" arg: "-15" arg: "-16" arg: "17" arg: "-18"
+ }
+}
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 1 }
+}
+
+operation {
+ type: "TransposeConv"
+ transpose_conv_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "out_shape"
+ input: "ker"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "ofm_idx"
+ type: INT32
+ shape { dim: 4 }
+}
+operation {
+ type: "Unique"
+ unique_options {
+ idx_out_type: INT32
+ }
+ input: "ifm"
+ output: "ofm"
+ output: "ofm_idx"
+}
+input: "ifm"
+output: "ofm"
+output: "ofm_idx"
--- /dev/null
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { }
+}
+operand {
+ name: "ofm_idx"
+ type: INT64
+ shape { dim: 4 }
+}
+operation {
+ type: "Unique"
+ unique_options {
+ idx_out_type: INT64
+ }
+ input: "ifm"
+ output: "ofm"
+ output: "ofm_idx"
+}
+input: "ifm"
+output: "ofm"
+output: "ofm_idx"
--- /dev/null
+operand {
+ name: "ifm"
+ type: INT32
+ shape { dim: 5 }
+}
+operand {
+ name: "ofm"
+ type: INT32
+ shape { }
+}
+operand {
+ name: "ofm_idx"
+ type: INT32
+ shape { dim: 5 }
+}
+operation {
+ type: "Unique"
+ unique_options {
+ idx_out_type: INT32
+ }
+ input: "ifm"
+ output: "ofm"
+ output: "ofm_idx"
+}
+input: "ifm"
+output: "ofm"
+output: "ofm_idx"
--- /dev/null
+operand {
+ name: "ifm"
+ type: INT32
+ shape { dim: 5 }
+}
+operand {
+ name: "ofm"
+ type: INT32
+ shape { }
+}
+operand {
+ name: "ofm_idx"
+ type: INT64
+ shape { dim: 5 }
+}
+operation {
+ type: "Unique"
+ unique_options {
+ idx_out_type: INT64
+ }
+ input: "ifm"
+ output: "ofm"
+ output: "ofm_idx"
+}
+input: "ifm"
+output: "ofm"
+output: "ofm_idx"
--- /dev/null
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 4 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { }
+}
+operand {
+ name: "ofm_idx"
+ type: INT32
+ shape { dim: 4 }
+}
+operation {
+ type: "Unique"
+ unique_options {
+ idx_out_type: INT32
+ }
+ input: "ifm"
+ output: "ofm"
+ output: "ofm_idx"
+}
+input: "ifm"
+output: "ofm"
+output: "ofm_idx"
--- /dev/null
+operand {
+ name: "ifm"
+ type: UINT8
+ shape { dim: 5 }
+ quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operand {
+ name: "ofm"
+ type: UINT8
+ shape { }
+}
+operand {
+ name: "ofm_idx"
+ type: INT64
+ shape { dim: 5 }
+}
+operation {
+ type: "Unique"
+ unique_options {
+ idx_out_type: INT64
+ }
+ input: "ifm"
+ output: "ofm"
+ output: "ofm_idx"
+}
+input: "ifm"
+output: "ofm"
+output: "ofm_idx"
--- /dev/null
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+
+namespace tflite;
+
+// This corresponds to the version.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+ FLOAT64 = 10,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+ // t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
+ // t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
+ quantized_dimension:int;
+}
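+
+// Illustrative example: with scale = 0.5 and zero_point = 10, a stored value
+// q = 14 dequantizes to f = 0.5 * (14 - 10) = 2.0.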
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+// 1. In what order to traverse these dimensions. For example, to store a 2-D
+// matrix in row major order, the traversal order would be (d0, d1),
+// whereas to store it in column major order, the traversal order would be
+// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+// could be (d0, d1, d2, d3).
+// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+// tensor dimension in (d0, ..., dn-1).
+// 3. In the traversal order defined above, the format (dense vs. sparse) and
+// index metadata for each dimension. For a dense dimension, this is just
+// the size of that dimension. For a sparse dimension, it's the same as
+// the compressed index defined in the Compressed Sparse Row (CSR) format.
+// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+// 1. DENSE: each coordinate in this dimension is stored implicitly.
+// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+// compression technique is the same as what CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+ DENSE = 0,
+ SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+ values:[int];
+}
+
+table Uint16Vector {
+ values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+ values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because a tensor's shape is an int32
+// vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+ Int32Vector,
+ Uint16Vector,
+ Uint8Vector
+}
+
+table DimensionMetadata {
+ // Whether a dimension is dense or sparse.
+ format:DimensionType;
+ // Index metadata used for a dimension.
+ // - If format is DimensionType.DENSE then we use the dense_size field to
+ // store the size of that dimension. Each index in that dimension is
+ // stored implicitly.
+ // - If format is DimensionType.SPARSE_CSR then we use array_segments and
+ // array_indices to encode that dimension. array_segments represents how
+ // to segment the indices array, each segment corresponds to one element
+ // in the previous dimension. array_indices represents the index of the
+ // non-zero elements within this dimension (as those in the CSR matrix
+ // format, where the first array is row pointers and the second array is
+ // column indices).
+ dense_size:int;
+ array_segments:SparseIndexVector;
+ array_indices:SparseIndexVector;
+}
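+
+// Illustrative example: a 2x3 matrix [[1, 0, 2], [0, 0, 3]] with d0 DENSE and
+// d1 SPARSE_CSR is described by dense_size = 2 for d0, array_segments = [0, 2, 3]
+// and array_indices = [0, 2, 2] for d1, with only the values [1, 2, 3] stored.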
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+ // The traversal order of the dimensions defined in the `shape` field of the
+ // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1,
+ // ..., dn-1),
+ // - if not block sparse, the traversal_order is just a permutation of (d0,
+ // ..., dn-1). For example, a 2-D matrix stored in row-major order would
+ // have traversal_order = (d0, d1).
+ // - if block sparse with a k-dimensional block (0 <= k <= n), the
+ // traversal_order has n + k elements. The first n elements are still a
+ // permutation of (d0, ..., dn-1). The last k elements are a permutation
+ // of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+ // example, a 2-D matrix with 2-D blocks, both stored in row-major order
+ // would have traversal_order = (d0, d1, d2, d3).
+ traversal_order:[int];
+ // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+ // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+ // tensor dimension in (d0, ..., dn).
+ // It's stored in the order of (dn, ..., dn+k-1).
+ // If not block-sparse, this field is NULL.
+ block_map:[int];
+ // In the traversal order defined above, the metadata needed for
+ // each dimension to locate the non-zero values in the original dense tensor.
+ // The size of the dim_metadata array = the size of the traversal_order array
+ // = n + k.
+ dim_metadata:[DimensionMetadata];
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+
+ // Parameters to encode a sparse tensor. See the example in
+ // tensorflow/lite/testdata/sparse_tensor.json.
+ sparsity:SparsityParameters; // Optional.
+
+ // Encodes `shape` with unknown dimensions. Unknown dimensions are
+ // represented with -1.
+ shape_signature:[int]; // Optional.
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+
+enum BuiltinOperator : byte {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+ NON_MAX_SUPPRESSION_V4 = 120,
+ NON_MAX_SUPPRESSION_V5 = 121,
+ SCATTER_ND = 122,
+ SELECT_V2 = 123,
+ DENSIFY = 124,
+ SEGMENT_SUM = 125,
+ BATCH_MATMUL = 126
+}
+
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions,
+ NonMaxSuppressionV4Options,
+ NonMaxSuppressionV5Options,
+ ScatterNdOptions,
+ SelectV2Options,
+ DensifyOptions,
+ SegmentSumOptions,
+ BatchMatMulOptions
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ // `depth_multiplier` is redundant. It's used by CPU kernels in
+ // TensorFlow 2.0 or below, but ignored in versions above.
+ // See comments in lite/c/builtin_op_data.h for more details.
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+ // For weights-only quantization, use asymmetric quantization for non
+ // constant inputs at evaluation time.
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+ asymmetric_quantize_inputs:bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+ // If set to true, then the number of dimensions is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
+ keep_num_dims: bool;
+
+ // Parameters for FullyConnected version 7 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+
+ // Parameters for LSTM version 4 or above.
+ asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+
+ // Parameter for Unidirectional Sequence LSTM version 4.
+ asymmetric_quantize_inputs:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+
+ // Parameters for version 3 or above.
+ asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+// A call operation options
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+ adj_x:bool;
+ adj_y:bool;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ builtin_code:BuiltinOperator;
+ custom_code:string;
+
+ // The version of the operator. The version needs to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+}
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation are configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicated map lookups.
+ opcode_index:uint;
+
+ // Optional input are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator (e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+ // A human readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+ // Metadata about the model. Indirects into the existing buffers list.
+ // Deprecated, prefer to use metadata field.
+ metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
+}
+
+root_type Model;
2.1.0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.1.0/tensorflow/lite/schema/schema.fbs
2.2.0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.2.0/tensorflow/lite/schema/schema.fbs
2.3.0-rc0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.3.0-rc0/tensorflow/lite/schema/schema.fbs
+2.3.0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.3.0/tensorflow/lite/schema/schema.fbs
--- /dev/null
+import tensorflow as tf
+
+i = tf.constant(0, shape=[1, 0], dtype=tf.int32, name='i')
+x = tf.compat.v1.placeholder(shape=[1, 1], dtype=tf.int32, name='Hole')
+
+c = lambda i: tf.compat.v1.less(tf.compat.v1.size(i[0]), 10)
+b = lambda i: tf.concat([i, x], axis=1)
+
+# this loop changes i's shape from [1, 0] -> [1, 1] -> [1, 2] -> ... -> [1, 10]
+r = tf.compat.v1.while_loop(
+ c, b, [i], name="While", shape_invariants=[tf.TensorShape([1, None])])
+
+output = tf.compat.v1.identity(r, name="Output")
+
+# by adding the following code, [[1 1 1 1 1 1 1 1 1 1]] and (1, 10) will be printed
+#
+# import numpy as np
+# x_val = np.array([[1]])
+# with tf.compat.v1.Session() as sess:
+# result = sess.run(r, feed_dict={x:x_val})
+# print(result)
+# print(result.shape)
+
+# with TF 2.3, tf2tflite throws the following error
+#
+# Exception: venv/tf-2.3/lib/python3.6/site-packages/tensorflow/python/eager/lift_to_graph.py:339:0:
+# error: body function result type tensor<1x1xi32> is incompatible with result type tensor<1x0xi32>
+# at index 0
+# ...
+# note: see current operation: %1:2 = "tf.While"(%0, %arg0)
+# {body = @_functionalize_body_00, cond = @_functionalize_cond_00, device = "", is_stateless = false, output_shapes = [], parallel_iterations = 10 : i64}
+# : (tensor<1x0xi32>, tensor<1x1xi32>) -> (tensor<1x0xi32>, tensor<1x1xi32>)
--- /dev/null
+import tensorflow as tf
+
+x = tf.compat.v1.placeholder(shape=[1, None], dtype=tf.int32, name='Hole')
+i = tf.compat.v1.placeholder(shape=[1, None], dtype=tf.int32, name='Hole_2')
+
+
+def c(ii):
+ rs = tf.compat.v1.shape(ii)
+ r1 = rs[1]
+ return tf.compat.v1.less(r1, 10)
+
+
+def b(ii):
+ return tf.concat([ii, x], axis=1)
+
+
+# this loop changes i's shape from [1, 0] -> [1, 1] -> [1, 2] -> ... -> [1, 10]
+r = tf.compat.v1.while_loop(
+ c, b, [i], name="While", shape_invariants=[tf.TensorShape([1, None])])
+
+output = tf.compat.v1.identity(r, name="Output")
+
+# by adding the following code, [[123 1 2 3 1 2 3 1 2 3]] and (1, 10) will be printed
+#
+'''
+import numpy as np
+i_val = np.array([[123]], dtype=np.int32)
+x_val = np.array([[1, 2, 3]], dtype=np.int32)
+with tf.compat.v1.Session() as sess:
+ result = sess.run(r, feed_dict={x:x_val, i:i_val})
+ print(result)
+ print(result.shape)
+'''
--- /dev/null
+import tensorflow as tf
+import numpy as np
+
+input_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 2, 2, 1), name="Hole")
+W = np.ones(9).reshape((3, 3, 1, 1))
+filter_ = tf.compat.v1.constant(W, dtype=tf.float32)
+tconv_ = tf.compat.v1.nn.conv2d_transpose(
+ input_, filter_, output_shape=(1, 4, 4, 1), strides=[1, 1, 1, 1], padding='VALID')
+
+scale_ = tf.compat.v1.constant([1.0177339315414429], dtype=tf.float32)
+offset_ = tf.compat.v1.constant([0.015628524124622345], dtype=tf.float32)
+mean_ = tf.compat.v1.constant([1.027155211195349693], dtype=tf.float32)
+variance_ = tf.compat.v1.constant([0.25580066442489624], dtype=tf.float32)
+bn_out, _, _ = tf.compat.v1.nn.fused_batch_norm(
+ tconv_,
+ scale_,
+ offset_,
+ mean=mean_,
+ variance=variance_,
+ epsilon=0.0010000000474974513,
+ is_training=False)
+'''
+python ../../compiler/tf2tfliteV2/tf2tfliteV2.py --v1 \
+-i tconv-bn.pbtxt \
+-o tconv-bn.tflite \
+-I Hole -O FusedBatchNorm
+'''
minSdkVersion 26
targetSdkVersion 29
versionCode 1
- versionName "1.7.0"
+ versionName "1.8.0"
externalNativeBuild {
ndkBuild {
EXT_ACL_FOLDER=/home/hanjoung/ws/temp/arm_compute-v19.05-bin-android/lib/android-arm64-v8a-neon-cl \
ANDROID_BUILD_TOOLS_DIR=/home/hanjoung/ws/android-tools/sdk/build-tools/27.0.3/ \
ANDROID_SDK_DIR=/home/hanjoung/ws/android-tools/sdk \
- TFLITE_MODEL_PATH=/Users/hanjoung/ws/ghent/STAR/nnfw/tests/scripts/framework/cache/MODELS/mobilenet/mobilenet_v1_0.25_128.tflite \
+ TFLITE_MODEL_PATH=/Users/hanjoung/ws/ghent/STAR/nnfw/tests/scripts/models/cache/MODELS/mobilenet/mobilenet_v1_0.25_128.tflite \
ANDROID_BOOST_ROOT=/home/hanjoung/ws/gh/moritz-wundke/Boost-for-Android/build/out/arm64-v8a
```
file(GLOB_RECURSE SOURCES "src/*.cpp")
-add_library(nnfw_lib_benchmark SHARED ${SOURCES})
+add_library(nnfw_lib_benchmark STATIC ${SOURCES})
target_include_directories(nnfw_lib_benchmark PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_link_libraries(nnfw_lib_benchmark PRIVATE ${LIB_PTHREAD})
-install(TARGETS nnfw_lib_benchmark DESTINATION lib)
if (option.memory)
{
print_memory = true;
- for (int i = PhaseEnum::MODEL_LOAD; i <= PhaseEnum::EXECUTE; ++i)
+ for (int i = PhaseEnum::MODEL_LOAD; i < PhaseEnum::EXECUTE; ++i)
{
auto phase = phases.at(gPhaseStrings[i]);
for (int j = MemoryType::RSS; j <= MemoryType::PSS; ++j)
template <typename DstType, typename SrcType> inline DstType polymorphic_downcast(SrcType *x)
{
+#ifndef __ANDROID__
assert(dynamic_cast<DstType>(x) == x);
+#endif
return static_cast<DstType>(x);
}
* Outputs:
* * 0: The sum, a tensor of the same type as input0.
*/
- ANEURALNETWORKS_ADDV2_EX = 50039
+ ANEURALNETWORKS_ADDV2_EX = 50039,
+
+ ANEURALNETWORKS_STATELESS_RANDOM_UNIFORM_EX = 50040,
+
+ /** Splits a tensor value into a list of sub tensors.
+ *
+ * Supported tensor {@link OperandCode}:
+ * * {@link ANEURALNETWORKS_TENSOR_FLOAT32, ANEURALNETWORKS_TENSOR_INT32}
+ *
+ * Supported tensor rank: up to 4
+ *
+ * Inputs:
+ * * 0: A tensor to split.
+ * * 1: A tensor containing the sizes of each output tensor along split_dim
+ * * 2: The dimension along which to split
+ *
+ * Outputs:
+ * * 0: Tensor objects resulting from splitting value.
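+ * For example, splitting a tensor of shape [4, 6] along dimension 1 with
+ * sizes [2, 4] produces outputs of shape [4, 2] and [4, 4].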
+ */
+ ANEURALNETWORKS_SPLIT_V_EX = 50041
} OperationCodeEx; // extends OperationCode
add_library(${ONERT_DEV} SHARED ${API_SRC})
# Public headers to publish
-# nnfw_debug.h is header for runtime developer, so it will not be installed
-# But runtime developer can use nnfw_debug.h by linking nnfw-dev
-set(NNFW_API_HEADERS include/nnfw.h include/nnfw_dev.h)
+# nnfw_internal.h is header for runtime developer, so it will not be installed
+# But runtime developer can use nnfw_internal.h by linking nnfw-dev
+set(NNFW_API_HEADERS include/nnfw.h include/nnfw_experimental.h)
target_link_libraries(${ONERT_DEV} PUBLIC nnfw-nnapi-header)
target_link_libraries(${ONERT_DEV} PUBLIC onert_core)
NNFW_STATUS_ERROR = 1,
/** Unexpected null argument is given. */
NNFW_STATUS_UNEXPECTED_NULL = 2,
+ /** When a function was called but it is not valid for the current session state. */
+ NNFW_STATUS_INVALID_STATE = 3,
+ /** When it is out of memory */
+ NNFW_STATUS_OUT_OF_MEMORY = 4,
} NNFW_STATUS;
/**
*
* <p>Supported backends differ on each platform.
* For example, `x86_64` supports "cpu" only.
- * Can set multiple backends by semicolon (ex: "acl_cl;cpu").
- * Among the multiple backends, the 1st element is used as default backend.</p>
- *
- * @note Possible backend strings are: "cpu", "acl_cl", "acl_neon", "srcn"
+ * Multiple backends can be set and they must be separated by a semicolon (ex: "acl_cl;cpu").
+ * For each backend string, `libbackend_{backend}.so` will be dynamically loaded during
+ * {@link nnfw_prepare}.
+ * Among the multiple backends, the 1st element is used as the default backend.</p>
*
* @param[in] session session to which available backends are set
* @param[in] backends available backends for nnfw to use
*
* This function should be called before {@link nnfw_prepare} is invoked.
*
- * <p>Supported backends differs on each platforms.
- * For example, `x86_64` supports "cpu" only.
- * The backend for op has higher priority than available backends specified by
- * nnfw_set_available_backends.</p>
+ * <p>The backend for op has higher priority than available backends specified by
+ * {@link nnfw_set_available_backends}.</p>
*
- * @note Possible backend strings are: "cpu", "acl_cl", "acl_neon"
+ * @deprecated Deprecated since 1.8.0.
*
* @param[in] session session to be modified
* @param[in] op operation to be set
* limitations under the License.
*/
-#ifndef __NNFW_DEV_H__
-#define __NNFW_DEV_H__
+#ifndef __NNFW_EXPERIMENTAL_H__
+#define __NNFW_EXPERIMENTAL_H__
#include "nnfw.h"
NNFW_STATUS nnfw_register_custom_op_info(nnfw_session *session, const char *id,
custom_kernel_registration_info *info);
-#endif // __NNFW_DEV_H__
+#endif // __NNFW_EXPERIMENTAL_H__
* limitations under the License.
*/
-#ifndef __NNFW_DEBUG_H__
-#define __NNFW_DEBUG_H__
+#ifndef __NNFW_INTERNAL_H__
+#define __NNFW_INTERNAL_H__
#include "nnfw.h"
NNFW_STATUS nnfw_get_config(nnfw_session *session, const char *key, char *value, size_t value_size);
-#endif // __NNFW_DEBUG_H__
+/**
+ * @brief Load a circle model from buffer.
+ *
+ * The buffer must outlive the session.
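+ *
+ * A typical call order (illustrative) is nnfw_create_session(), then this function
+ * in place of nnfw_load_model_from_file(), followed by nnfw_prepare() and nnfw_run().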
+ *
+ * @param[in] session session
+ * @param[in] buffer Pointer to the buffer
+ * @param[in] size Buffer size
+ * @return NNFW_STATUS
+ */
+NNFW_STATUS nnfw_load_circle_from_buffer(nnfw_session *session, uint8_t *buffer, size_t size);
+
+#endif // __NNFW_INTERNAL_H__
* NNFW_VERSION is a uint32 value representing nnfw runtime version
* in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
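* (for example, 0x01000800 corresponds to version 1.8.0)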
*/
-#define NNFW_VERSION 0x01000700
+#define NNFW_VERSION 0x01000800
#endif // __NNFW_VERSION_H__
#ifndef __ONERT_BACKEND_CUSTOM_KERNEL_H__
#define __ONERT_BACKEND_CUSTOM_KERNEL_H__
-#include "nnfw_dev.h"
+#include "nnfw_experimental.h"
#include "backend/CustomKernelBuilder.h"
#include "exec/IFunction.h"
STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_NO_ERROR, 0);
STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_ERROR, 1);
STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_UNEXPECTED_NULL, 2);
+STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_INVALID_STATE, 3);
+STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_OUT_OF_MEMORY, 4);
STATIC_ASSERT_ENUM_CHECK(NNFW_LAYOUT_NONE, 0);
STATIC_ASSERT_ENUM_CHECK(NNFW_LAYOUT_CHANNELS_LAST, 1);
{
NNFW_RETURN_ERROR_IF_NULL(session);
- *session = new nnfw_session();
-
+ *session = new (std::nothrow) nnfw_session();
+ if (*session == nullptr)
+ return NNFW_STATUS_OUT_OF_MEMORY;
return NNFW_STATUS_NO_ERROR;
}
// It should not be reached.
return NNFW_STATUS_ERROR;
}
+
+NNFW_STATUS nnfw_load_circle_from_buffer(nnfw_session *session, uint8_t *buffer, size_t size)
+{
+ NNFW_RETURN_ERROR_IF_NULL(session);
+ return session->load_circle_from_buffer(buffer, size);
+}
nnfw_session::~nnfw_session() = default;
-NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir)
+NNFW_STATUS nnfw_session::load_circle_from_buffer(uint8_t *buffer, size_t size)
{
if (!isStateInitialized())
+ return NNFW_STATUS_INVALID_STATE;
+
+ if (!buffer)
+ return NNFW_STATUS_UNEXPECTED_NULL;
+
+ if (size == 0)
return NNFW_STATUS_ERROR;
+ _subgraphs = onert::circle_loader::loadModel(buffer, size);
+ _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs);
+
+ _state = State::MODEL_LOADED;
+ return NNFW_STATUS_NO_ERROR;
+}
+
+NNFW_STATUS nnfw_session::load_model_from_file(const char *package_dir)
+{
+ if (!isStateInitialized())
+ return NNFW_STATUS_INVALID_STATE;
+
if (!package_dir)
{
std::cerr << "package_dir is null." << std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_UNEXPECTED_NULL;
}
if (!null_terminating(package_dir, MAX_PATH_LENGTH))
std::cerr << "invalid state";
}
std::cerr << std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
}
if (!_subgraphs || !primary_subgraph() || primary_subgraph()->isBuildingPhase())
{
std::cerr << "Error during nnfw_session::run : "
<< "run should be run after prepare" << std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
}
try
{
std::cerr << "Error during nnfw_session::run_async : "
<< "run_async should be run after prepare" << std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
}
_execution->startExecute();
if (!isStatePreparedOrFinishedRun())
{
std::cerr << "Error during nnfw_session::set_input : invalid state" << std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
}
if (!buffer && length != 0)
if (!isStatePreparedOrFinishedRun())
{
std::cerr << "Error during nnfw_session::set_output : invalid state" << std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
}
if (!buffer && length != 0)
NNFW_STATUS nnfw_session::input_size(uint32_t *number)
{
if (isStateInitialized()) // Model is not loaded
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
try
{
if (number == nullptr)
{
std::cerr << "Error during nnfw_session::input_size, number is null pointer." << std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_UNEXPECTED_NULL;
}
*number = primary_subgraph()->getInputs().size();
}
NNFW_STATUS nnfw_session::output_size(uint32_t *number)
{
if (isStateInitialized()) // Model is not loaded
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
try
{
if (number == nullptr)
{
std::cerr << "Error during nnfw_session::output_size, number is null pointer." << std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_UNEXPECTED_NULL;
}
*number = primary_subgraph()->getOutputs().size();
}
{
std::cerr << "Error during set_input_tensorinfo : should be run after load_model"
<< std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
}
if (ti.rank <= 0 || ti.rank > NNFW_MAX_RANK)
NNFW_STATUS nnfw_session::input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
{
+ if (isStateInitialized())
+ return NNFW_STATUS_INVALID_STATE;
+
try
{
if (ti == nullptr)
{
std::cerr << "Error during nnfw_session::input_tensorinfo, tensorinfo is null pointer."
<< std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_UNEXPECTED_NULL;
}
if (index >= primary_subgraph()->getInputs().size())
{
NNFW_STATUS nnfw_session::output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
{
if (isStateInitialized())
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
if (ti == nullptr)
{
std::cerr << "Error during nnfw_session::output_tensorinfo, tensorinfo is null pointer."
<< std::endl;
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_UNEXPECTED_NULL;
}
if (index >= primary_subgraph()->getOutputs().size())
NNFW_STATUS nnfw_session::set_available_backends(const char *backends)
{
if (!isStateModelLoaded())
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
try
{
- if (!backends || null_terminating(backends, MAX_BACKEND_NAME_LENGTH) == false)
- {
+ if (!backends)
+ return NNFW_STATUS_UNEXPECTED_NULL;
+ if (null_terminating(backends, MAX_BACKEND_NAME_LENGTH) == false)
return NNFW_STATUS_ERROR;
- }
auto &options = _compiler->options();
NNFW_STATUS nnfw_session::set_op_backend(const char *op, const char *backend)
{
if (!isStateModelLoaded())
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
try
{
- if (!op || !null_terminating(op, MAX_OP_NAME_LENGTH) || !backend ||
+ if (!op || !backend)
+ return NNFW_STATUS_UNEXPECTED_NULL;
+ if (!null_terminating(op, MAX_OP_NAME_LENGTH) ||
!null_terminating(backend, MAX_BACKEND_NAME_LENGTH))
- {
return NNFW_STATUS_ERROR;
- }
auto key = get_op_backend_string(op);
NNFW_STATUS nnfw_session::set_config(const char *key, const char *value)
{
if (!isStateModelLoaded())
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
+
+ if (!key || !value)
+ return NNFW_STATUS_UNEXPECTED_NULL;
auto &options = _compiler->options();
NNFW_STATUS nnfw_session::get_config(const char *key, char *value, size_t value_size)
{
if (!isStateModelLoaded())
- return NNFW_STATUS_ERROR;
+ return NNFW_STATUS_INVALID_STATE;
+
+ if (!key || !value)
+ return NNFW_STATUS_UNEXPECTED_NULL;
auto &options = _compiler->options();
#define __API_NNFW_API_INTERNAL_H__
#include "nnfw.h"
-#include "nnfw_dev.h"
+#include "nnfw_experimental.h"
#include <util/GeneralConfigSource.h>
NNFW_STATUS set_available_backends(const char *backends);
NNFW_STATUS set_op_backend(const char *op, const char *backend);
+ //
+ // Internal-only API
+ //
+
NNFW_STATUS set_config(const char *key, const char *value);
NNFW_STATUS get_config(const char *key, char *value, size_t value_size);
+ NNFW_STATUS load_circle_from_buffer(uint8_t *buffer, size_t size);
+
private:
onert::ir::Graph *primary_subgraph();
bool isStateInitialized();
#include "exec/FunctionSequence.h"
#include "util/logging.h"
#include "util/Utils.h"
+#include "AclKernelGen.h"
namespace onert
{
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto block_size_alloc = _tensor_builder->at(block_size_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto block_size_tensor = _tensor_builder->at(block_size_index).get();
assert(_ctx.at(block_size_index).data());
auto fn = std::make_unique<::arm_compute::CLBatchToSpaceLayer>();
- fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- const auto input_sub_type = _ctx.at(ifm_index).typeInfo().type() == ir::DataType::BOOL8
- ? arm_compute::SubDataType::BOOL
- : arm_compute::SubDataType::NONE;
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto fn = std::make_unique<::arm_compute::CLCast>();
+ std::unique_ptr<::arm_compute::IFunction> fn;
+ if (ifm_tensor->data_type() == ofm_tensor->data_type())
+ {
+ auto l = std::make_unique<::arm_compute::CLCopy>();
+
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle());
+
+ fn = std::move(l);
+ }
+ else
+ {
+ auto l = std::make_unique<::arm_compute::CLCast>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), input_sub_type);
+ // TODO Support converting float to int32 as round down
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+
+ fn = std::move(l);
+ }
auto acl_fn = asAclClFunction(std::move(fn));
ker_width, ker_height);
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ker_tensor = _tensor_builder->at(ker_index).get();
+ auto bias_tensor = _tensor_builder->at(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
auto fn = std::make_unique<::arm_compute::CLConvolutionLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(),
- conv_info, ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
+ fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
+ ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(),
+ ::arm_compute::Size2D(1U, 1U), act_info);
_return_fn = asAclClFunction(std::move(fn));
}
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ker_tensor = _tensor_builder->at(ker_index).get();
+ auto bias_tensor = _tensor_builder->at(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
{
auto fn = std::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
- ofm_alloc->handle(), conv_info, multiplier, act_info);
+ fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
+ ofm_tensor->handle(), conv_info, multiplier, act_info);
_return_fn = asAclClFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
const auto activation = node.param().activation;
-
- VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
- VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
- VERBOSE(MaxPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
- VERBOSE(MaxPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
- VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl;
- VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl;
- VERBOSE(MaxPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
- VERBOSE(MaxPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
- VERBOSE(MaxPool2D) << "PAD(T): " << padding.top << std::endl;
- VERBOSE(MaxPool2D) << "PAD(B): " << padding.bottom << std::endl;
- VERBOSE(MaxPool2D) << "PAD(L): " << padding.left << std::endl;
- VERBOSE(MaxPool2D) << "PAD(R): " << padding.right << std::endl;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX,
- ::arm_compute::Size2D{kw, kh},
- acl_common::asPadStrideInfo(padding, stride)};
-
- auto fn = std::make_unique<::arm_compute::CLPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
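
The pooling visitors (MaxPool2D here, AvgPool2D and L2Pool2D below) now delegate their shared boilerplate to acl_common::kernelGenPool2D and only pass the pooling type. For orientation, a minimal sketch of a helper with that shape follows; it simply re-packages the code removed from these visitors, and the template and parameter names are assumptions based on the call sites, not the actual acl_common implementation.

// Hypothetical sketch of a shared Pool2D generator (not the real onert helper).
template <typename T_ACLLayer, typename T_Node, typename T_Context, typename T_TensorBuilder>
std::unique_ptr<::arm_compute::IFunction>
kernelGenPool2D(const T_Node &node, const T_Context &ctx,
                const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout,
                ::arm_compute::PoolingType pooling_type)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(0)}; // INPUT

  const auto ofm_shape = ctx.at(ofm_index).shape().asFeature(layout);
  const auto ifm_shape = ctx.at(ifm_index).shape().asFeature(layout);

  const auto kh = node.param().kh;
  const auto kw = node.param().kw;
  const auto stride = node.param().stride;
  const auto padding =
      ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);

  auto ofm_tensor = tensor_builder->at(ofm_index).get();
  auto ifm_tensor = tensor_builder->at(ifm_index).get();

  ::arm_compute::PoolingLayerInfo info{pooling_type, ::arm_compute::Size2D{kw, kh},
                                       acl_common::asPadStrideInfo(padding, stride),
                                       true /* exclude_padding, as in the removed AvgPool2D code */};

  auto fn = std::make_unique<T_ACLLayer>();
  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), info);
  return fn;
}

With a helper of this shape, each visitor only wraps the returned raw function with asAclClFunction and appends the fused activation, as the new code above and below does.
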
void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG);
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
const auto activation = node.param().activation;
-
- VERBOSE(AvgPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
- VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
- VERBOSE(AvgPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
- VERBOSE(AvgPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
- VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl;
- VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl;
- VERBOSE(AvgPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
- VERBOSE(AvgPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
- VERBOSE(AvgPool2D) << "PAD(T): " << padding.top << std::endl;
- VERBOSE(AvgPool2D) << "PAD(B): " << padding.bottom << std::endl;
- VERBOSE(AvgPool2D) << "PAD(L): " << padding.left << std::endl;
- VERBOSE(AvgPool2D) << "PAD(R): " << padding.right << std::endl;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{
- ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{kw, kh},
- acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */};
-
- auto fn = std::make_unique<::arm_compute::CLPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Concat &node)
return;
}
- auto output_alloc = _tensor_builder->at(ofm_index).get();
+ auto output_tensor = _tensor_builder->at(ofm_index).get();
std::vector<::arm_compute::ICLTensor *> input_tensors;
for (auto &ifm_ind : input_indexes)
input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
if (input_indexes.size() < 2)
{
auto l = std::make_unique<::arm_compute::CLCopy>();
- l->configure(input_tensors.at(0), output_alloc->handle());
+ l->configure(input_tensors.at(0), output_tensor->handle());
fn = std::move(l);
}
else
auto l = std::make_unique<::arm_compute::CLConcatenateLayer>();
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = output_alloc->layout();
+ const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
- l->configure(input_tensors, output_alloc->handle(), fixed_axis);
+ l->configure(input_tensors, output_tensor->handle(), fixed_axis);
fn = std::move(l);
}
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
- using ir::operation::FullyConnected;
-
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
- const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
- const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
-
- const auto input_rank = _ctx.at(input_index).shape().rank();
-
- const auto output_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1);
- UNUSED_RELEASE(output_size);
- assert(_ctx.at(bias_index).shape().dim(0) == output_size);
- assert(_ctx.at(weight_index).shape().dim(0) == output_size);
- const auto batch_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 2);
- const auto input_size =
- _ctx.at(weight_index).shape().dim(_ctx.at(weight_index).shape().rank() - 1);
-
- // Check for reshaping input's shape into rank-2
- bool needs_reshape = false;
- ir::Shape reshape(2);
- if (input_rank == 3 || input_rank == 4)
- {
- const auto &ifm_shape = _ctx.at(input_index).shape();
- auto feature_size = 1;
- for (int i = 0; i < ifm_shape.rank(); ++i)
- {
- feature_size *= ifm_shape.dim(i);
- }
-
- UNUSED_RELEASE(feature_size);
- assert(feature_size == batch_size * input_size);
-
- // for reshaping
- needs_reshape = true;
- reshape.dim(0) = batch_size; /* H */
- reshape.dim(1) = input_size; /* W */
- }
-
+ auto output_tensor = _tensor_builder->at(output_index).get();
const auto activation = node.param().activation;
- auto output_alloc = _tensor_builder->at(output_index).get();
- const auto input_alloc = _tensor_builder->at(input_index).get();
- const auto weight_alloc = _tensor_builder->at(weight_index).get();
- const auto bias_alloc = _tensor_builder->at(bias_index).get();
- const auto frontend_layout = _current_op_seq_layout;
- const auto acl_layout = output_alloc->handle()->info()->data_layout();
-
- auto fn = std::make_unique<arm_compute::CLFullyConnectedReshapingLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- arm_compute::CLFullyConnectedReshapingLayer::KernelType kernel_type =
- arm_compute::CLFullyConnectedReshapingLayer::KernelType::GENERAL;
- if (_ctx.at(weight_index).isConstant())
- {
- kernel_type = arm_compute::CLFullyConnectedReshapingLayer::KernelType::PREPROCESSED_WEIGHTS;
- assert(_ctx.at(weight_index).data());
- }
- fn->configure(
- input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(), output_alloc->handle(),
- needs_reshape,
- ::onert::backend::acl_common::asTensorShape(
- reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)),
- kernel_type);
-
+ auto fn = acl_common::kernelGenFullyConnected<acl_common::AclClFunction, ::arm_compute::ICLTensor,
+ ::arm_compute::CLFullyConnectedReshapingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, output_alloc->handle()));
+ std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
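
FullyConnected likewise moves its rank-2 reshape decision and kernel-type selection into a shared template, keeping only the fused activation in the visitor. Purely as an illustration of what the removed logic amounts to, a condensed sketch follows; the template parameter list mirrors the call above, but the body and names are assumptions rather than the real acl_common::kernelGenFullyConnected.

// Hypothetical condensed sketch (asserts and error checks of the removed code omitted).
template <typename T_Function, typename T_Tensor, typename T_Layer,
          typename T_Node, typename T_Context, typename T_TensorBuilder>
std::unique_ptr<exec::IFunction>
kernelGenFullyConnected(const T_Node &node, const T_Context &ctx,
                        const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout)
{
  using ir::operation::FullyConnected;

  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
  const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
  const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};

  const auto input_rank = ctx.at(input_index).shape().rank();
  const auto batch_size =
      ctx.at(output_index).shape().dim(ctx.at(output_index).shape().rank() - 2);
  const auto input_size =
      ctx.at(weight_index).shape().dim(ctx.at(weight_index).shape().rank() - 1);

  // Rank-3/4 inputs are flattened into a [batch, feature] matrix before the FC layer.
  bool needs_reshape = false;
  ir::Shape reshape(2);
  if (input_rank == 3 || input_rank == 4)
  {
    needs_reshape = true;
    reshape.dim(0) = batch_size; /* H */
    reshape.dim(1) = input_size; /* W */
  }

  auto output_tensor = tensor_builder->at(output_index).get();
  auto input_tensor = tensor_builder->at(input_index).get();
  auto weight_tensor = tensor_builder->at(weight_index).get();
  auto bias_tensor = tensor_builder->at(bias_index).get();

  // Constant weights can be preprocessed once; otherwise fall back to the general kernel.
  auto kernel_type = T_Layer::KernelType::GENERAL;
  if (ctx.at(weight_index).isConstant())
    kernel_type = T_Layer::KernelType::PREPROCESSED_WEIGHTS;

  auto fn =
      std::make_unique<T_Layer>(tensor_builder->acl_tensor_manager()->internal_buffer_manager());
  const auto acl_layout = output_tensor->handle()->info()->data_layout();
  fn->configure(input_tensor->handle(), weight_tensor->handle(), bias_tensor->handle(),
                output_tensor->handle(), needs_reshape,
                acl_common::asTensorShape(reshape, layout, acl_common::asRuntimeLayout(acl_layout)),
                kernel_type);
  return std::make_unique<T_Function>(std::move(fn)); // T_Tensor would back weight handling
}
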
void KernelGenerator::visit(const ir::operation::Mul &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLPixelWiseMultiplication>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), 1.0, // scale
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto keep_dims{node.param().keep_dims};
const auto reduce_type = node.param().reduce_type;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
const auto input_rank = _ctx.at(input_index).shape().rank();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = input_alloc->layout();
+ const auto backend_layout = input_tensor->layout();
std::unique_ptr<arm_compute::IFunction> fn;
if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
const auto acl_axes =
acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
- l->configure(input_alloc->handle(), acl_axes, keep_dims, output_alloc->handle());
+ l->configure(input_tensor->handle(), acl_axes, keep_dims, output_tensor->handle());
fn = std::move(l);
}
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
const auto acl_axes = acl_common::asSet(axes, input_rank, frontend_layout, backend_layout);
- l->configure(input_alloc->handle(), output_alloc->handle(), acl_axes, keep_dims,
+ l->configure(input_tensor->handle(), output_tensor->handle(), acl_axes, keep_dims,
acl_common::convertReduceType(reduce_type));
fn = std::move(l);
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
// NOTE This operation must not change the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = output_alloc->layout();
+ const auto backend_layout = output_tensor->layout();
assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
frontend_layout == backend_layout);
UNUSED_RELEASE(frontend_layout);
auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
(void)dims;
(void)ndim;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<arm_compute::CLReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
_return_fn = std::move(acl_fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<arm_compute::CLActivationLayer>();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
const auto beta = node.param().beta;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::CLSoftmaxLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_alloc->handle(), output_alloc->handle(), beta);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
auto acl_fn = asAclClFunction(std::move(fn));
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto outputData_alloc = _tensor_builder->at(output_index).get();
- auto inputData_alloc = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_builder->at(output_index).get();
+ auto inputData_tensor = _tensor_builder->at(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = inputData_alloc->layout();
+ const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
int input_rank = _ctx.at(input_index).shape().rank();
auto fn = std::make_unique<::arm_compute::CLSlice>();
- fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set);
+ fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
auto acl_fn = asAclClFunction(std::move(fn));
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto outputData_alloc = _tensor_builder->at(output_index).get();
- auto inputData_alloc = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_builder->at(output_index).get();
+ auto inputData_tensor = _tensor_builder->at(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = inputData_alloc->layout();
+ const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
int input_rank = _ctx.at(input_index).shape().rank();
auto fn = std::make_unique<::arm_compute::CLStridedSlice>();
- fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set,
+ fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
strides_set, begin_mask, end_mask, shrink_axis_mask);
auto acl_fn = asAclClFunction(std::move(fn));
const auto rank = _ctx.at(ifm_idx).shape().rank();
- auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
- auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = ifm_alloc->layout();
+ const auto backend_layout = ifm_tensor->layout();
std::vector<std::int32_t> pv(perm.cbegin(), perm.cend());
// Reversed
auto fn = std::make_unique<::arm_compute::CLPermute>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), backend_pv);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
auto acl_fn = asAclClFunction(std::move(fn));
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLArithmeticAddition>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
arm_compute::ConvertPolicy::SATURATE);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Sub &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLArithmeticSubtraction>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
arm_compute::ConvertPolicy::SATURATE);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Div &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLArithmeticDivision>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
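
Add, Sub, Div and the other visitors with a fused activation all return an exec::FunctionSequence: the arithmetic kernel first, then whatever ActivationBuilder::generate produces for the output tensor. The sketch below shows one plausible shape for that second half, reusing the ActivationLayerInfo values that appear elsewhere in this file for ReLU1/ReLU6; the NopFunction name and the exact dispatch are assumptions, not something this diff confirms.

// Hypothetical sketch of a fused-activation builder (assumed, not the actual ActivationBuilder).
static std::unique_ptr<exec::IFunction> generateFusedActivation(ir::Activation activation,
                                                                ::arm_compute::ICLTensor *ofm)
{
  using ActInfo = ::arm_compute::ActivationLayerInfo;

  ActInfo act_info;
  switch (activation)
  {
    case ir::Activation::NONE:
      return std::make_unique<exec::NopFunction>(); // assumed no-op IFunction
    case ir::Activation::RELU:
      act_info = ActInfo{ActInfo::ActivationFunction::RELU};
      break;
    case ir::Activation::RELU1:
      act_info = ActInfo{ActInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
      break;
    case ir::Activation::RELU6:
      act_info = ActInfo{ActInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
      break;
    default:
      throw std::runtime_error{"Unsupported fused activation"};
  }

  auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
  fn->configure(ofm, nullptr, act_info); // run in place on the op's output tensor
  return asAclClFunction(std::move(fn));
}
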
void KernelGenerator::visit(const ir::operation::Exp &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::CLExpLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto gamma_alloc = _tensor_builder->at(gamma_index).get();
- auto beta_alloc = _tensor_builder->at(beta_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto gamma_tensor = _tensor_builder->at(gamma_index).get();
+ auto beta_tensor = _tensor_builder->at(beta_index).get();
auto epsilon = node.param().epsilon;
auto activation = node.param().activation;
auto fn = std::make_unique<::arm_compute::CLInstanceNormalizationLayerEx>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), gamma_alloc->handle(),
- beta_alloc->handle(), epsilon);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(),
+ beta_tensor->handle(), epsilon);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Logistic &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input0_tensor = _tensor_builder->at(input0_index).get();
+ auto input1_tensor = _tensor_builder->at(input1_index).get();
auto fn = std::make_unique<::arm_compute::CLBinaryLogicalOp>();
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(),
+ fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
::arm_compute::BinaryLogicalOperation::AND);
auto acl_fn = asAclClFunction(std::move(fn));
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
- // TODO Support dynamic rnn
- // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
- const auto scratch_buffer_index{
- node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
- const auto output_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
- const auto cell_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
- const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
-
- const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
- const auto input_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
- const auto input_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
- const auto input_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
- const auto input_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
- const auto recurrent_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
- const auto recurrent_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
- const auto recurrent_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
- const auto recurrent_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
- const auto cell_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
- const auto cell_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
- const auto cell_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
- const auto input_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
- const auto forget_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
- const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
- const auto output_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
- const auto projection_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
- const auto projection_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
- const auto output_state_in_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
- const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
- const auto cell_threshold = node.param().cell_threshold;
- const auto projection_threshold = node.param().projection_threshold;
-
- bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0;
- bool has_recurrent_to_input_weights =
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
- bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
- bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
- bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 &&
- _ctx.at(projection_weights_index).shape().dim(1) != 0;
- bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0);
-
- // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
- // true: no CIFG
- // false: CIFG
- // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG).
- bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
-
- // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
- // But the cell_to_input_weights does not exist in regular CIFG although peephole.
- // true: peephole
- // false: no peephole
- bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
-
- // NOTE Although the projection weights has data the projection bias may not have data.
- bool has_projection_param = has_projection_weights;
-
- const auto activation = node.param().activation;
- const auto cell_clip = cell_threshold;
- const auto projection_clip = projection_threshold;
- assert(cell_clip >= 0.f && projection_clip >= 0.f);
-
- auto scratch_buffer_alloc = _tensor_builder->at(scratch_buffer_index).get();
- auto output_state_out_alloc = _tensor_builder->at(output_state_out_index).get();
- auto cell_state_out_alloc = _tensor_builder->at(cell_state_out_index).get();
- auto output_alloc = _tensor_builder->at(output_index).get();
-
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto input_to_forget_weights_alloc = _tensor_builder->at(input_to_forget_weights_index).get();
- auto input_to_cell_weights_alloc = _tensor_builder->at(input_to_cell_weights_index).get();
- auto input_to_output_weights_alloc = _tensor_builder->at(input_to_output_weights_index).get();
- auto recurrent_to_forget_weights_alloc =
- _tensor_builder->at(recurrent_to_forget_weights_index).get();
- auto recurrent_to_cell_weights_alloc = _tensor_builder->at(recurrent_to_cell_weights_index).get();
- auto recurrent_to_output_weights_alloc =
- _tensor_builder->at(recurrent_to_output_weights_index).get();
-
- auto forget_gate_bias_alloc = _tensor_builder->at(forget_gate_bias_index).get();
- auto cell_bias_alloc = _tensor_builder->at(cell_bias_index).get();
- auto output_gate_bias_alloc = _tensor_builder->at(output_gate_bias_index).get();
- auto output_state_in_alloc = _tensor_builder->at(output_state_in_index).get();
- auto cell_state_in_alloc = _tensor_builder->at(cell_state_in_index).get();
-
- auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
-
- auto fn = std::make_unique<::arm_compute::CLLSTMLayer>();
-
- ::arm_compute::LSTMParams<::arm_compute::ICLTensor> lstm_params{};
- if (has_cifg_param)
- {
- auto input_to_input_weights_alloc =
- _tensor_builder->at(input_to_input_weights_index).get(); // optional
- auto recurrent_to_input_weights_alloc =
- _tensor_builder->at(recurrent_to_input_weights_index).get(); // optional
- auto cell_to_input_weights_handle =
- has_peephole_param ? _tensor_builder->at(cell_to_input_weights_index).get()->handle()
- : nullptr; // optional (non-cifg && peephole)
- auto input_gate_bias_alloc = _tensor_builder->at(input_gate_bias_index).get(); // optional
- lstm_params.set_cifg_params(input_to_input_weights_alloc->handle(),
- recurrent_to_input_weights_alloc->handle(),
- cell_to_input_weights_handle, input_gate_bias_alloc->handle());
- }
- if (has_peephole_param)
- {
- auto cell_to_forget_weights_alloc =
- _tensor_builder->at(cell_to_forget_weights_index).get(); // optional
- auto cell_to_output_weights_alloc =
- _tensor_builder->at(cell_to_output_weights_index).get(); // optional
- lstm_params.set_peephole_params(cell_to_forget_weights_alloc->handle(),
- cell_to_output_weights_alloc->handle());
- }
- if (has_projection_param)
- {
- auto projection_weights_alloc = _tensor_builder->at(projection_weights_index).get(); // optional
- auto projection_bias_handle = has_projection_bias
- ? _tensor_builder->at(projection_bias_index).get()->handle()
- : nullptr; // optional
- lstm_params.set_projection_params(projection_weights_alloc->handle(), projection_bias_handle);
- }
-
- fn->configure(
- input_alloc->handle(), input_to_forget_weights_alloc->handle(),
- input_to_cell_weights_alloc->handle(), input_to_output_weights_alloc->handle(),
- recurrent_to_forget_weights_alloc->handle(), recurrent_to_cell_weights_alloc->handle(),
- recurrent_to_output_weights_alloc->handle(), forget_gate_bias_alloc->handle(),
- cell_bias_alloc->handle(), output_gate_bias_alloc->handle(), output_state_in_alloc->handle(),
- cell_state_in_alloc->handle(), scratch_buffer_alloc->handle(),
- output_state_out_alloc->handle(), cell_state_out_alloc->handle(), output_alloc->handle(),
- lstm_params, act_info, cell_clip, projection_clip);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = acl_common::kernelGenLSTM<acl_common::AclClFunction, ::arm_compute::ICLTensor,
+ ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_builder);
}
void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto comparison_type = node.param().comparison_type;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input0_tensor = _tensor_builder->at(input0_index).get();
+ auto input1_tensor = _tensor_builder->at(input1_index).get();
auto fn = std::make_unique<::arm_compute::CLComparison>();
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(),
+ fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
(arm_compute::ComparisonOperation)comparison_type);
auto acl_fn = asAclClFunction(std::move(fn));
for (const auto &input_index : input_indexes)
{
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_alloc = _tensor_builder->at(input_index);
- orig_inputs_acl_tensor_shapes.emplace_back(input_alloc->info()->tensor_shape());
- assert(input_rank == input_alloc->num_dimensions());
- if (input_rank != input_alloc->info()->num_dimensions())
+ const auto &input_tensor = _tensor_builder->at(input_index);
+ orig_inputs_acl_tensor_shapes.emplace_back(input_tensor->info()->tensor_shape());
+ assert(input_rank == input_tensor->num_dimensions());
+ if (input_rank != input_tensor->info()->num_dimensions())
{
// This means that the higher dimensions are 1 and dim_correction has been applied to the input tensor
- input_alloc->info()->set_tensor_shape(acl_common::asTensorShape(
+ input_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
_ctx.at(input_index).shape(), _current_op_seq_layout, backend_layout, false));
}
}
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(0)};
const auto permute_type = node.getPermuteType();
- auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
- auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
const auto rank = _ctx.at(ofm_idx).shape().rank();
assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
auto l = std::make_unique<::arm_compute::CLPermute>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
fn = std::move(l);
}
auto l = std::make_unique<::arm_compute::CLPermute>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
fn = std::move(l);
}
{
auto l = std::make_unique<::arm_compute::CLCopy>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle());
fn = std::move(l);
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::CLRsqrtLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclClFunction(std::move(fn));
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<arm_compute::CLActivationLayer>();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::CLScale>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(),
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
const auto activation = node.param().activation;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto hidden_state_out_alloc = _tensor_builder->at(hidden_state_out_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
- auto weights_alloc = _tensor_builder->at(weights_index).get();
- auto recurrent_weights_alloc = _tensor_builder->at(recurrent_weights_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
- auto hidden_state_in_alloc = _tensor_builder->at(hidden_state_in_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
+ auto weights_tensor = _tensor_builder->at(weights_index).get();
+ auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
+ auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
auto copy_layer = std::make_unique<::arm_compute::CLCopy>();
- copy_layer->configure(hidden_state_in_alloc->handle(), hidden_state_out_alloc->handle());
+ copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
_return_fn = asAclClFunction(std::move(copy_layer));
- auto fn = std::make_unique<::arm_compute::CLRNNLayerEx>(
+ auto fn = std::make_unique<::arm_compute::CLRNNLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_alloc->handle(), weights_alloc->handle(), recurrent_weights_alloc->handle(),
- bias_alloc->handle(), hidden_state_out_alloc->handle(), output_alloc->handle(),
- act_info);
+ fn->configure(input_tensor->handle(), weights_tensor->handle(),
+ recurrent_weights_tensor->handle(), bias_tensor->handle(),
+ hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
_return_fn = asAclClFunction(std::move(fn));
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::CLFloor>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto block_size_alloc = _tensor_builder->at(block_size_index).get();
- auto paddings_alloc = _tensor_builder->at(paddings_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto block_size_tensor = _tensor_builder->at(block_size_index).get();
+ auto paddings_tensor = _tensor_builder->at(paddings_index).get();
assert(_ctx.at(block_size_index).data());
assert(_ctx.at(paddings_index).data());
std::unique_ptr<::arm_compute::IFunction> fn;
auto l = std::make_unique<::arm_compute::CLSpaceToBatchLayer>();
- l->configure(ifm_alloc->handle(), block_size_alloc->handle(), paddings_alloc->handle(),
- ofm_alloc->handle());
+ l->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+ ofm_tensor->handle());
fn = std::move(l);
auto acl_fn = asAclClFunction(std::move(fn));
auto block_size = node.param().block_size;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto fn = std::make_unique<::arm_compute::CLSpaceToDepth>();
+ auto fn = std::make_unique<::arm_compute::CLSpaceToDepthLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), block_size);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
auto acl_fn = asAclClFunction(std::move(fn));
void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::L2Pool2D::Input::INPUT)};
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
-
- uint32_t kw = node.param().kw;
- uint32_t kh = node.param().kh;
- const auto stride = node.param().stride;
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{
- ::arm_compute::PoolingType::L2, ::arm_compute::Size2D{kw, kh},
- ::onert::backend::acl_common::asPadStrideInfo(padding, stride)};
-
- auto fn = std::make_unique<::arm_compute::CLPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclClFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto lookups_alloc = _tensor_builder->at(lookups_index).get();
- auto values_alloc = _tensor_builder->at(values_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto lookups_tensor = _tensor_builder->at(lookups_index).get();
+ auto values_tensor = _tensor_builder->at(values_index).get();
auto fn = std::make_unique<::arm_compute::CLEmbeddingLookup>();
- fn->configure(values_alloc->handle(), output_alloc->handle(), lookups_alloc->handle());
+ fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
float bias = 0.0f; // Don't offset the reduction.
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
radius, alpha, beta, bias, false);
auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
auto acl_fn = asAclClFunction(std::move(fn));
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto hits_alloc = _tensor_builder->at(hits_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto hits_tensor = _tensor_builder->at(hits_index).get();
- auto lookups_alloc = _tensor_builder->at(lookups_index).get();
- auto keys_alloc = _tensor_builder->at(keys_index).get();
- auto values_alloc = _tensor_builder->at(values_index).get();
+ auto lookups_tensor = _tensor_builder->at(lookups_index).get();
+ auto keys_tensor = _tensor_builder->at(keys_index).get();
+ auto values_tensor = _tensor_builder->at(values_index).get();
auto fn = std::make_unique<::arm_compute::CLHashtableLookup>();
- fn->configure(lookups_alloc->handle(), keys_alloc->handle(), values_alloc->handle(),
- output_alloc->handle(), hits_alloc->handle());
+ fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+ output_tensor->handle(), hits_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto alpha_alloc = _tensor_builder->at(alpha_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto alpha_tensor = _tensor_builder->at(alpha_index).get();
- auto fn = std::make_unique<::arm_compute::CLPReLU>();
+ auto fn = std::make_unique<::arm_compute::CLPReluLayer>();
- fn->configure(ifm_alloc->handle(), alpha_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
(node.param().padding.type == ir::PaddingType::VALID));
auto padding = ir::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride,
ker_shape.W, ker_shape.H);
-
uint32_t invalid_horizontal = 0;
uint32_t invalid_vertical = 0;
if (node.param().padding.type == ir::PaddingType::VALID)
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ker_tensor = _tensor_builder->at(ker_index).get();
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
auto fn = std::make_unique<::arm_compute::CLTransposeConvLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info,
- invalid_horizontal, invalid_vertical);
+ fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
+ tconv_info, invalid_horizontal, invalid_vertical);
auto acl_fn = asAclClFunction(std::move(fn));
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input0_tensor = _tensor_builder->at(input0_index).get();
+ auto input1_tensor = _tensor_builder->at(input1_index).get();
auto fn = std::make_unique<::arm_compute::CLBitwiseOr>();
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle());
+ fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::CLBitwiseNot>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLElementwiseSquaredDiff>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto k = node.param().k;
- auto values_alloc = _tensor_builder->at(outputValues_index).get();
- auto indices_alloc = _tensor_builder->at(outputIndices_index).get();
- auto input_alloc = _tensor_builder->at(inputData_index).get();
+ auto values_tensor = _tensor_builder->at(outputValues_index).get();
+ auto indices_tensor = _tensor_builder->at(outputIndices_index).get();
+ auto input_tensor = _tensor_builder->at(inputData_index).get();
auto fn = std::make_unique<::arm_compute::CLTopKV2>();
- fn->configure(input_alloc->handle(), k, values_alloc->handle(), indices_alloc->handle());
+ fn->configure(input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto indices_alloc = _tensor_builder->at(indices_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto indices_tensor = _tensor_builder->at(indices_index).get();
// NOTE The frontend layout and backend layout must be the same for this operation.
// If not the same, we would have to add a stage(?) to permute the output tensor. This operation
// also depends on the layout of a model. For example, if a model in NHWC has this operation with
// output rank == 4, indices rank == 2 and axis == 2, it should work on the W and C axes, but W
// and C are not adjacent in NCHW. So the backend in NCHW cannot handle this case.
- const auto backend_layout = ofm_alloc->layout();
+ const auto backend_layout = ofm_tensor->layout();
UNUSED_RELEASE(backend_layout);
- assert(backend_layout == ifm_alloc->layout());
- assert(backend_layout == indices_alloc->layout());
+ assert(backend_layout == ifm_tensor->layout());
+ assert(backend_layout == indices_tensor->layout());
assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
auto fn = std::make_unique<::arm_compute::CLGatherEx>();
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
- assert(n == ifm_alloc->num_dimensions());
+ assert(n == ifm_tensor->num_dimensions());
size_t k = _ctx.at(indices_index).shape().rank();
- assert(k == indices_alloc->num_dimensions());
+ assert(k == indices_tensor->num_dimensions());
// Disable applied dim_correction
- const auto orig_ifm_acl_tensor_shape = ifm_alloc->info()->tensor_shape();
- if (n != ifm_alloc->info()->num_dimensions())
+ const auto orig_ifm_acl_tensor_shape = ifm_tensor->info()->tensor_shape();
+ if (n != ifm_tensor->info()->num_dimensions())
{
// This means that the higher dimensions are 1 and dim_correction has been applied to the ifm tensor
const auto ifm = _ctx.at(ifm_index);
- ifm_alloc->info()->set_tensor_shape(
+ ifm_tensor->info()->set_tensor_shape(
acl_common::asTensorShape(ifm.shape(), _current_op_seq_layout, backend_layout, false));
}
- const auto orig_indice_acl_tensor_shape = indices_alloc->info()->tensor_shape();
- if (k != indices_alloc->info()->num_dimensions())
+ const auto orig_indice_acl_tensor_shape = indices_tensor->info()->tensor_shape();
+ if (k != indices_tensor->info()->num_dimensions())
{
// This means that the higher dimensions are 1 and dim_correction has been applied to the indices tensor
const auto indices = _ctx.at(indices_index);
- indices_alloc->info()->set_tensor_shape(
+ indices_tensor->info()->set_tensor_shape(
acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
}
- fn->configure(ifm_alloc->handle(), indices_alloc->handle(), ofm_alloc->handle(), axis);
+ fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
// Revert disabling applied dim_correction
- ifm_alloc->info()->set_tensor_shape(orig_ifm_acl_tensor_shape);
- indices_alloc->info()->set_tensor_shape(orig_indice_acl_tensor_shape);
+ ifm_tensor->info()->set_tensor_shape(orig_ifm_acl_tensor_shape);
+ indices_tensor->info()->set_tensor_shape(orig_indice_acl_tensor_shape);
auto acl_fn = asAclClFunction(std::move(fn));
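// Note on the shape juggling above (annotation, not part of the patch): set_tensor_shape(..., false)
// temporarily turns off ACL's dim_correction (collapsing of trailing size-1 dimensions, which lowers
// num_dimensions()), so that CLGatherEx is configured with the frontend rank; the original ACL shapes
// are restored right after configure().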
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::CLNeg>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclClFunction(std::move(fn));
assert((ifm_shape.rank() - 1) == ofm_shape.rank());
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
auto frontend_layout = _current_op_seq_layout;
- auto backend_layout = ifm_alloc->layout();
+ auto backend_layout = ifm_tensor->layout();
int axis_value = node.param().axis;
if (axis_value < 0)
auto acl_axis =
acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::CLArgOperation>();
+ auto fn = std::make_unique<::arm_compute::CLArgMinMaxLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), {acl_axis},
- ::arm_compute::ArgOperation::MAX);
+ fn->configure(ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
+ ::arm_compute::ReductionOperation::ARG_IDX_MAX);
auto acl_fn = asAclClFunction(std::move(fn));
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
- auto fn = std::make_unique<::arm_compute::CLCast>();
+ auto fn = std::make_unique<::arm_compute::CLDequantizationLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), arm_compute::SubDataType::NONE);
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
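// The switch from CLCast to CLDequantizationLayer above makes Dequantize follow the standard affine
// rule real = scale * (quantized - zero_point). A minimal reference sketch of that rule in plain C++
// (illustrative only, not the ACL implementation):
//
//   std::vector<float> dequantize_ref(const std::vector<uint8_t> &q, float scale, int32_t zero_point)
//   {
//     std::vector<float> out(q.size());
//     for (std::size_t i = 0; i < q.size(); ++i)
//       out[i] = scale * (static_cast<int32_t>(q[i]) - zero_point);
//     return out;
//   }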
auto beta = node.param().beta;
auto bias = node.param().bias;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
auto acl_fn = asAclClFunction(std::move(fn));
auto block_size = node.param().block_size;
assert(block_size > 0);
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
- auto fn = std::make_unique<::arm_compute::CLDepthToSpace>();
+ auto fn = std::make_unique<::arm_compute::CLDepthToSpaceLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), block_size);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), block_size);
auto acl_fn = asAclClFunction(std::move(fn));
for (const auto &output : node.getOutputs())
output_indexes.emplace_back(output);
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- std::vector<arm_compute::ICLTensor *> output_allocs;
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ std::vector<arm_compute::ICLTensor *> output_tensors;
for (const auto &ofm_ind : output_indexes)
- output_allocs.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+ output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = ifm_alloc->layout();
+ const auto backend_layout = ifm_tensor->layout();
auto axis = node.param().axis;
if (axis < 0)
axis += ifm_rank;
auto fn = std::make_unique<::arm_compute::CLSplit>();
- fn->configure(ifm_alloc->handle(), output_allocs, axis);
+ fn->configure(ifm_tensor->handle(), output_tensors, axis);
_return_fn = asAclClFunction(std::move(fn));
}
for (const auto &output_index : output_indexes)
{
size_t output_rank = _ctx.at(output_index).shape().rank();
- const auto &output_alloc = _tensor_builder->at(output_index);
- orig_outputs_acl_tensor_shapes.emplace_back(output_alloc->info()->tensor_shape());
- assert(output_rank == output_alloc->num_dimensions());
- if (output_rank != output_alloc->info()->num_dimensions())
+ const auto &output_tensor = _tensor_builder->at(output_index);
+ orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
+ assert(output_rank == output_tensor->num_dimensions());
+ if (output_rank != output_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and ifm tensor is applied dim_correction
- output_alloc->info()->set_tensor_shape(acl_common::asTensorShape(
+ output_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
_ctx.at(output_index).shape(), _current_op_seq_layout, backend_layout, false));
}
}
// Disable applied dim_correction
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_alloc = _tensor_builder->at(input_index);
- assert(input_rank == input_alloc->num_dimensions());
- if (input_rank != input_alloc->info()->num_dimensions())
+ const auto &input_tensor = _tensor_builder->at(input_index);
+ assert(input_rank == input_tensor->num_dimensions());
+ if (input_rank != input_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and ifm tensor is applied dim_correction
- input_alloc->info()->set_tensor_shape(acl_common::asTensorShape(
+ input_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
_ctx.at(input_index).shape(), frontend_layout, backend_layout, false));
}
const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLElementwiseMin>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::CLElementwiseMax>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclClFunction(std::move(fn));
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp32ToFp16::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), ::arm_compute::ConvertPolicy::SATURATE,
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE,
0);
auto acl_fn = asAclClFunction(std::move(fn));
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp16ToFp32::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), ::arm_compute::ConvertPolicy::SATURATE,
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE,
0);
auto acl_fn = asAclClFunction(std::move(fn));
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_ACL_KERNEL_GEN_H_
+#define __ONERT_BACKEND_ACL_COMMON_ACL_KERNEL_GEN_H_
+
+#include <exec/IFunction.h>
+#include <ir/Operands.h>
+
+#include <ir/operation/LSTM.h>
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer,
+ typename T_TensorBuilder>
+std::unique_ptr<exec::IFunction>
+kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands,
+ const std::shared_ptr<T_TensorBuilder> &tensor_builder)
+{
+ // TODO Support dynamic rnn
+ // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
+ const auto scratch_buffer_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
+ const auto output_state_out_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
+ const auto cell_state_out_index{
+ node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
+ const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
+
+ const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
+ const auto input_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
+ const auto input_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
+ const auto input_to_cell_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
+ const auto input_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
+ const auto recurrent_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
+ const auto recurrent_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
+ const auto recurrent_to_cell_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
+ const auto recurrent_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
+ const auto cell_to_input_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
+ const auto cell_to_forget_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
+ const auto cell_to_output_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
+ const auto input_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
+ const auto forget_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
+ const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
+ const auto output_gate_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
+ const auto projection_weights_index{
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
+ const auto projection_bias_index{
+ node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
+ const auto output_state_in_index{
+ node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
+ const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
+ const auto cell_threshold = node.param().cell_threshold;
+ const auto projection_threshold = node.param().projection_threshold;
+
+ bool has_input_to_input_weights = operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) != 0;
+ bool has_recurrent_to_input_weights =
+ operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
+ bool has_cell_to_forget_weights = operands.at(cell_to_forget_weights_index).shape().dim(0) != 0;
+ bool has_cell_to_output_weights = operands.at(cell_to_output_weights_index).shape().dim(0) != 0;
+ bool has_projection_weights = operands.at(projection_weights_index).shape().dim(0) != 0 &&
+ operands.at(projection_weights_index).shape().dim(1) != 0;
+ bool has_projection_bias = operands.at(projection_bias_index).shape().dim(0);
+
+ // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
+ // true: no CIFG
+ // false: CIFG
+ // NOTE The cell_to_input_weights does not exist in non-peephole mode, even for a regular (non-CIFG) LSTM.
+ bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
+
+ // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
+ // But the cell_to_input_weights does not exist in CIFG mode, even with peephole.
+ // true: peephole
+ // false: no peephole
+ bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
+
+ // NOTE Although the projection weights have data, the projection bias may not.
+ bool has_projection_param = has_projection_weights;
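+ // To summarize the flags above: has_cifg_param == false selects the CIFG path, has_peephole_param
+ // enables the peephole connections, and has_projection_param enables the projection layer (with an
+ // optional projection bias).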
+
+ const auto activation = node.param().activation;
+ const auto cell_clip = cell_threshold;
+ const auto projection_clip = projection_threshold;
+ assert(cell_clip >= 0.f && projection_clip >= 0.f);
+
+ auto scratch_buffer_tensor = tensor_builder->at(scratch_buffer_index).get();
+ auto output_state_out_tensor = tensor_builder->at(output_state_out_index).get();
+ auto cell_state_out_tensor = tensor_builder->at(cell_state_out_index).get();
+ auto output_tensor = tensor_builder->at(output_index).get();
+
+ auto input_tensor = tensor_builder->at(input_index).get();
+
+ auto input_to_forget_weights_tensor = tensor_builder->at(input_to_forget_weights_index).get();
+ auto input_to_cell_weights_tensor = tensor_builder->at(input_to_cell_weights_index).get();
+ auto input_to_output_weights_tensor = tensor_builder->at(input_to_output_weights_index).get();
+ auto recurrent_to_forget_weights_tensor =
+ tensor_builder->at(recurrent_to_forget_weights_index).get();
+ auto recurrent_to_cell_weights_tensor = tensor_builder->at(recurrent_to_cell_weights_index).get();
+ auto recurrent_to_output_weights_tensor =
+ tensor_builder->at(recurrent_to_output_weights_index).get();
+
+ auto forget_gate_bias_tensor = tensor_builder->at(forget_gate_bias_index).get();
+ auto cell_bias_tensor = tensor_builder->at(cell_bias_index).get();
+ auto output_gate_bias_tensor = tensor_builder->at(output_gate_bias_index).get();
+ auto output_state_in_tensor = tensor_builder->at(output_state_in_index).get();
+ auto cell_state_in_tensor = tensor_builder->at(cell_state_in_index).get();
+
+ auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
+
+ auto fn = std::make_unique<T_ACLLayer>();
+
+ ::arm_compute::LSTMParams<T_Tensor> lstm_params{};
+ if (has_cifg_param)
+ {
+ auto input_to_input_weights_tensor =
+ tensor_builder->at(input_to_input_weights_index).get(); // optional
+ auto recurrent_to_input_weights_tensor =
+ tensor_builder->at(recurrent_to_input_weights_index).get(); // optional
+ auto cell_to_input_weights_handle =
+ has_peephole_param ? tensor_builder->at(cell_to_input_weights_index).get()->handle()
+ : nullptr; // optional (non-cifg && peephole)
+ auto input_gate_bias_tensor = tensor_builder->at(input_gate_bias_index).get(); // optional
+ lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(),
+ recurrent_to_input_weights_tensor->handle(),
+ cell_to_input_weights_handle, input_gate_bias_tensor->handle());
+ }
+ if (has_peephole_param)
+ {
+ auto cell_to_forget_weights_tensor =
+ tensor_builder->at(cell_to_forget_weights_index).get(); // optional
+ auto cell_to_output_weights_tensor =
+ tensor_builder->at(cell_to_output_weights_index).get(); // optional
+ lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(),
+ cell_to_output_weights_tensor->handle());
+ }
+ if (has_projection_param)
+ {
+ auto projection_weights_tensor = tensor_builder->at(projection_weights_index).get(); // optional
+ auto projection_bias_handle = has_projection_bias
+ ? tensor_builder->at(projection_bias_index).get()->handle()
+ : nullptr; // optional
+ lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle);
+ }
+
+ fn->configure(input_tensor->handle(), input_to_forget_weights_tensor->handle(),
+ input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
+ recurrent_to_forget_weights_tensor->handle(),
+ recurrent_to_cell_weights_tensor->handle(),
+ recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
+ cell_bias_tensor->handle(), output_gate_bias_tensor->handle(),
+ output_state_in_tensor->handle(), cell_state_in_tensor->handle(),
+ scratch_buffer_tensor->handle(), output_state_out_tensor->handle(),
+ cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info,
+ cell_clip, projection_clip);
+
+ return std::make_unique<T_FunctionWrapper>(std::move(fn));
+}
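+// (Illustrative, hypothetical instantiation: a backend's KernelGenerator is expected to call this
+//  with its own wrapper/tensor/layer types, e.g. roughly
+//  kernelGenLSTM<AclFunction, ::arm_compute::ITensor, ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_builder);
+//  the exact types are chosen by the caller and are not fixed by this header.)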
+
+template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer,
+ typename T_TensorBuilder>
+std::unique_ptr<exec::IFunction>
+kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Operands &operands,
+ const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout)
+{
+ using ir::operation::FullyConnected;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+ const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
+ const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
+
+ const auto input_rank = operands.at(input_index).shape().rank();
+
+ const auto output_size =
+ operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1);
+ UNUSED_RELEASE(output_size);
+ assert(operands.at(bias_index).shape().dim(0) == output_size);
+ assert(operands.at(weight_index).shape().dim(0) == output_size);
+ const auto batch_size =
+ operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 2);
+ const auto input_size =
+ operands.at(weight_index).shape().dim(operands.at(weight_index).shape().rank() - 1);
+
+ // Check for reshaping input's shape into rank-2
+ bool needs_reshape = false;
+ ir::Shape reshape(2);
+ if (input_rank == 3 || input_rank == 4)
+ {
+ const auto &ifm_shape = operands.at(input_index).shape();
+ auto feature_size = 1;
+ for (int i = 0; i < ifm_shape.rank(); ++i)
+ {
+ feature_size *= ifm_shape.dim(i);
+ }
+
+ UNUSED_RELEASE(feature_size);
+ assert(feature_size == batch_size * input_size);
+
+ // for reshaping
+ needs_reshape = true;
+ reshape.dim(0) = batch_size; /* H */
+ reshape.dim(1) = input_size; /* W */
+ }
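+ // Illustrative numbers (hypothetical shapes): an input of {2, 4, 8} with a weight of {16, 32} gives
+ // batch_size = 2, input_size = 32 and feature_size = 2 * 4 * 8 = 64 == batch_size * input_size,
+ // so the input is reshaped to {2, 32} before the fully connected kernel runs.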
+
+ auto output_tensor = tensor_builder->at(output_index).get();
+ const auto input_tensor = tensor_builder->at(input_index).get();
+ const auto weight_tensor = tensor_builder->at(weight_index).get();
+ const auto bias_tensor = tensor_builder->at(bias_index).get();
+ const auto frontend_layout = layout;
+ const auto acl_layout = output_tensor->handle()->info()->data_layout();
+
+ auto fn =
+ std::make_unique<T_ACLLayer>(tensor_builder->acl_tensor_manager()->internal_buffer_manager());
+
+ typename T_ACLLayer::KernelType kernel_type = T_ACLLayer::KernelType::GENERAL;
+ if (operands.at(weight_index).isConstant())
+ {
+ kernel_type = T_ACLLayer::KernelType::PREPROCESSED_WEIGHTS;
+ assert(operands.at(weight_index).data());
+ }
+
+ fn->configure(
+ input_tensor->handle(), weight_tensor->handle(), bias_tensor->handle(),
+ output_tensor->handle(), needs_reshape,
+ ::onert::backend::acl_common::asTensorShape(
+ reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)),
+ kernel_type);
+
+ return std::make_unique<T_FunctionWrapper>(std::move(fn));
+}
+
+template <typename T_ACLLayer, typename T_PoolOp, typename T_TensorBuilder>
+std::unique_ptr<::arm_compute::IFunction>
+kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands,
+ const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout,
+ ::arm_compute::PoolingType pooling_type)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(0)};
+
+ const auto ofm_shape = operands.at(ofm_index).shape().asFeature(layout);
+ const auto ifm_shape = operands.at(ifm_index).shape().asFeature(layout);
+
+ const auto kh = node.param().kh;
+ const auto kw = node.param().kw;
+ const auto stride = node.param().stride;
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+
+ VERBOSE(Pool2DParam) << "IFM_H: " << ifm_shape.H << std::endl;
+ VERBOSE(Pool2DParam) << "IFM_W: " << ifm_shape.W << std::endl;
+ VERBOSE(Pool2DParam) << "OFM_H: " << ofm_shape.H << std::endl;
+ VERBOSE(Pool2DParam) << "OFM_W: " << ofm_shape.W << std::endl;
+ VERBOSE(Pool2DParam) << "KER_H: " << kh << std::endl;
+ VERBOSE(Pool2DParam) << "KER_W: " << kw << std::endl;
+ VERBOSE(Pool2DParam) << "STRIDE_H: " << stride.vertical << std::endl;
+ VERBOSE(Pool2DParam) << "STRIDE_W: " << stride.horizontal << std::endl;
+ VERBOSE(Pool2DParam) << "PAD(T): " << padding.top << std::endl;
+ VERBOSE(Pool2DParam) << "PAD(B): " << padding.bottom << std::endl;
+ VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl;
+ VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl;
+
+ auto ofm_tensor = tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = tensor_builder->at(ifm_index).get();
+
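+ // exclude_padding == true below keeps padded elements out of the averaging denominator for AVG
+ // pooling; it has no effect on MAX pooling, so the same info serves every pooling_type.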
+ ::arm_compute::PoolingLayerInfo info{
+ pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
+ acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */};
+
+ auto fn = std::make_unique<T_ACLLayer>();
+
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), info);
+
+ return fn;
+}
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_ACL_KERNEL_GEN_H_
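// The helpers above are shared by the CL and NEON kernel generators; later hunks in this patch call
// them in place of the per-backend bodies. A sketch of the call the acl_neon MaxPool2D visitor below
// ends up making (wrapper and layer types belong to that backend):
//
//   auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
//       node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
//   _return_fn = std::make_unique<exec::FunctionSequence>(
//       asAclFunction(std::move(raw_fn)),
//       ActivationBuilder::generate(activation, ofm_tensor->handle()));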
#include "exec/NopFunction.h"
#include "util/logging.h"
#include "util/Utils.h"
+#include "AclKernelGen.h"
namespace onert
{
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto frontend_layout = _current_op_seq_layout;
- auto backend_layout = ifm_alloc->layout();
+ auto backend_layout = ifm_tensor->layout();
int axis_value = node.param().axis;
if (axis_value < 0)
auto fn = std::make_unique<::arm_compute::NEArgMinMaxLayer>();
- fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle(),
+ fn->configure(ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(),
arm_compute::ReductionOperation::ARG_IDX_MAX);
auto acl_fn = asAclFunction(std::move(fn));
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto block_size_alloc = _tensor_builder->at(block_size_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto block_size_tensor = _tensor_builder->at(block_size_index).get();
assert(_ctx.at(block_size_index).data());
auto fn = std::make_unique<::arm_compute::NEBatchToSpaceLayer>();
- fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto fn = std::make_unique<::arm_compute::NECast>();
+ std::unique_ptr<::arm_compute::IFunction> fn;
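+ // Use a plain NECopy when the input and output data types already match (the cast degenerates to a
+ // copy); otherwise fall back to NECast with a saturating conversion policy.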
+ if (ifm_tensor->data_type() == ofm_tensor->data_type())
+ {
+ auto l = std::make_unique<::arm_compute::NECopy>();
- auto input_sub_type = _ctx.at(ifm_index).typeInfo().type() == ir::DataType::BOOL8
- ? arm_compute::SubDataType::BOOL
- : arm_compute::SubDataType::NONE;
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), input_sub_type);
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle());
+
+ fn = std::move(l);
+ }
+ else
+ {
+ auto l = std::make_unique<::arm_compute::NECast>();
+
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+
+ fn = std::move(l);
+ }
auto acl_fn = asAclFunction(std::move(fn));
ker_width, ker_height);
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ker_tensor = _tensor_builder->at(ker_index).get();
+ auto bias_tensor = _tensor_builder->at(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
auto fn = std::make_unique<::arm_compute::NEConvolutionLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(),
- conv_info, ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
+ fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
+ ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(),
+ ::arm_compute::Size2D(1U, 1U), act_info);
_return_fn = asAclFunction(std::move(fn));
}
auto block_size = node.param().block_size;
assert(block_size > 0);
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
- auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayerEx>();
+ auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), block_size);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), block_size);
auto acl_fn = asAclFunction(std::move(fn));
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ker_tensor = _tensor_builder->at(ker_index).get();
+ auto bias_tensor = _tensor_builder->at(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
{
auto fn = std::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
- ofm_alloc->handle(), conv_info, multiplier, act_info);
+ fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
+ ofm_tensor->handle(), conv_info, multiplier, act_info);
_return_fn = asAclFunction(std::move(fn));
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::NEDequantizationLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
const auto activation = node.param().activation;
-
- VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
- VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
- VERBOSE(MaxPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
- VERBOSE(MaxPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
- VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl;
- VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl;
- VERBOSE(MaxPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
- VERBOSE(MaxPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
- VERBOSE(MaxPool2D) << "PAD(T): " << padding.top << std::endl;
- VERBOSE(MaxPool2D) << "PAD(B): " << padding.bottom << std::endl;
- VERBOSE(MaxPool2D) << "PAD(L): " << padding.left << std::endl;
- VERBOSE(MaxPool2D) << "PAD(R): " << padding.right << std::endl;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX,
- ::arm_compute::Size2D{kw, kh},
- acl_common::asPadStrideInfo(padding, stride)};
-
- auto fn = std::make_unique<::arm_compute::NEPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG);
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
const auto activation = node.param().activation;
-
- VERBOSE(AvgPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
- VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
- VERBOSE(AvgPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
- VERBOSE(AvgPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
- VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl;
- VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl;
- VERBOSE(AvgPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
- VERBOSE(AvgPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
- VERBOSE(AvgPool2D) << "PAD(T): " << padding.top << std::endl;
- VERBOSE(AvgPool2D) << "PAD(B): " << padding.bottom << std::endl;
- VERBOSE(AvgPool2D) << "PAD(L): " << padding.left << std::endl;
- VERBOSE(AvgPool2D) << "PAD(R): " << padding.right << std::endl;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{
- ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{kw, kh},
- acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */};
-
- auto fn = std::make_unique<::arm_compute::NEPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Concat &node)
return;
}
- auto output_alloc = _tensor_builder->at(ofm_index).get();
+ auto output_tensor = _tensor_builder->at(ofm_index).get();
std::vector<::arm_compute::ITensor *> input_tensors;
for (const auto &ifm_ind : input_indexes)
input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
if (input_indexes.size() < 2)
{
auto l = std::make_unique<::arm_compute::NECopy>();
- l->configure(input_tensors.at(0), output_alloc->handle());
+ l->configure(input_tensors.at(0), output_tensor->handle());
fn = std::move(l);
}
else
auto l = std::make_unique<::arm_compute::NEConcatenateLayer>();
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = output_alloc->layout();
+ const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
- l->configure(input_tensors, output_alloc->handle(), fixed_axis);
+ l->configure(input_tensors, output_tensor->handle(), fixed_axis);
fn = std::move(l);
}
const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto lookups_alloc = _tensor_builder->at(lookups_index).get();
- auto values_alloc = _tensor_builder->at(values_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto lookups_tensor = _tensor_builder->at(lookups_index).get();
+ auto values_tensor = _tensor_builder->at(values_index).get();
auto fn = std::make_unique<::arm_compute::NEEmbeddingLookup>();
- fn->configure(values_alloc->handle(), output_alloc->handle(), lookups_alloc->handle());
+ fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::NEFloor>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
- using ir::operation::FullyConnected;
-
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
- const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
- const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
-
- const auto input_rank = _ctx.at(input_index).shape().rank();
-
- const auto output_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1);
- UNUSED_RELEASE(output_size);
- assert(_ctx.at(bias_index).shape().dim(0) == output_size);
- assert(_ctx.at(weight_index).shape().dim(0) == output_size);
- const auto batch_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 2);
- const auto input_size =
- _ctx.at(weight_index).shape().dim(_ctx.at(weight_index).shape().rank() - 1);
-
- // Check for reshaping input's shape into rank-2
- bool needs_reshape = false;
- ir::Shape reshape(2);
- if (input_rank == 3 || input_rank == 4)
- {
- const auto &ifm_shape = _ctx.at(input_index).shape();
- auto feature_size = 1;
- for (int i = 0; i < ifm_shape.rank(); ++i)
- {
- feature_size *= ifm_shape.dim(i);
- }
-
- UNUSED_RELEASE(feature_size);
- assert(feature_size == batch_size * input_size);
-
- // for reshaping
- needs_reshape = true;
- reshape.dim(0) = batch_size; /* H */
- reshape.dim(1) = input_size; /* W */
- }
-
+ auto output_tensor = _tensor_builder->at(output_index).get();
const auto activation = node.param().activation;
- auto output_alloc = _tensor_builder->at(output_index).get();
- const auto input_alloc = _tensor_builder->at(input_index).get();
- const auto weight_alloc = _tensor_builder->at(weight_index).get();
- const auto bias_alloc = _tensor_builder->at(bias_index).get();
- const auto frontend_layout = _current_op_seq_layout;
- const auto acl_layout = output_alloc->handle()->info()->data_layout();
-
- auto fn = std::make_unique<arm_compute::NEFullyConnectedReshapingLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- arm_compute::NEFullyConnectedReshapingLayer::KernelType kernel_type =
- arm_compute::NEFullyConnectedReshapingLayer::KernelType::GENERAL;
- if (_ctx.at(weight_index).isConstant())
- {
- kernel_type = arm_compute::NEFullyConnectedReshapingLayer::KernelType::PREPROCESSED_WEIGHTS;
- assert(_ctx.at(weight_index).data());
- }
-
- fn->configure(
- input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(), output_alloc->handle(),
- needs_reshape,
- ::onert::backend::acl_common::asTensorShape(
- reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)),
- kernel_type);
-
+ auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
+ ::arm_compute::NEFullyConnectedReshapingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)),
- ActivationBuilder::generate(activation, output_alloc->handle()));
+ std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto hits_alloc = _tensor_builder->at(hits_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto hits_tensor = _tensor_builder->at(hits_index).get();
- auto lookups_alloc = _tensor_builder->at(lookups_index).get();
- auto keys_alloc = _tensor_builder->at(keys_index).get();
- auto values_alloc = _tensor_builder->at(values_index).get();
+ auto lookups_tensor = _tensor_builder->at(lookups_index).get();
+ auto keys_tensor = _tensor_builder->at(keys_index).get();
+ auto values_tensor = _tensor_builder->at(values_index).get();
auto fn = std::make_unique<::arm_compute::NEHashtableLookup>();
- fn->configure(lookups_alloc->handle(), keys_alloc->handle(), values_alloc->handle(),
- output_alloc->handle(), hits_alloc->handle());
+ fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+ output_tensor->handle(), hits_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
// Converting in reverse order
const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto indices_alloc = _tensor_builder->at(indices_index).get();
- const auto backend_layout = ofm_alloc->layout();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto indices_tensor = _tensor_builder->at(indices_index).get();
+ const auto backend_layout = ofm_tensor->layout();
UNUSED_RELEASE(backend_layout);
// NOTE The frontend layout and backend layout must be the same for this operation.
// This matters when there is a Gather op with output rank == 4, indices rank == 2 and axis == 2 in
// a model. For example, if a model in NHWC has this operation as output rank == 4, indices
// rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
// and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
- assert(backend_layout == ifm_alloc->layout());
- assert(backend_layout == indices_alloc->layout());
+ assert(backend_layout == ifm_tensor->layout());
+ assert(backend_layout == indices_tensor->layout());
assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
auto fn = std::make_unique<::arm_compute::NEGatherEx>();
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
- assert(n == ifm_alloc->num_dimensions());
+ assert(n == ifm_tensor->num_dimensions());
size_t k = _ctx.at(indices_index).shape().rank();
- assert(k == indices_alloc->num_dimensions());
+ assert(k == indices_tensor->num_dimensions());
// Disable applied dim_correction
- if (n != ifm_alloc->info()->num_dimensions())
+ if (n != ifm_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and ifm tensor is applied dim_correction
const auto ifm = _ctx.at(ifm_index);
- ifm_alloc->info()->set_tensor_shape(
+ ifm_tensor->info()->set_tensor_shape(
acl_common::asTensorShape(ifm.shape(), _current_op_seq_layout, backend_layout, false));
}
- if (k != indices_alloc->info()->num_dimensions())
+ if (k != indices_tensor->info()->num_dimensions())
{
// This means that high dimension's value is 1 and indices tensor is applied dim_correction
const auto indices = _ctx.at(indices_index);
- indices_alloc->info()->set_tensor_shape(
+ indices_tensor->info()->set_tensor_shape(
acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
}
- fn->configure(ifm_alloc->handle(), indices_alloc->handle(), ofm_alloc->handle(), axis);
+ fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
// acl_neon doesn't revert disabling applied dim_correction because acl_neon's kernels would
// use arm_compute::TensorInfo::offset_element_in_bytes()
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto gamma_alloc = _tensor_builder->at(gamma_index).get();
- auto beta_alloc = _tensor_builder->at(beta_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto gamma_tensor = _tensor_builder->at(gamma_index).get();
+ auto beta_tensor = _tensor_builder->at(beta_index).get();
auto epsilon = node.param().epsilon;
auto activation = node.param().activation;
auto fn = std::make_unique<::arm_compute::NEInstanceNormalizationLayerEx>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), gamma_alloc->handle(),
- beta_alloc->handle(), epsilon);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(),
+ beta_tensor->handle(), epsilon);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::L2Normalization &node)
float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
float bias = 0.0f; // Don't offset the reduction.
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
radius, alpha, beta, bias, false);
auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
auto acl_fn = asAclFunction(std::move(fn));
void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::L2Pool2D::Input::INPUT)};
-
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
+ node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2);
- uint32_t kw = node.param().kw;
- uint32_t kh = node.param().kh;
- const auto stride = node.param().stride;
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{
- ::arm_compute::PoolingType::L2, ::arm_compute::Size2D{kw, kh},
- ::onert::backend::acl_common::asPadStrideInfo(padding, stride)};
-
- auto fn = std::make_unique<::arm_compute::NEPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
auto beta = node.param().beta;
auto bias = node.param().bias;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
auto acl_fn = asAclFunction(std::move(fn));
const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input0_tensor = _tensor_builder->at(input0_index).get();
+ auto input1_tensor = _tensor_builder->at(input1_index).get();
auto fn = std::make_unique<::arm_compute::NELogicalAnd>();
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle());
+ fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::NEBitwiseNot>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input0_tensor = _tensor_builder->at(input0_index).get();
+ auto input1_tensor = _tensor_builder->at(input1_index).get();
auto fn = std::make_unique<::arm_compute::NELogicalOr>();
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle());
+ fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
// instead of 'INF', and then the result of this op will be errors due to the 'NaN'.
auto fn = std::make_unique<::arm_compute::NEActivationLayerEx>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
- // TODO Support dynamic rnn
- // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
- const auto scratch_buffer_index{
- node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
- const auto output_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
- const auto cell_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
- const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
-
- const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
- const auto input_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
- const auto input_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
- const auto input_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
- const auto input_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
- const auto recurrent_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
- const auto recurrent_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
- const auto recurrent_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
- const auto recurrent_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
- const auto cell_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
- const auto cell_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
- const auto cell_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
- const auto input_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
- const auto forget_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
- const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
- const auto output_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
- const auto projection_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
- const auto projection_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
- const auto output_state_in_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
- const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
- const auto cell_threshold = node.param().cell_threshold;
- const auto projection_threshold = node.param().projection_threshold;
-
- bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0;
- bool has_recurrent_to_input_weights =
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
- bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
- bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
- bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 &&
- _ctx.at(projection_weights_index).shape().dim(1) != 0;
- bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0);
-
- // NOTE The input_to_input_weights and the recurrent_to_input_weights do not exist in CIFG.
- // true: no CIFG
- // false: CIFG
- // NOTE The cell_to_input_weights does not exist in non-peephole although regular LSTM(non-CIFG).
- bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
-
- // NOTE The cell_to_forget_weights and the cell_to_output_weights exist in peephole.
- // But the cell_to_input_weights does not exist in regular CIFG although peephole.
- // true: peephole
- // false: no peephole
- bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
-
- // NOTE Although the projection weights has data the projection bias may not have data.
- bool has_projection_param = has_projection_weights;
-
- const auto activation = node.param().activation;
- const auto cell_clip = cell_threshold;
- const auto projection_clip = projection_threshold;
- assert(cell_clip >= 0.f && projection_clip >= 0.f);
-
- auto scratch_buffer_alloc = _tensor_builder->at(scratch_buffer_index).get();
- auto output_state_out_alloc = _tensor_builder->at(output_state_out_index).get();
- auto cell_state_out_alloc = _tensor_builder->at(cell_state_out_index).get();
- auto output_alloc = _tensor_builder->at(output_index).get();
-
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto input_to_forget_weights_alloc = _tensor_builder->at(input_to_forget_weights_index).get();
- auto input_to_cell_weights_alloc = _tensor_builder->at(input_to_cell_weights_index).get();
- auto input_to_output_weights_alloc = _tensor_builder->at(input_to_output_weights_index).get();
- auto recurrent_to_forget_weights_alloc =
- _tensor_builder->at(recurrent_to_forget_weights_index).get();
- auto recurrent_to_cell_weights_alloc = _tensor_builder->at(recurrent_to_cell_weights_index).get();
- auto recurrent_to_output_weights_alloc =
- _tensor_builder->at(recurrent_to_output_weights_index).get();
-
- auto forget_gate_bias_alloc = _tensor_builder->at(forget_gate_bias_index).get();
- auto cell_bias_alloc = _tensor_builder->at(cell_bias_index).get();
- auto output_gate_bias_alloc = _tensor_builder->at(output_gate_bias_index).get();
- auto output_state_in_alloc = _tensor_builder->at(output_state_in_index).get();
- auto cell_state_in_alloc = _tensor_builder->at(cell_state_in_index).get();
-
- auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
-
- auto fn = std::make_unique<::arm_compute::NELSTMLayer>();
-
- ::arm_compute::LSTMParams<::arm_compute::ITensor> lstm_params{};
- if (has_cifg_param)
- {
- auto input_to_input_weights_alloc =
- _tensor_builder->at(input_to_input_weights_index).get(); // optional
- auto recurrent_to_input_weights_alloc =
- _tensor_builder->at(recurrent_to_input_weights_index).get(); // optional
- auto cell_to_input_weights_handle =
- has_peephole_param ? _tensor_builder->at(cell_to_input_weights_index).get()->handle()
- : nullptr; // optional (non-cifg && peephole)
- auto input_gate_bias_alloc = _tensor_builder->at(input_gate_bias_index).get(); // optional
- lstm_params.set_cifg_params(input_to_input_weights_alloc->handle(),
- recurrent_to_input_weights_alloc->handle(),
- cell_to_input_weights_handle, input_gate_bias_alloc->handle());
- }
- if (has_peephole_param)
- {
- auto cell_to_forget_weights_alloc =
- _tensor_builder->at(cell_to_forget_weights_index).get(); // optional
- auto cell_to_output_weights_alloc =
- _tensor_builder->at(cell_to_output_weights_index).get(); // optional
- lstm_params.set_peephole_params(cell_to_forget_weights_alloc->handle(),
- cell_to_output_weights_alloc->handle());
- }
- if (has_projection_param)
- {
- auto projection_weights_alloc = _tensor_builder->at(projection_weights_index).get(); // optional
- auto projection_bias_handle = has_projection_bias
- ? _tensor_builder->at(projection_bias_index).get()->handle()
- : nullptr; // optional
- lstm_params.set_projection_params(projection_weights_alloc->handle(), projection_bias_handle);
- }
-
- fn->configure(
- input_alloc->handle(), input_to_forget_weights_alloc->handle(),
- input_to_cell_weights_alloc->handle(), input_to_output_weights_alloc->handle(),
- recurrent_to_forget_weights_alloc->handle(), recurrent_to_cell_weights_alloc->handle(),
- recurrent_to_output_weights_alloc->handle(), forget_gate_bias_alloc->handle(),
- cell_bias_alloc->handle(), output_gate_bias_alloc->handle(), output_state_in_alloc->handle(),
- cell_state_in_alloc->handle(), scratch_buffer_alloc->handle(),
- output_state_out_alloc->handle(), cell_state_out_alloc->handle(), output_alloc->handle(),
- lstm_params, act_info, cell_clip, projection_clip);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ITensor,
+ ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_builder);
}
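+// NOTE The LSTM body removed above is factored into a shared helper in acl_common. The
+// signature below is only a rough sketch inferred from the call site; the template and
+// parameter names are assumptions, not the actual header:
+//
+//   template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer,
+//             typename T_TensorBuilder>
+//   std::unique_ptr<exec::IFunction>
+//   kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands,
+//                 const std::shared_ptr<T_TensorBuilder> &tensor_builder);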
void KernelGenerator::visit(const ir::operation::Mul &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEPixelWiseMultiplication>();
  // For scale 1.0, only RoundingPolicy::TO_ZERO is allowed
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), 1.0, // scale
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Neg &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::NENegLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
for (const auto &input_index : input_indexes)
{
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_alloc = _tensor_builder->at(input_index);
- assert(input_rank == input_alloc->num_dimensions());
- if (input_rank != input_alloc->info()->num_dimensions())
+ const auto &input_tensor = _tensor_builder->at(input_index);
+ assert(input_rank == input_tensor->num_dimensions());
+ if (input_rank != input_tensor->info()->num_dimensions())
{
      // This means the high dimension's value is 1 and dim_correction has been applied to the ifm tensor
- input_alloc->info()->set_tensor_shape(acl_common::asTensorShape(
+ input_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
_ctx.at(input_index).shape(), _current_op_seq_layout, backend_layout, false));
}
}
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(0)};
const auto permute_type = node.getPermuteType();
- auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
- auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
const auto rank = _ctx.at(ofm_idx).shape().rank();
assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
auto l = std::make_unique<::arm_compute::NEPermute>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
fn = std::move(l);
}
auto l = std::make_unique<::arm_compute::NEPermute>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
fn = std::move(l);
}
{
auto l = std::make_unique<::arm_compute::NECopy>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle());
fn = std::move(l);
}
const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto alpha_alloc = _tensor_builder->at(alpha_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto alpha_tensor = _tensor_builder->at(alpha_index).get();
std::unique_ptr<::arm_compute::IFunction> fn;
- auto l = std::make_unique<::arm_compute::NEPReLU>();
+ auto l = std::make_unique<::arm_compute::NEPReluLayer>();
- l->configure(ifm_alloc->handle(), alpha_alloc->handle(), ofm_alloc->handle());
+ l->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
fn = std::move(l);
const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
const auto input_rank = _ctx.at(input_index).shape().rank();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = input_alloc->layout();
+ const auto backend_layout = input_tensor->layout();
const auto reduce_axes =
acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
const auto reduce_type = node.param().reduce_type;
std::unique_ptr<::arm_compute::IFunction> fn;
if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
- // NOTE NEReduceMean has a bug that does not support NHWC layout
- // NEReduceMean intermediate tensors are always NCHW layout
- auto l = std::make_unique<::arm_compute::NEReduceMeanEx>();
+ auto l = std::make_unique<::arm_compute::NEReduceMean>();
- l->configure(input_alloc->handle(), reduce_axes, keep_dims, output_alloc->handle());
+ l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());
fn = std::move(l);
}
{
auto l = std::make_unique<::arm_compute::NEReduceSum>();
- l->configure(input_alloc->handle(), reduce_axes, keep_dims, output_alloc->handle());
+ l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());
fn = std::move(l);
}
{
auto l = std::make_unique<::arm_compute::NEReduceOperation>();
- l->configure(input_alloc->handle(), reduce_axes, keep_dims, output_alloc->handle(),
+ l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
acl_common::convertReduceType(reduce_type));
fn = std::move(l);
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<arm_compute::NEActivationLayer>();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
  // NOTE This operation must not change the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = output_alloc->layout();
+ const auto backend_layout = output_tensor->layout();
assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
frontend_layout == backend_layout);
UNUSED_RELEASE(frontend_layout);
auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::NEScale>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(),
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
const auto activation = node.param().activation;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto hidden_state_out_alloc = _tensor_builder->at(hidden_state_out_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
- auto weights_alloc = _tensor_builder->at(weights_index).get();
- auto recurrent_weights_alloc = _tensor_builder->at(recurrent_weights_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
- auto hidden_state_in_alloc = _tensor_builder->at(hidden_state_in_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
+ auto weights_tensor = _tensor_builder->at(weights_index).get();
+ auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
+ auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
auto copy_layer = std::make_unique<::arm_compute::NECopy>();
- copy_layer->configure(hidden_state_in_alloc->handle(), hidden_state_out_alloc->handle());
+ copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
_return_fn = asAclFunction(std::move(copy_layer));
- auto fn = std::make_unique<::arm_compute::NERNNLayerEx>(
+ auto fn = std::make_unique<::arm_compute::NERNNLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_alloc->handle(), weights_alloc->handle(), recurrent_weights_alloc->handle(),
- bias_alloc->handle(), hidden_state_out_alloc->handle(), output_alloc->handle(),
- act_info);
+ fn->configure(input_tensor->handle(), weights_tensor->handle(),
+ recurrent_weights_tensor->handle(), bias_tensor->handle(),
+ hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
_return_fn = asAclFunction(std::move(fn));
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
auto fn = std::make_unique<::arm_compute::NERsqrtLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
_return_fn = asAclFunction(std::move(fn));
}
(void)dims;
(void)ndim;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
_return_fn = std::move(acl_fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<arm_compute::NEActivationLayer>();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
const auto beta = node.param().beta;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
+ const auto frontend_layout = _current_op_seq_layout;
+ const auto backend_layout = input_tensor->layout();
+
+ // Disable applied dim_correction
+ const size_t input_rank = _ctx.at(input_index).shape().rank();
+ if (input_rank != input_tensor->info()->num_dimensions())
+ {
+ // This means the high dimension's value is 1 and dim_correction has been applied to the input tensor
+ const auto input = _ctx.at(input_index);
+ input_tensor->info()->set_tensor_shape(
+ acl_common::asTensorShape(input.shape(), frontend_layout, backend_layout, false));
+ }
auto fn = std::make_unique<::arm_compute::NESoftmaxLayer>(
_tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_alloc->handle(), output_alloc->handle(), beta);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
auto acl_fn = asAclFunction(std::move(fn));
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto block_size_alloc = _tensor_builder->at(block_size_index).get();
- auto paddings_alloc = _tensor_builder->at(paddings_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto block_size_tensor = _tensor_builder->at(block_size_index).get();
+ auto paddings_tensor = _tensor_builder->at(paddings_index).get();
assert(_ctx.at(block_size_index).data());
assert(_ctx.at(paddings_index).data());
- // NESpaceToBatchLayer has a bug that padding's values are 0 even when zero point of QASYMM8 is
- // not 0.
- auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayerEx>();
+ auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayer>();
- fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), paddings_alloc->handle(),
- ofm_alloc->handle());
+ fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+ ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
auto block_size = node.param().block_size;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayerEx>();
+ auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayer>();
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), block_size);
+ fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
auto acl_fn = asAclFunction(std::move(fn));
for (const auto &output : node.getOutputs())
output_indexes.emplace_back(output);
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- std::vector<arm_compute::ITensor *> output_allocs;
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ std::vector<arm_compute::ITensor *> output_tensors;
for (const auto &ofm_ind : output_indexes)
- output_allocs.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+ output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = ifm_alloc->layout();
+ const auto backend_layout = ifm_tensor->layout();
auto axis = node.param().axis;
if (axis < 0)
axis += ifm_rank;
auto fn = std::make_unique<::arm_compute::NESplit>();
- fn->configure(ifm_alloc->handle(), output_allocs, axis);
+ fn->configure(ifm_tensor->handle(), output_tensors, axis);
_return_fn = asAclFunction(std::move(fn));
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
const ::arm_compute::ActivationLayerInfo act_info{
::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
+ fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
auto acl_fn = asAclFunction(std::move(fn));
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEElementwiseSquaredDiff>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEArithmeticSubtraction>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
arm_compute::ConvertPolicy::SATURATE);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto outputData_alloc = _tensor_builder->at(output_index).get();
- auto inputData_alloc = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_builder->at(output_index).get();
+ auto inputData_tensor = _tensor_builder->at(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = inputData_alloc->layout();
+ const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
int input_rank = _ctx.at(input_index).shape().rank();
auto fn = std::make_unique<::arm_compute::NESlice>();
- fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set);
+ fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
auto acl_fn = asAclFunction(std::move(fn));
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto outputData_alloc = _tensor_builder->at(output_index).get();
- auto inputData_alloc = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_builder->at(output_index).get();
+ auto inputData_tensor = _tensor_builder->at(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = inputData_alloc->layout();
+ const auto backend_layout = inputData_tensor->layout();
// Set initializers for indices data such as order of inputData
int input_rank = _ctx.at(input_index).shape().rank();
auto fn = std::make_unique<::arm_compute::NEStridedSlice>();
- fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set,
+ fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
strides_set, begin_mask, end_mask, shrink_axis_mask);
auto acl_fn = asAclFunction(std::move(fn));
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ker_tensor = _tensor_builder->at(ker_index).get();
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
auto fn = std::make_unique<::arm_compute::NETransposeConvLayer>();
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info,
- invalid_horizontal, invalid_vertical);
+ fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
+ tconv_info, invalid_horizontal, invalid_vertical);
auto acl_fn = asAclFunction(std::move(fn));
const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
const auto &perm{node.param().perm};
- auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
- const auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
+ const auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = ifm_alloc->layout();
+ const auto backend_layout = ifm_tensor->layout();
const auto rank = _ctx.at(ifm_idx).shape().rank();
std::vector<std::int32_t> pv(perm.cbegin(), perm.cend());
std::unique_ptr<::arm_compute::IFunction> fn;
- if (ifm_alloc->num_dimensions() <= 2 && ofm_alloc->num_dimensions() <= 2)
+ if (ifm_tensor->num_dimensions() <= 2 && ofm_tensor->num_dimensions() <= 2)
{
auto l = std::make_unique<::arm_compute::NETranspose>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle());
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle());
fn = std::move(l);
}
{
auto l = std::make_unique<::arm_compute::NEPermute>();
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), backend_pv);
+ l->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
fn = std::move(l);
}
for (const auto &output_index : output_indexes)
{
size_t output_rank = _ctx.at(output_index).shape().rank();
- const auto &output_alloc = _tensor_builder->at(output_index);
- orig_outputs_acl_tensor_shapes.emplace_back(output_alloc->info()->tensor_shape());
- assert(output_rank == output_alloc->num_dimensions());
- if (output_rank != output_alloc->info()->num_dimensions())
+ const auto &output_tensor = _tensor_builder->at(output_index);
+ orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
+ assert(output_rank == output_tensor->num_dimensions());
+ if (output_rank != output_tensor->info()->num_dimensions())
{
      // This means the high dimension's value is 1 and dim_correction has been applied to the output tensor
- output_alloc->info()->set_tensor_shape(acl_common::asTensorShape(
+ output_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
_ctx.at(output_index).shape(), _current_op_seq_layout, backend_layout, false));
}
}
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEArithmeticAddition>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
arm_compute::ConvertPolicy::SATURATE);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Div &node)
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEElementwiseDivision>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_alloc->handle()));
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Exp &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::NEExpLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input_tensor = _tensor_builder->at(input_index).get();
auto fn = std::make_unique<::arm_compute::NEReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
+ fn->configure(input_tensor->handle(), output_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
const auto comparison_type = node.param().comparison_type;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
+ auto output_tensor = _tensor_builder->at(output_index).get();
+ auto input0_tensor = _tensor_builder->at(input0_index).get();
+ auto input1_tensor = _tensor_builder->at(input1_index).get();
auto fn = std::make_unique<::arm_compute::NEElementwiseComparison>();
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(),
+ fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
(arm_compute::ComparisonOperation)comparison_type);
auto acl_fn = asAclFunction(std::move(fn));
const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEElementwiseMin>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->at(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->at(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->at(rhs_index).get();
auto fn = std::make_unique<::arm_compute::NEElementwiseMax>();
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
+ fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
auto acl_fn = asAclFunction(std::move(fn));
#ifndef __ONERT_BACKEND_CPU_BACKEND_H__
#define __ONERT_BACKEND_CPU_BACKEND_H__
+#include "BackendContext.h"
#include "Config.h"
#include "ConstantInitializer.h"
#include "KernelGenerator.h"
std::shared_ptr<IConfig> config() const override { return _config; }
- std::unique_ptr<BackendContext> newContext(const ir::Graph &graph,
- const std::shared_ptr<custom::IKernelBuilder> &kb,
- bool) const override
+ std::unique_ptr<onert::backend::BackendContext>
+ newContext(const ir::Graph &graph, const std::shared_ptr<custom::IKernelBuilder> &kb,
+ bool) const override
{
const auto &operands = graph.operands();
const auto &operations = graph.operations();
auto tb = std::make_shared<TensorBuilder>();
context->tensor_builder = tb;
context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, kb);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, kb,
+ context->external_context());
context->tensor_register = nullptr;
context->optimizer = nullptr;
return context;
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "ExternalContext.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+ BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
+ std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
+ std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
+ std::shared_ptr<ITensorRegister> tensor_register = nullptr,
+ std::shared_ptr<IOptimizer> optimizer = nullptr)
+ : onert::backend::BackendContext(backend, graph, tensor_builder, constant_initializer,
+ kernel_gen, tensor_register, optimizer),
+ _external_context(new ExternalContext)
+ {
+ }
+
+ std::shared_ptr<ExternalContext> external_context() { return _external_context; }
+
+private:
+ // NOTE The ruy context owns a thread pool, so creating multiple ruy contexts
+ // duplicates the thread pool as well
+ // TODO Create a single ruy context per session
+ std::shared_ptr<ExternalContext> _external_context;
+};
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_BACKEND_CONTEXT_H__
set(LIB_ONERT_BACKEND_CPU onert_backend_cpu)
+nnfw_find_package(Ruy REQUIRED)
+
file(GLOB_RECURSE SOURCES "*.cc")
add_library(${LIB_ONERT_BACKEND_CPU} SHARED ${SOURCES})
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE onert_core)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_common)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_coverage)
+target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE ruy)
+target_link_libraries(${LIB_ONERT_BACKEND_CPU} INTERFACE ruy_instrumentation)
set_target_properties(${LIB_ONERT_BACKEND_CPU} PROPERTIES OUTPUT_NAME backend_cpu)
*/
#include "ConstantInitializer.h"
+#include "Tensor.h"
namespace onert
{
// DO NOTHING
}
+void ConstantInitializer::registerDefaultInitializer(const ir::OperandIndex &index,
+ const ir::Operand &obj)
+{
+ registerExternalInitializer(index, obj);
+}
+
+void ConstantInitializer::registerExternalInitializer(const ir::OperandIndex &index,
+ const ir::Operand &obj)
+{
+ // Applies to CONSTANT operands only
+ // TODO Add a check for whether the tensor has already been allocated
+ if (!obj.isConstant())
+ return;
+
+ _init_map[index] = [](const onert::ir::Operand &model_obj, onert::backend::ITensor &itensor) {
+ auto data = model_obj.shareData();
+ assert(data && data->base());
+ ExternalTensor &tensor = dynamic_cast<ExternalTensor &>(itensor);
+ tensor.setData(data);
+ };
+}
+
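+// NOTE The initializer registered above is what makes these constants "external": instead of
+// copying weights into backend memory, the ExternalTensor is pointed at the model's shared
+// ir::Data. A rough sketch of what setData is assumed to do (member names are hypothetical;
+// see Tensor.h for the actual definition):
+//
+//   void ExternalTensor::setData(const std::shared_ptr<ir::Data> &data)
+//   {
+//     _data = data;                                  // keep the model data alive
+//     _buffer = const_cast<uint8_t *>(data->base()); // reuse it directly, no copy
+//   }
+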
void ConstantInitializer::visit(const ir::operation::Conv2D &node)
{
const auto &kernel_index = node.getInputs().at(ir::operation::Conv2D::KERNEL);
const auto &kernel_obj = _operands.at(kernel_index);
- registerCopyInitializer(kernel_index, kernel_obj);
+ registerExternalInitializer(kernel_index, kernel_obj);
const auto &bias_index = node.getInputs().at(ir::operation::Conv2D::BIAS);
const auto &bias_obj = _operands.at(bias_index);
- registerCopyInitializer(bias_index, bias_obj);
+ registerExternalInitializer(bias_index, bias_obj);
}
void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
{
const auto &kernel_index = node.getInputs().at(ir::operation::DepthwiseConv2D::KERNEL);
const auto &kernel_obj = _operands.at(kernel_index);
- registerCopyInitializer(kernel_index, kernel_obj);
+ registerExternalInitializer(kernel_index, kernel_obj);
const auto &bias_index = node.getInputs().at(ir::operation::DepthwiseConv2D::BIAS);
const auto &bias_obj = _operands.at(bias_index);
- registerCopyInitializer(bias_index, bias_obj);
+ registerExternalInitializer(bias_index, bias_obj);
}
void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
{
const auto &weight_index = node.getInputs().at(ir::operation::FullyConnected::WEIGHT);
const auto &weight_obj = _operands.at(weight_index);
- registerCopyInitializer(weight_index, weight_obj);
+ registerExternalInitializer(weight_index, weight_obj);
const auto &bias_index = node.getInputs().at(ir::operation::FullyConnected::BIAS);
if (!bias_index.undefined())
{
const auto &bias_obj = _operands.at(bias_index);
- registerCopyInitializer(bias_index, bias_obj);
+ registerExternalInitializer(bias_index, bias_obj);
}
}
const std::shared_ptr<TensorBuilder> &tensor_builder);
public:
+ void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
+
+ // TODO: For now only the cpu backend supports constant tensors that use external data.
+ // If other backends add support (which would likely require making
+ // ExternalTensor abstract, e.g. as an IExternal interface),
+ // this could become part of the IConstantInitializer interface.
+ void registerExternalInitializer(const ir::OperandIndex &, const ir::Operand &);
+
+public:
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
void visit(const ir::operation::FullyConnected &) override;
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__
+
+#include <backend/IExternalContext.h>
+#include <util/ConfigSource.h>
+#include <ruy/context.h>
+
+namespace
+{
+const int kDefaultNumThreadpoolThreads = 1;
+}
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+class ExternalContext : public IExternalContext
+{
+public:
+ ExternalContext() : _ruy_context(new ruy::Context)
+ {
+ setMaxNumThreads(onert::util::getConfigInt(onert::util::config::RUY_THREADS));
+#ifdef USE_RUY_GEMV
+ _ruy_context->cache_policy = ruy::kCacheLHSOnNarrowMul;
+#endif
+ }
+
+ void setMaxNumThreads(int max_num_threads)
+ {
+ const int target_num_threads =
+ max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
+ _ruy_context->max_num_threads = target_num_threads;
+ }
+
+ ruy::Context *ruy_context() const { return _ruy_context.get(); }
+
+private:
+ const std::unique_ptr<ruy::Context> _ruy_context;
+};
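+
+// Hypothetical standalone usage of ExternalContext, sketched only from the calls declared
+// above (the constructor reads the RUY_THREADS config; no additional API is assumed):
+//
+//   auto ctx = std::make_shared<ExternalContext>(); // thread count taken from RUY_THREADS
+//   ctx->setMaxNumThreads(4);                       // or override it explicitly
+//   ruy::Context *ruy_ctx = ctx->ruy_context();     // one context => one shared thread pool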
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_EXTERNAL_CONTEXT_H__
#include "ops/AddLayer.h"
#include "ops/ArgMinMaxLayer.h"
#include "ops/AvgPoolLayer.h"
+#include "ops/BatchToSpaceNDLayer.h"
#include "ops/CastLayer.h"
#include "ops/CompareLayer.h"
#include "ops/ConcatLayer.h"
#include "ops/RangeLayer.h"
#include "ops/ReduceLayer.h"
#include "ops/ReLULayer.h"
+#include "ops/ReLU6Layer.h"
#include "ops/ReshapeLayer.h"
+#include "ops/ResizeBilinearLayer.h"
#include "ops/ReverseLayer.h"
#include "ops/RoundLayer.h"
#include "ops/RsqrtLayer.h"
#include "ops/SoftMaxLayer.h"
#include "ops/StridedSliceLayer.h"
#include "ops/SpaceToBatchNDLayer.h"
+#include "ops/SpaceToDepthLayer.h"
#include "ops/SplitLayer.h"
+#include "ops/SplitVLayer.h"
#include "ops/SubLayer.h"
#include "ops/TanhLayer.h"
#include "ops/TileLayer.h"
#include "ops/ZerosLikeLayer.h"
#include "ops/SquaredDiffLayer.h"
#include "ops/LogicalOrLayer.h"
+#include "ops/L2NormLayer.h"
#include "ops/MatrixBandPartLayer.h"
#include "ops/BatchMatMulLayer.h"
#include "ops/BroadcastToLayer.h"
#include "ops/FusedBatchNormLayer.h"
#include "ops/LogSoftMaxLayer.h"
+#include "ops/QuantizeLayer.h"
+#include "ops/StatelessRandomUniformLayer.h"
#include <backend/Backend.h>
#include <backend/IConfig.h>
KernelGenerator::KernelGenerator(
const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder)
+ const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context)
: _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
- _kernel_builder(kernel_builder), _current_op_seq_layout(ir::Layout::UNKNOWN)
+ _kernel_builder(kernel_builder), _current_op_seq_layout(ir::Layout::UNKNOWN),
+ _external_context(external_context)
{
// DO NOTHING
}
const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
- auto ker_alloc = _tensor_builder->portableAt(ker_index).get();
- auto bias_alloc = _tensor_builder->portableAt(bias_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
+ auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
+ auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
const auto stride = node.param().stride;
const auto activation = node.param().activation;
if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
{
- fn->configure(ifm_alloc, ker_alloc, bias_alloc, param_padding.type, param_padding.param.left,
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
- stride.horizontal, stride.vertical, activation, ofm_alloc);
+ stride.horizontal, stride.vertical, activation, ofm_tensor);
_return_fn = std::move(fn);
return;
const auto padding =
ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
- fn->configure(ifm_alloc, ker_alloc, bias_alloc, param_padding.type, padding.left, padding.right,
- padding.top, padding.bottom, stride.horizontal, stride.vertical, activation,
- ofm_alloc);
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
+ padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
+ activation, ofm_tensor);
_return_fn = std::move(fn);
}
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
- auto ker_alloc = _tensor_builder->portableAt(ker_index).get();
- auto bias_alloc = _tensor_builder->portableAt(bias_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
+ auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
+ auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
- fn->configure(ifm_alloc, ker_alloc, bias_alloc, padding.left, padding.right, padding.top,
+ fn->configure(ifm_tensor, ker_tensor, bias_tensor, padding.left, padding.right, padding.top,
padding.bottom, stride.horizontal, stride.vertical, multiplier, activation,
- ofm_alloc);
+ ofm_tensor);
_return_fn = std::move(fn);
}
ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::MaxPoolLayer>();
- fn->configure(ifm_alloc, padding.left, padding.right, padding.top, padding.bottom,
- stride.horizontal, stride.vertical, kw, kh, activation, ofm_alloc);
+ fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
+ stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor);
_return_fn = std::move(fn);
}
ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::AvgPoolLayer>();
- fn->configure(ifm_alloc, padding.left, padding.right, padding.top, padding.bottom,
- stride.horizontal, stride.vertical, kw, kh, activation, ofm_alloc);
+ fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
+ stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor);
_return_fn = std::move(fn);
}
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
- auto output_alloc = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
auto fn = std::make_unique<ops::ConcatLayer>();
- fn->configure(input_tensors, axis, output_alloc);
+ fn->configure(input_tensors, axis, output_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
+ const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};
+
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto block_size_tensor = _tensor_builder->portableAt(block_size_index).get();
+
+ auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();
+
+ IPortableTensor *crops_tensor = nullptr;
+ const auto NNApiInputs = 2;
+
+ if (node.getInputs().size() != NNApiInputs)
+ {
+ const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
+ crops_tensor = _tensor_builder->portableAt(crops_data_index).get();
+ }
+
+ fn->configure(input_tensor, output_tensor, block_size_tensor, crops_tensor);
_return_fn = std::move(fn);
}
const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto value_alloc = _tensor_builder->portableAt(value_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto value_tensor = _tensor_builder->portableAt(value_index).get();
auto fn = std::make_unique<ops::FillLayer>();
- fn->configure(input_alloc, value_alloc, output_alloc);
+ fn->configure(input_tensor, value_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
const auto activation = node.param().activation;
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto weight_alloc = _tensor_builder->portableAt(weight_index).get();
- auto bias_alloc =
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto weight_tensor = _tensor_builder->portableAt(weight_index).get();
+ auto bias_tensor =
bias_index.undefined() ? nullptr : _tensor_builder->portableAt(bias_index).get();
auto fn = std::make_unique<ops::FullyConnectedLayer>();
- fn->configure(input_alloc, weight_alloc, bias_alloc, activation, output_alloc);
+ fn->configure(input_tensor, weight_tensor, bias_tensor, activation, output_tensor,
+ _external_context);
_return_fn = std::move(fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
// optional 2nd input
- IPortableTensor *shape_alloc = nullptr;
+ IPortableTensor *shape_tensor = nullptr;
if (node.getInputs().size() == 2)
{
const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
- shape_alloc = _tensor_builder->portableAt(shape_index).get();
+ shape_tensor = _tensor_builder->portableAt(shape_index).get();
}
auto fn = std::make_unique<ops::ReshapeLayer>();
- fn->configure(input_alloc, shape_alloc, output_alloc);
+ fn->configure(input_tensor, shape_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
// Squeeze can share same kernel with reshape
auto fn = std::make_unique<ops::ReshapeLayer>();
- fn->configure(input_alloc, nullptr, output_alloc);
+ fn->configure(input_tensor, nullptr, output_tensor);
_return_fn = std::move(fn);
}
const auto beta = node.param().beta;
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::SoftMaxLayer>();
- fn->configure(input_alloc, beta, output_alloc);
+ fn->configure(input_tensor, beta, output_tensor);
_return_fn = std::move(fn);
}
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::AddLayer>();
- fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
_return_fn = std::move(fn);
}
const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto comparison_type = node.param().comparison_type;
auto fn = std::make_unique<ops::CompareLayer>();
- fn->configure(lhs_alloc, rhs_alloc, comparison_type, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, comparison_type, ofm_tensor);
_return_fn = std::move(fn);
}
const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto indices_alloc = _tensor_builder->portableAt(indices_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto indices_tensor = _tensor_builder->portableAt(indices_index).get();
- const auto backend_layout = output_alloc->layout();
+ const auto backend_layout = output_tensor->layout();
UNUSED_RELEASE(backend_layout);
// NOTE The frontend layout and backend layout must be the same for this operation.
// a model. For example, if a model in NHWC has this operation as output rank == 4, indices
// rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
// and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
- assert(backend_layout == input_alloc->layout());
- assert(backend_layout == indices_alloc->layout());
+ assert(backend_layout == input_tensor->layout());
+ assert(backend_layout == indices_tensor->layout());
const auto &input_shape = _ctx.at(input_index).shape();
UNUSED_RELEASE(input_shape);
assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout);
auto fn = std::make_unique<ops::GatherLayer>();
- fn->configure(input_alloc, indices_alloc, output_alloc, axis_value);
+ fn->configure(input_tensor, indices_tensor, output_tensor, axis_value);
_return_fn = std::move(fn);
}
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::SubLayer>();
- fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
_return_fn = std::move(fn);
}
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::MulLayer>();
- fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
_return_fn = std::move(fn);
}
const auto axis = node.param().axis;
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto indices_alloc = _tensor_builder->portableAt(indices_index).get();
- auto depth_alloc = _tensor_builder->portableAt(depth_index).get();
- auto onvalue_alloc = _tensor_builder->portableAt(onvalue_index).get();
- auto offvalue_alloc = _tensor_builder->portableAt(offvalue_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto indices_tensor = _tensor_builder->portableAt(indices_index).get();
+ auto depth_tensor = _tensor_builder->portableAt(depth_index).get();
+ auto onvalue_tensor = _tensor_builder->portableAt(onvalue_index).get();
+ auto offvalue_tensor = _tensor_builder->portableAt(offvalue_index).get();
- assert(indices_alloc->data_type() == OperandType::INT32);
- assert(axis <= static_cast<int>(indices_alloc->num_dimensions()));
+ assert(indices_tensor->data_type() == OperandType::INT32);
+ assert(axis <= static_cast<int>(indices_tensor->num_dimensions()));
auto fn = std::make_unique<ops::OneHotLayer>();
- fn->configure(indices_alloc, depth_alloc, onvalue_alloc, offvalue_alloc, output_alloc, axis);
+ fn->configure(indices_tensor, depth_tensor, onvalue_tensor, offvalue_tensor, output_tensor, axis);
_return_fn = std::move(fn);
}
const auto activation = node.param().activation;
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::DivLayer>();
- fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
_return_fn = std::move(fn);
}
{
const auto ofm_index{node.getOutputs().at(0)};
- auto output_alloc = _tensor_builder->portableAt(ofm_index).get();
- std::vector<const IPortableTensor *> input_allocs;
+ auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_allocs.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
const auto equation = node.param().equation;
auto fn = std::make_unique<ops::EinsumLayer>();
- fn->configure(input_allocs, equation, output_alloc);
+ fn->configure(input_tensors, equation, output_tensor);
_return_fn = std::move(fn);
}
{
auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
std::vector<custom::TypeInfo> &types,
- std::vector<std::shared_ptr<IPortableTensor>> &allocs) {
+ std::vector<std::shared_ptr<IPortableTensor>> &tensors) {
for (auto &idx : opSeq)
{
const auto &operand = _ctx.at(idx);
// TODO make sure using `_current_op_seq_layout` is correct for custom operations
types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
- auto in_alloc = _tensor_builder->portableAt(idx);
- allocs.emplace_back(in_alloc);
+ auto in_tensor = _tensor_builder->portableAt(idx);
+ tensors.emplace_back(in_tensor);
}
};
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::ExpLayer>();
- fn->configure(input_alloc, output_alloc);
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto axis_alloc = _tensor_builder->portableAt(axis_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto axis_tensor = _tensor_builder->portableAt(axis_index).get();
auto fn = std::make_unique<ops::ExpandDimsLayer>();
- fn->configure(input_alloc, axis_alloc, output_alloc);
+ fn->configure(input_tensor, axis_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::LogisticLayer>();
- fn->configure(input_alloc, output_alloc);
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::TanhLayer>();
- fn->configure(input_alloc, output_alloc);
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
assert(-rank <= axis && axis < rank);
- auto output_alloc = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
auto fn = std::make_unique<ops::PackLayer>();
- fn->configure(input_tensors, axis, output_alloc);
+ fn->configure(input_tensors, axis, output_tensor);
_return_fn = std::move(fn);
}
assert(rank == 0 || (-rank <= axis && axis < rank));
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
std::vector<IPortableTensor *> output_tensors;
for (auto &output_idx : node.getOutputs())
uint32_t axis_resolved = (axis < 0 ? axis + rank : axis);
- fn->configure(input_alloc, axis_resolved, node.param().num, output_tensors);
+ fn->configure(input_tensor, axis_resolved, node.param().num, output_tensors);
_return_fn = std::move(fn);
}
auto fn = std::make_unique<ops::PadLayer>();
- fn->configure(input, output, pad_base, pad_rank);
+ bool isPadV2 = node.getInputs().size() == 3;
+ const void *value = nullptr;
+
+ if (isPadV2)
+ {
+ const auto value_index{node.getInputs().at(ir::operation::Pad::Input::VALUE)};
+ value = reinterpret_cast<const void *>(_ctx.at(value_index).data()->base());
+ }
+ fn->configure(input, output, pad_base, pad_rank, value);
_return_fn = std::move(fn);
}
const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::MaxLayer>();
- fn->configure(lhs_alloc, rhs_alloc, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::MinLayer>();
- fn->configure(lhs_alloc, rhs_alloc, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::CastLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::TransposeLayer>();
- fn->configure(input_alloc, output_alloc, node.param().perm);
+ fn->configure(input_tensor, output_tensor, node.param().perm);
_return_fn = std::move(fn);
}
const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
const auto keep_dims = node.param().keep_dims;
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto axes_alloc = _tensor_builder->portableAt(axes_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto axes_tensor = _tensor_builder->portableAt(axes_index).get();
if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
auto fn = std::make_unique<ops::MeanLayer>();
- fn->configure(input_alloc, axes_alloc, output_alloc, keep_dims);
+ fn->configure(input_tensor, axes_tensor, output_tensor, keep_dims);
_return_fn = std::move(fn);
}
auto fn = std::make_unique<ops::ReduceLayer>();
const auto reduce_type = convertReduceType(node.param().reduce_type);
- fn->configure(input_alloc, axes_alloc, output_alloc, reduce_type, keep_dims);
+ fn->configure(input_tensor, axes_tensor, output_tensor, reduce_type, keep_dims);
_return_fn = std::move(fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(0)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::ReLULayer>();
- fn->configure(input_alloc, output_alloc);
+ fn->configure(input_tensor, output_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ReLU6 &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+
+ auto fn = std::make_unique<ops::ReLU6Layer>();
+
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto condition_alloc = _tensor_builder->portableAt(condition_index).get();
- auto true_alloc = _tensor_builder->portableAt(true_index).get();
- auto false_alloc = _tensor_builder->portableAt(false_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto condition_tensor = _tensor_builder->portableAt(condition_index).get();
+ auto true_tensor = _tensor_builder->portableAt(true_index).get();
+ auto false_tensor = _tensor_builder->portableAt(false_index).get();
auto fn = std::make_unique<ops::SelectLayer>();
- fn->configure(condition_alloc, true_alloc, false_alloc, output_alloc);
+ fn->configure(condition_tensor, true_tensor, false_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto begins_alloc = _tensor_builder->portableAt(begins_index).get();
- auto sizes_alloc = _tensor_builder->portableAt(sizes_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto begins_tensor = _tensor_builder->portableAt(begins_index).get();
+ auto sizes_tensor = _tensor_builder->portableAt(sizes_index).get();
auto fn = std::make_unique<ops::SliceLayer>();
- fn->configure(input_alloc, begins_alloc, sizes_alloc, output_alloc);
+ fn->configure(input_tensor, begins_tensor, sizes_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto starts_alloc = _tensor_builder->portableAt(starts_index).get();
- auto ends_alloc = _tensor_builder->portableAt(ends_index).get();
- auto strides_alloc = _tensor_builder->portableAt(strides_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto starts_tensor = _tensor_builder->portableAt(starts_index).get();
+ auto ends_tensor = _tensor_builder->portableAt(ends_index).get();
+ auto strides_tensor = _tensor_builder->portableAt(strides_index).get();
auto begin_mask = node.param().begin_mask;
auto end_mask = node.param().end_mask;
auto fn = std::make_unique<ops::StridedSliceLayer>();
- fn->configure(input_alloc, starts_alloc, ends_alloc, strides_alloc, output_alloc, begin_mask,
+ fn->configure(input_tensor, starts_tensor, ends_tensor, strides_tensor, output_tensor, begin_mask,
end_mask, shrink_axis_mask);
_return_fn = std::move(fn);
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::AbsLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Sin::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::SinLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Cos::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::CosLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::RsqrtLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::ShapeLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::INPUT)};
+
+ auto output_height = node.param().height_out;
+ auto output_width = node.param().width_out;
+ auto align_corners = node.param().align_corners;
+ auto half_pixel_centers = node.param().half_pixel_centers;
+
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+
+ auto fn = std::make_unique<ops::ResizeBilinearLayer>();
+
+ fn->configure(input_tensor, output_tensor, output_height, output_width, align_corners,
+ half_pixel_centers);
_return_fn = std::move(fn);
}
const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto axis_alloc = _tensor_builder->portableAt(axis_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto axis_tensor = _tensor_builder->portableAt(axis_index).get();
auto fn = std::make_unique<ops::ReverseLayer>();
- fn->configure(input_alloc, axis_alloc, output_alloc);
+ fn->configure(input_tensor, axis_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::NegLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
const auto axis = node.param().axis;
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::ArgMinMaxLayer>();
- fn->configure(input_alloc, output_alloc, axis, /* is_arg_max */ true);
+ fn->configure(input_tensor, output_tensor, axis, /* is_arg_max */ true);
_return_fn = std::move(fn);
}
const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::PowLayer>();
- fn->configure(lhs_alloc, rhs_alloc, ir::Activation::NONE, output_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
_return_fn = std::move(fn);
}
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Log::Input::INPUT)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_alloc = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
auto fn = std::make_unique<ops::LogLayer>();
- fn->configure(ifm_alloc, ofm_alloc);
+ fn->configure(ifm_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Round::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::RoundLayer>();
- fn->configure(input_alloc, output_alloc);
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::LogicalNot::INPUT)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::LogicalNotLayer>();
- fn->configure(input_alloc, output_alloc);
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto lhs_index{node.getInputs().at(0)};
const auto rhs_index{node.getInputs().at(1)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::LogicalOrLayer>();
- fn->configure(lhs_alloc, rhs_alloc, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::ZerosLike &node)
+void KernelGenerator::visit(const ir::operation::L2Normalization &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ZerosLike::INPUT)};
+ const auto input_index{node.getInputs().at(0)};
auto output_alloc = _tensor_builder->portableAt(output_index).get();
auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto fn = std::make_unique<ops::ZerosLikeLayer>();
+ auto fn = std::make_unique<ops::L2NormLayer>();
fn->configure(input_alloc, output_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ZerosLike &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ZerosLike::INPUT)};
+
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+
+ auto fn = std::make_unique<ops::ZerosLikeLayer>();
+
+ fn->configure(input_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto start_alloc = _tensor_builder->portableAt(start_index).get();
- auto limit_alloc = _tensor_builder->portableAt(limit_index).get();
- auto delta_alloc = _tensor_builder->portableAt(delta_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto start_tensor = _tensor_builder->portableAt(start_index).get();
+ auto limit_tensor = _tensor_builder->portableAt(limit_index).get();
+ auto delta_tensor = _tensor_builder->portableAt(delta_index).get();
auto fn = std::make_unique<ops::RangeLayer>();
- fn->configure(start_alloc, limit_alloc, delta_alloc, output_alloc);
+ fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_alloc = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
auto fn = std::make_unique<ops::SqDiffLayer>();
- fn->configure(lhs_alloc, rhs_alloc, ofm_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
_return_fn = std::move(fn);
}
const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto multiples_alloc = _tensor_builder->portableAt(multiples_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto multiples_tensor = _tensor_builder->portableAt(multiples_index).get();
auto fn = std::make_unique<ops::TileLayer>();
- fn->configure(input_alloc, multiples_alloc, output_alloc);
+ fn->configure(input_tensor, multiples_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto num_lower_alloc = _tensor_builder->portableAt(num_lower_index).get();
- auto num_upper_alloc = _tensor_builder->portableAt(num_upper_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto num_lower_tensor = _tensor_builder->portableAt(num_lower_index).get();
+ auto num_upper_tensor = _tensor_builder->portableAt(num_upper_index).get();
auto fn = std::make_unique<ops::MatrixBandPartLayer>();
- fn->configure(input_alloc, num_lower_alloc, num_upper_alloc, output_alloc);
+ fn->configure(input_tensor, num_lower_tensor, num_upper_tensor, output_tensor);
_return_fn = std::move(fn);
}
const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto lhs_alloc = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_alloc = _tensor_builder->portableAt(rhs_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
+ auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
const auto adj_x = node.param().adj_x;
const auto adj_y = node.param().adj_y;
auto fn = std::make_unique<ops::BatchMatMulLayer>();
- fn->configure(lhs_alloc, rhs_alloc, adj_x, adj_y, output_alloc);
+ fn->configure(lhs_tensor, rhs_tensor, adj_x, adj_y, output_tensor);
_return_fn = std::move(fn);
}
const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto shape_alloc = _tensor_builder->portableAt(shape_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto shape_tensor = _tensor_builder->portableAt(shape_index).get();
auto fn = std::make_unique<ops::BroadcastToLayer>();
- fn->configure(input_alloc, shape_alloc, output_alloc);
+ fn->configure(input_tensor, shape_tensor, output_tensor);
_return_fn = std::move(fn);
}
{
const auto ofm_index{node.getOutputs().at(0)};
- auto output_alloc = _tensor_builder->portableAt(ofm_index).get();
- std::vector<const IPortableTensor *> input_allocs;
+ auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_allocs.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
const auto epsilon = node.param().epsilon;
const auto is_training = node.param().is_training;
auto fn = std::make_unique<ops::FusedBatchNormLayer>();
- fn->configure(input_allocs, epsilon, is_training, data_format, output_alloc);
+ fn->configure(input_tensors, epsilon, is_training, data_format, output_tensor);
_return_fn = std::move(fn);
}
const auto beta = node.param().beta;
const auto axis = node.param().axis;
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
auto fn = std::make_unique<ops::LogSoftMaxLayer>();
- fn->configure(input_alloc, beta, axis, output_alloc);
+ fn->configure(input_tensor, beta, axis, output_tensor);
_return_fn = std::move(fn);
}
const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto block_shape_alloc = _tensor_builder->portableAt(block_shape_index).get();
- auto padding_alloc = _tensor_builder->portableAt(padding_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto block_shape_tensor = _tensor_builder->portableAt(block_shape_index).get();
+ auto padding_tensor = _tensor_builder->portableAt(padding_index).get();
auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();
- fn->configure(input_alloc, block_shape_alloc, padding_alloc, output_alloc);
+ fn->configure(input_tensor, block_shape_tensor, padding_tensor, output_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Quantize &node)
+{
+ const auto input_index{node.getInputs().at(ir::operation::Quantize::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+
+ auto fn = std::make_unique<ops::QuantizeLayer>();
+
+ fn->configure(input_tensor, output_tensor);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
+{
+ const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+ auto block_size = node.param().block_size;
+
+ auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+
+ auto fn = std::make_unique<ops::SpaceToDepthLayer>();
+
+ fn->configure(input_tensor, block_size, output_tensor);
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
+ const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};
+
+ auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto shape_tensor = _tensor_builder->portableAt(shape_index).get();
+ auto seed_tensor = _tensor_builder->portableAt(seed_index).get();
+
+ auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();
+
+ fn->configure(shape_tensor, seed_tensor, output_tensor);
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::SplitV &node)
+{
+ const auto num_splits = node.param().num_splits;
+ assert(num_splits == static_cast<int>(node.getOutputs().size()));
+
+ const auto input_idx{node.getInputs().at(ir::operation::SplitV::Input::INPUT)};
+ const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
+ const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
+
+ auto in_tensor = _tensor_builder->portableAt(input_idx).get();
+ auto in_size_splits = _tensor_builder->portableAt(size_splits).get();
+ auto in_split_dim = _tensor_builder->portableAt(split_dim).get();
+
+ std::vector<IPortableTensor *> out_tensors;
+ for (auto &output_idx : node.getOutputs())
+ out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get());
+
+ auto fn = std::make_unique<ops::SplitVLayer>();
+
+ fn->configure(in_tensor, in_size_splits, in_split_dim, num_splits, out_tensors);
_return_fn = std::move(fn);
}
#ifndef __ONERT_BACKEND_CPU_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_CPU_KERNEL_GENERATOR_H__
+#include "ExternalContext.h"
#include "TensorBuilder.h"
#include "Tensor.h"
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
const std::shared_ptr<TensorBuilder> &tensor_builder,
- const std::shared_ptr<custom::IKernelBuilder> &kernel_builder);
+ const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
+ const std::shared_ptr<ExternalContext> &external_context);
using IKernelGenerator::visit;
void visit(const ir::operation::Transpose &) override;
void visit(const ir::operation::Reduce &) override;
void visit(const ir::operation::ReLU &) override;
+ void visit(const ir::operation::ReLU6 &) override;
void visit(const ir::operation::Select &) override;
void visit(const ir::operation::Slice &) override;
void visit(const ir::operation::StridedSlice &) override;
void visit(const ir::operation::Sin &) override;
void visit(const ir::operation::RSQRT &) override;
void visit(const ir::operation::Shape &) override;
+ void visit(const ir::operation::ResizeBilinear &node) override;
void visit(const ir::operation::Reverse &) override;
void visit(const ir::operation::Neg &) override;
void visit(const ir::operation::ArgMax &) override;
void visit(const ir::operation::SquaredDifference &) override;
void visit(const ir::operation::Tile &) override;
void visit(const ir::operation::LogicalOr &) override;
+ void visit(const ir::operation::L2Normalization &) override;
void visit(const ir::operation::Range &) override;
void visit(const ir::operation::MatrixBandPart &) override;
void visit(const ir::operation::BatchMatMul &) override;
+ void visit(const ir::operation::BatchToSpaceND &) override;
void visit(const ir::operation::BroadcastTo &) override;
void visit(const ir::operation::FusedBatchNorm &) override;
void visit(const ir::operation::LogSoftmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
+ void visit(const ir::operation::Quantize &) override;
+ void visit(const ir::operation::SpaceToDepth &) override;
+ void visit(const ir::operation::StatelessRandomUniform &) override;
+ void visit(const ir::operation::SplitV &) override;
private:
const ir::Operands &_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
ir::Layout _current_op_seq_layout;
+ const std::shared_ptr<ExternalContext> _external_context;
};
} // namespace cpu
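
For reference, a minimal sketch (not part of the patch) of how the extended constructor declared above would typically be initialized: the new external_context argument is simply stored in the _external_context member and later forwarded to kernels such as FullyConnectedLayer::configure(), as seen earlier in this diff. Member names that are not visible in the header excerpt (notably the operations context) are assumptions.

  KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx,
                                   const ir::Operations &operations_ctx,
                                   const std::shared_ptr<TensorBuilder> &tensor_builder,
                                   const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
                                   const std::shared_ptr<ExternalContext> &external_context)
      // NOTE _operations_ctx is an assumed member name; the rest appear in the header above.
      : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
        _kernel_builder(kernel_builder), _current_op_seq_layout(ir::Layout::UNKNOWN),
        _external_context(external_context)
  {
    // Only stores the references; kernels are created lazily in the visit() overloads.
  }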
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "StaticTensorManager.h"
+#include "Tensor.h"
+
+#include <util/logging.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+StaticTensorManager::StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> ®,
+ cpu_common::DynamicTensorManager *dynamic_tensor_manager)
+ : _nonconst_mgr{new cpu_common::MemoryManager()}, _tensors{reg},
+ _dynamic_tensor_manager{dynamic_tensor_manager}
+{
+ // DO NOTHING
+}
+
+void StaticTensorManager::allocateNonconsts(void)
+{
+ _nonconst_mgr->allocate();
+
+ for (auto &pair : _tensors->native_tensors())
+ {
+ const auto &ind = pair.first;
+ auto tensor = pair.second;
+ if (!_as_constants[ind] && !tensor->is_dynamic())
+ {
+ auto *buffer = _nonconst_mgr->getBuffer(ind);
+ tensor->setBuffer(buffer);
+
+ VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value()
+ << "): " << static_cast<void *>(buffer) << std::endl;
+ }
+ }
+}
+
+void StaticTensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
+
+void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
+ const ir::OperandInfo &tensor_info, ir::Layout backend_layout,
+ bool as_const)
+{
+ assert(!_tensors->getITensor(ind));
+ if (as_const)
+ {
+ auto tensor = std::make_shared<ExternalTensor>(tensor_info, backend_layout);
+ _tensors->setNativeTensor(ind, tensor);
+ }
+ else
+ {
+ auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, _dynamic_tensor_manager);
+ _tensors->setNativeTensor(ind, tensor);
+ }
+ _as_constants[ind] = as_const;
+}
+
+void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
+{
+ assert(_tensors->getITensor(ind));
+
+ // This method is called only when a tensor has proper shape
+ assert(!_tensors->getITensor(ind)->is_dynamic());
+
+ if (!_as_constants[ind])
+ _nonconst_mgr->claimPlan(ind, size);
+}
+
+void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
+{
+ assert(_tensors->getITensor(ind));
+
+ // This method is called only when a tensor has proper shape
+ assert(!_tensors->getITensor(ind)->is_dynamic());
+
+ if (!_as_constants[ind])
+ _nonconst_mgr->releasePlan(ind);
+}
+
+void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
+{
+ for (const auto &it : _tensors->native_tensors())
+ fn(it.first);
+}
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
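
As context (not in the patch): a minimal usage sketch of the lifecycle implied by the manager above, based only on the methods it defines. The tensor registry, dynamic tensor manager, operand index/info, and plan size are assumed to be provided by the surrounding backend.

  #include "StaticTensorManager.h"

  using namespace onert;

  void planAndAllocate(const std::shared_ptr<backend::cpu_common::TensorRegistry> &reg,
                       backend::cpu_common::DynamicTensorManager *dyn_mgr,
                       const ir::OperandIndex &index, const ir::OperandInfo &info, uint32_t size)
  {
    backend::cpu::StaticTensorManager mgr{reg, dyn_mgr};

    // Non-constant operands become Tensor; constants would become ExternalTensor instead.
    mgr.buildTensor(index, info, ir::Layout::NHWC, /* as_const */ false);

    // Lifetime planning: claim at the operand's first use, release at its last use.
    mgr.claimPlan(index, size);
    mgr.releasePlan(index);

    // Allocate the planned block and attach buffers to all static non-constant tensors.
    mgr.allocateNonconsts();

    // ... run kernels ...

    mgr.deallocateNonconsts();
  }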
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
+#define __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
+
+#include "backend/IStaticTensorManager.h"
+#include "backend/cpu_common/DynamicTensorManager.h"
+#include "backend/cpu_common/MemoryManager.h"
+#include "backend/cpu_common/TensorRegistry.h"
+#include "backend/ITensorManager.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandInfo.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+class StaticTensorManager : public backend::IStaticTensorManager
+{
+public:
+ StaticTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> ®,
+ cpu_common::DynamicTensorManager *dynamic_tensor_manager);
+ virtual ~StaticTensorManager() = default;
+
+ void allocateNonconsts(void);
+ void deallocateNonconsts(void);
+
+ void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info,
+ ir::Layout backend_layout, bool as_const);
+
+ void claimPlan(const ir::OperandIndex &ind, uint32_t size);
+ void releasePlan(const ir::OperandIndex &ind);
+
+ void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
+
+private:
+ std::unique_ptr<cpu_common::MemoryManager> _nonconst_mgr;
+ const std::shared_ptr<cpu_common::TensorRegistry> _tensors;
+ ir::OperandIndexMap<bool> _as_constants;
+ cpu_common::DynamicTensorManager *_dynamic_tensor_manager;
+};
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_STATICTENSOR_MANAGER_H__
using Tensor = cpu_common::Tensor;
-// Tensor which has data from external. To support this, assume below things
-// no padding, always NHWC layout, constant tensor and not dynamic
+/**
+ * @brief Class that uses data from external memory that is not managed by a backend,
+ * instead of allocating and copying the data. ExternalTensor's data pointer points to
+ * memory that is already allocated elsewhere, such as an mmapped area.
+ * This means that ExternalTensor can accept any kind of ir::Data.
+ * To support this, the following are assumed: no padding, always NHWC layout,
+ * a constant tensor, and not dynamic.
+ */
class ExternalTensor : public Tensor
{
public:
ExternalTensor() = delete;
public:
- ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout) : Tensor(info, layout)
+ ExternalTensor(const ir::OperandInfo &info, const ir::Layout layout)
+ : Tensor(info, layout, nullptr)
{
assert(_layout == ir::Layout::NHWC);
assert(_info.isConstant());
}
public:
+ /**
+ * @brief Set Data shared from an external source so that this ExternalTensor is not
+ * allocated by the CPU backend
+ * @param[in] data data of Operand to be set
+ */
void setData(const std::shared_ptr<ir::Data> data)
{
assert(data != nullptr);
TensorBuilder::TensorBuilder()
: _tensor_reg{new cpu_common::TensorRegistry()},
- _static_tensor_mgr{new cpu_common::StaticTensorManager(_tensor_reg)},
- _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)}
+ _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
+ _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
{
/* empty */
}
return _tensor_info_map.find(ind) != _tensor_info_map.end();
}
-void TensorBuilder::prepare(void)
-{
- _static_tensor_mgr->allocateConsts();
- _static_tensor_mgr->allocateNonconsts();
-}
+void TensorBuilder::prepare(void) { _static_tensor_mgr->allocateNonconsts(); }
void TensorBuilder::allocate()
{
return _tensor_reg->getPortableTensor(ind);
}
-bool TensorBuilder::setExternalTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor)
+bool TensorBuilder::setMigrantTensor(const ir::OperandIndex &ind,
+ const std::shared_ptr<IPortableTensor> &tensor)
{
- return _tensor_reg->setExternalTensor(ind, tensor);
+ return _tensor_reg->setMigrantTensor(ind, tensor);
}
void TensorBuilder::iterate(const IterateFunction &fn) { _static_tensor_mgr->iterate(fn); }
-std::shared_ptr<cpu_common::Tensor> TensorBuilder::at(const ir::OperandIndex &ind)
+std::shared_ptr<Tensor> TensorBuilder::at(const ir::OperandIndex &ind)
{
- return _tensor_reg->getManagedTensor(ind);
+ return _tensor_reg->getNativeTensor(ind);
}
std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void)
#define __ONERT_BACKEND_CPU_TENSOR_BUILDER_H__
#include <backend/cpu_common/DynamicTensorManager.h>
-#include <backend/cpu_common/StaticTensorManager.h>
#include <backend/cpu_common/TensorRegistry.h>
-#include <backend/cpu_common/Tensor.h>
#include <backend/ITensorBuilder.h>
#include <ir/OperandIndexMap.h>
+#include "StaticTensorManager.h"
+#include "Tensor.h"
+
#include <unordered_map>
namespace onert
* If not, program will crash with assert or exception.
* @return shared_ptr<Tensor>
*/
- std::shared_ptr<cpu_common::Tensor> at(const ir::OperandIndex &ind);
+ std::shared_ptr<Tensor> at(const ir::OperandIndex &ind);
std::shared_ptr<IPortableTensor> portableAt(const ir::OperandIndex &ind);
- bool setExternalTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor) override;
+ bool setMigrantTensor(const ir::OperandIndex &ind,
+ const std::shared_ptr<IPortableTensor> &tensor) override;
std::shared_ptr<ITensorRegistry> tensorRegistry() override { return _tensor_reg; }
private:
const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
- std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr;
std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
+ std::unique_ptr<StaticTensorManager> _static_tensor_mgr;
ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
};
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BatchToSpaceNDLayer.h"
+
+#include <cker/operation/BatchToSpaceND.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+BatchToSpaceNDLayer::BatchToSpaceNDLayer()
+ : _input(nullptr), _output(nullptr), _block_shape(nullptr), _crops(nullptr)
+{
+ // DO NOTHING
+}
+
+template <typename T> void BatchToSpaceNDLayer::batchToSpaceNDGeneric()
+{
+ const int32_t NNapiCrops[]{0, 0, 0, 0};
+ const int32_t *_crops_buffer;
+
+ if (_crops == nullptr)
+ {
+ _crops_buffer = NNapiCrops;
+ }
+ else
+ {
+ _crops_buffer = reinterpret_cast<const int32_t *>(_crops->buffer());
+ }
+ nnfw::cker::BatchToSpaceND<T>(
+ getTensorShape(_input), reinterpret_cast<const T *>(_input->buffer()),
+ reinterpret_cast<const int32_t *>(_block_shape->buffer()), _crops_buffer,
+ getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
+}
+
+void BatchToSpaceNDLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+ IPortableTensor *block_shape, IPortableTensor *crops)
+{
+ _output = output;
+ _input = input;
+ _block_shape = block_shape;
+ _crops = crops;
+}
+
+void BatchToSpaceNDLayer::run()
+{
+ if (_output->data_type() == OperandType::FLOAT32)
+ {
+ batchToSpaceNDGeneric<float>();
+ }
+ else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ batchToSpaceNDGeneric<uint8_t>();
+ }
+ else
+ {
+ throw std::runtime_error{"NYI"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
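
A short usage sketch (not in the patch), mirroring the kernel generator change earlier in this diff: when the model carries only the two NNAPI inputs, crops is passed as nullptr and the layer falls back to the all-zero crops handled in batchToSpaceNDGeneric(). The input, block_shape, and output tensors are assumed to be prepared by the backend.

  #include "BatchToSpaceNDLayer.h"

  void makeBatchToSpaceND(const onert::backend::IPortableTensor *input,
                          onert::backend::IPortableTensor *block_shape,
                          onert::backend::IPortableTensor *output)
  {
    onert::backend::cpu::ops::BatchToSpaceNDLayer layer;
    layer.configure(input, output, block_shape, /* crops */ nullptr);
    layer.run();
  }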
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_BATCHTOSPACEND_LAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_BATCHTOSPACEND_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class BatchToSpaceNDLayer : public ::onert::exec::IFunction
+{
+public:
+ BatchToSpaceNDLayer();
+
+public:
+ template <typename T> void batchToSpaceNDGeneric();
+
+ void configure(const IPortableTensor *input, IPortableTensor *output,
+ IPortableTensor *block_shape, IPortableTensor *crops);
+
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
+ IPortableTensor *_block_shape;
+ IPortableTensor *_crops;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_BATCHTOSPACEND_LAYER_H__
#include "OperationUtils.h"
+#include <assert.h>
#include <cker/operation/Comparison.h>
using namespace nnfw::cker;
namespace onert
using OpType = onert::ir::operation::Comparison::ComparisonType;
using namespace onert::backend::cpu;
+// Assumes that these enum values are in exactly this order
+static_assert(static_cast<int>(OpType::Equal) == 0, "An OpType value has changed!");
+static_assert(static_cast<int>(OpType::NotEqual) == 1, "An OpType value has changed!");
+static_assert(static_cast<int>(OpType::Greater) == 2, "An OpType value has changed!");
+static_assert(static_cast<int>(OpType::GreaterEqual) == 3, "An OpType value has changed!");
+static_assert(static_cast<int>(OpType::Less) == 4, "An OpType value has changed!");
+static_assert(static_cast<int>(OpType::LessEqual) == 5, "An OpType value has changed!");
+
template <typename T>
void compareQuant8(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
OpType op_type)
¶ms.input2_shift);
params.is_broadcast = !HaveSameShapes(lhs, rhs);
- if (params.is_broadcast)
- {
- switch (op_type)
- {
- case OpType::Equal:
- Broadcast4DSlowEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::NotEqual:
- Broadcast4DSlowNotEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Greater:
- Broadcast4DSlowGreaterWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::GreaterEqual:
- Broadcast4DSlowGreaterEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Less:
- Broadcast4DSlowLessWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::LessEqual:
- Broadcast4DSlowLessEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- default:
- throw std::runtime_error{"Invalid OpType for CompareLayer"};
- }
- }
- else // if (requires_broadcast == false)
- {
- switch (op_type)
- {
- case OpType::Equal:
- EqualWithScaling(params, getExtendedTensorShape(lhs),
- reinterpret_cast<const T *>(lhs->buffer()), getExtendedTensorShape(rhs),
- reinterpret_cast<const T *>(rhs->buffer()), getExtendedTensorShape(output),
- reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::NotEqual:
- NotEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Greater:
- GreaterWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::GreaterEqual:
- GreaterEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Less:
- LessWithScaling(params, getExtendedTensorShape(lhs),
- reinterpret_cast<const T *>(lhs->buffer()), getExtendedTensorShape(rhs),
- reinterpret_cast<const T *>(rhs->buffer()), getExtendedTensorShape(output),
- reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::LessEqual:
- LessEqualWithScaling(
- params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- default:
- throw std::runtime_error{"Invalid OpType for CompareLayer"};
- }
- }
- return;
+ using CompareFunction =
+ void (*)(ComparisonParams & params, const Shape &input1_shape, const T *input1_data,
+ const Shape &input2_shape, const T *input2_data, const Shape &output_shape,
+ bool *output_data);
+
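+ // Dispatch tables indexed by OpType, relying on the enum ordering checked by the static_asserts above.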
+ static const CompareFunction broadcast_fns[] = {
+ Broadcast4DSlowEqualWithScaling, Broadcast4DSlowNotEqualWithScaling,
+ Broadcast4DSlowGreaterWithScaling, Broadcast4DSlowGreaterEqualWithScaling,
+ Broadcast4DSlowLessWithScaling, Broadcast4DSlowLessEqualWithScaling,
+ };
+ static const CompareFunction non_broadcast_fns[] = {
+ EqualWithScaling, NotEqualWithScaling, GreaterWithScaling,
+ GreaterEqualWithScaling, LessWithScaling, LessEqualWithScaling,
+ };
+
+ static_assert(sizeof(broadcast_fns) == sizeof(non_broadcast_fns),
+ "Sizes of broadcast_fns and non_broadcast_fns must match!");
+
+ auto index = static_cast<int>(op_type);
+ if (index < 0 || index >= static_cast<int>(sizeof(broadcast_fns) / sizeof(broadcast_fns[0])))
+ throw std::runtime_error{"Invalid OpType for CompareLayer"};
+
+ CompareFunction fn = (params.is_broadcast ? broadcast_fns[index] : non_broadcast_fns[index]);
+
+ fn(params, getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
}
template <typename T>
{
bool requires_broadcast = !HaveSameShapes(lhs, rhs);
- if (requires_broadcast)
- {
- switch (op_type)
- {
- case OpType::Equal:
- Broadcast4DSlowEqual(
- getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::NotEqual:
- Broadcast4DSlowNotEqual(
- getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Greater:
- Broadcast4DSlowGreater(
- getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::GreaterEqual:
- Broadcast4DSlowGreaterEqual(
- getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Less:
- Broadcast4DSlowLess(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output),
- reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::LessEqual:
- Broadcast4DSlowLessEqual(
- getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- default:
- throw std::runtime_error{"Invalid OpType for CompareLayer"};
- }
- }
- else // if (requires_broadcast == false)
- {
- switch (op_type)
- {
- case OpType::Equal:
- EqualNoScaling(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::NotEqual:
- NotEqualNoScaling(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output),
- reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Greater:
- GreaterNoScaling(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output),
- reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::GreaterEqual:
- GreaterEqualNoScaling(
- getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::Less:
- LessNoScaling(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
- break;
- case OpType::LessEqual:
- LessEqualNoScaling(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
- getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
- getExtendedTensorShape(output),
- reinterpret_cast<bool *>(output->buffer()));
- break;
- default:
- throw std::runtime_error{"Invalid OpType for CompareLayer"};
- }
- }
- return;
+ using CompareFunction =
+ void (*)(const Shape &input1_shape, const T *input1_data, const Shape &input2_shape,
+ const T *input2_data, const Shape &output_shape, bool *output_data);
+
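+ // Same dispatch scheme as the quantized path: tables indexed by OpType, ordering checked by the static_asserts above.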
+ static const CompareFunction broadcast_fns[] = {
+ Broadcast4DSlowEqual, Broadcast4DSlowNotEqual, Broadcast4DSlowGreater,
+ Broadcast4DSlowGreaterEqual, Broadcast4DSlowLess, Broadcast4DSlowLessEqual,
+ };
+ static const CompareFunction non_broadcast_fns[] = {
+ EqualNoScaling, NotEqualNoScaling, GreaterNoScaling,
+ GreaterEqualNoScaling, LessNoScaling, LessEqualNoScaling,
+ };
+
+ static_assert(sizeof(broadcast_fns) == sizeof(non_broadcast_fns),
+ "Sizes of broadcast_fns and non_broadcast_fns must match!");
+
+ auto index = static_cast<int>(op_type);
+ if (index < 0 || index >= static_cast<int>(sizeof(broadcast_fns) / sizeof(broadcast_fns[0])))
+ throw std::runtime_error{"Invalid OpType for CompareLayer"};
+
+ CompareFunction fn = (requires_broadcast ? broadcast_fns[index] : non_broadcast_fns[index]);
+
+ fn(getExtendedTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getExtendedTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ getExtendedTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
}
+
} // namespace
CompareLayer::CompareLayer()
#include "../Tensor.h"
#include <cker/operation/FullyConnected.h>
+#include <cker/TensorUtils.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
FullyConnectedLayer::FullyConnectedLayer()
: _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
_activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena()),
- _is_hybrid(false)
+ _external_context(nullptr), _is_hybrid(false)
{
// DO NOTHING
}
op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
getTensorShape(_weights), reinterpret_cast<const int8_t *>(_weights->buffer()),
getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena);
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
+ _external_context->ruy_context());
#else
nnfw::cker::FullyConnectedHybrid(
op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
(_cached_weights) ? reinterpret_cast<const int8_t *>(_cached_weights)
: reinterpret_cast<const int8_t *>(_weights->buffer()),
getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena);
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
+ _external_context->ruy_context());
-// TODO Enable calling decrease_ref
-#if 0
if (_cached_weights == nullptr || _is_weights_freed)
return;
- auto weight_tensor = dynamic_cast<const Tensor *>(_weights);
- if (weight_tensor)
+ // Reaching here means '_cached_weights' is not nullptr and '_is_weights_freed' is false,
+ // i.e. this weight shape satisfies the condition of the ruy kernel's prepack cache.
+ // Once the weights are freed below, this point is not reached again, except in the case below.
+
+ // If the input is a zero vector, the ruy kernel path is bypassed entirely,
+ // so do not free the weights in that case.
+ const int input_size = getTensorShape(_input).FlatSize();
+ if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_input->buffer()), input_size))
+ return;
+
+ auto weight_tensor = nnfw::misc::polymorphic_downcast<const Tensor *>(_weights);
+
+ // This weight tensor could also be a constant tensor of other operations.
+ // Therefore, check its reference state as follows before releasing it.
+ auto tensor = const_cast<Tensor *>(weight_tensor);
+ if (tensor->buffer() == nullptr) // ref is already 0?
{
- auto tensor = const_cast<Tensor *>(weight_tensor);
+ _is_weights_freed = true;
+ return;
+ }
- tensor->decrease_ref();
- if (tensor->buffer() == nullptr) // ref == 0?
- {
- _is_weights_freed = true;
- }
+ tensor->decrease_ref();
+ if (tensor->buffer() == nullptr) // ref == 0?
+ {
+ _is_weights_freed = true;
}
-#endif // if 0
#endif
}
+void FullyConnectedLayer::fullyConnectedSparseWeight()
+{
+ float output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
+
+ nnfw::cker::FullyConnectedParams op_params;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+ op_params.activation = convertActivationType(_activation);
+
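+ // Sparse weights are described by segment and index arrays for the second dimension (a CSR-like layout).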
+ int w0_size = getTensorShape(_weights).Dims(0);
+ const uint16_t *w1_segments = _weights->w1_segments();
+ const uint16_t *w1_indices = _weights->w1_indices();
+
+ nnfw::cker::FullyConnectedSparseWeight(
+ op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
+ getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w0_size, w1_segments,
+ w1_indices);
+}
+
void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
const IPortableTensor *bias, ir::Activation activation,
- IPortableTensor *output)
+ IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context)
{
_input = input;
_weights = weights;
_output = output;
_is_hybrid = input->data_type() == OperandType::FLOAT32 &&
weights->data_type() == OperandType::QUANT_INT8_SYMM;
+ _external_context = external_context;
}
void FullyConnectedLayer::run()
{
fullyConnectedHybrid();
}
+ else if (_weights->is_sparse())
+ {
+ fullyConnectedSparseWeight();
+ }
else if (_input->data_type() == OperandType::FLOAT32)
{
fullyConnectedFloat32();
void FullyConnectedLayer::prepare()
{
-#ifdef USE_RUY_GEMV
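+ // If the constant bias is all zeros, drop it so that the kernels skip the bias addition.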
+ if (_bias && _bias->is_constant())
+ {
+ const int bias_size = getTensorShape(_bias).FlatSize();
+ if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size))
+ {
+ _bias = nullptr;
+ }
+ }
+
+#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(USE_RUY_GEMV)
// TODO This is workaround
// The only fc hybrid will use ruy kernel
if (_input->data_type() != OperandType::FLOAT32 ||
#define __ONERT_BACKEND_CPU_OPS_FULLYCONNECTEDLAYER_H__
#include <backend/IPortableTensor.h>
+#include "../ExternalContext.h"
#include "OperationUtils.h"
#include <exec/IFunction.h>
void fullyConnectedHybrid();
+ void fullyConnectedSparseWeight();
+
void configure(const IPortableTensor *input, const IPortableTensor *weights,
- const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output);
+ const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context);
void run() override;
ir::Activation _activation;
std::unique_ptr<nnfw::cker::FCTempArena> _temp_arena;
+ std::shared_ptr<ExternalContext> _external_context;
+
bool _is_hybrid;
#ifdef USE_RUY_GEMV
uint8_t *_cached_weights = nullptr; // weights to be cached and a key
+ bool _is_weights_freed = false; // whether the cached weights buffer has been freed
#endif
};
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "L2NormLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/L2Normalize.h>
+#include <cker/Types.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+void L2NormLayer::configure(const IPortableTensor *input, IPortableTensor *output)
+{
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ _input = input;
+ _output = output;
+}
+
+void L2NormLayer::run()
+{
+ switch (_input->data_type())
+ {
+ case OperandType::FLOAT32:
+ nnfw::cker::L2NormalizeFloat32(
+ getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ break;
+
+ case OperandType::QUANT_UINT8_ASYMM:
+ {
+ nnfw::cker::L2NormParams params;
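+ // The quantized kernel assumes an input zero point of 128 (checked below).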
+ assert(_input->data_offset() == 128);
+ params.input_zero_point = _input->data_offset();
+ nnfw::cker::L2NormalizeQuant8(
+ params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ }
+ break;
+
+ default:
+ throw std::runtime_error{"L2Norm: Unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_L2NORM_LAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_L2NORM_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+class L2NormLayer : public ::onert::exec::IFunction
+{
+public:
+ L2NormLayer() : _input(nullptr), _output(nullptr)
+ {
+ // Nothing
+ }
+
+public:
+ void configure(const IPortableTensor *input, IPortableTensor *output);
+
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_L2NORM_LAYER_H__
// NYI
}
-void LogSoftMaxLayer::configure(const Tensor *input, const float beta, const int axis,
- Tensor *output)
+void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta, const int axis,
+ IPortableTensor *output)
{
_input = input;
_output = output;
void logsoftmaxQuant8();
- void configure(const Tensor *input, const float beta, const int axis, Tensor *output);
+ void configure(const IPortableTensor *input, const float beta, const int axis,
+ IPortableTensor *output);
void run();
private:
- const Tensor *_input;
- Tensor *_output;
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
float _beta;
int _axis;
void *v;
};
+union ConstDataPtr {
+ const uint8_t *u8;
+ const int8_t *i8;
+ const uint32_t *u32;
+ const int32_t *i32;
+ const bool *b;
+ const float *f;
+ const int64_t *i64;
+ const void *v;
+};
+
uint32_t getNumberOfDimensions(const IPortableTensor *tensor);
uint32_t getNumberOfElements(const IPortableTensor *tensor);
// DO NOTHING
}
-void PadLayer::padFloat32()
+template <typename T> void PadLayer::padImpl(const T *constant_value_data)
{
- nnfw::cker::Pad(_padData, _padRank, getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()), _constantValueData.f);
+ nnfw::cker::Pad<T>(_padData, _padRank, getTensorShape(_input),
+ reinterpret_cast<const T *>(_input->buffer()), getTensorShape(_output),
+ reinterpret_cast<T *>(_output->buffer()), constant_value_data);
}
-void PadLayer::padQuant8() { throw std::runtime_error("Quantized Pad isn't supported NYI"); }
void PadLayer::configure(const IPortableTensor *input, IPortableTensor *output,
- const int32_t *padData, int32_t padRank, uint8_t *constantValueData)
+ const int32_t *padData, int32_t padRank, const void *constantValueData)
{
_input = input;
_output = output;
memcpy(_padData, padData, sizeof(_padData));
_padRank = padRank;
- _constantValueData.u8 = constantValueData;
+ _constantValueData.v = constantValueData;
}
void PadLayer::run()
{
if (_input->data_type() == OperandType::FLOAT32)
{
- padFloat32();
+ padImpl<float>(_constantValueData.f);
}
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- padQuant8();
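+ // When no constant pad value is given, pad with the output tensor's zero point.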
+ if (_constantValueData.u8 == nullptr)
+ {
+ uint8_t pad_value = static_cast<uint8_t>(_output->data_offset());
+ padImpl<uint8_t>(&pad_value);
+ }
+ else
+ {
+ padImpl<uint8_t>(_constantValueData.u8);
+ }
}
else
{
PadLayer();
public:
- void padFloat32();
-
- void padQuant8();
+ template <typename T> void padImpl(const T *constant_value_data);
void configure(const IPortableTensor *input, IPortableTensor *output, const int32_t *padData,
- int32_t padRank, uint8_t *constantValueData = nullptr);
+ int32_t padRank, const void *constantValueData = nullptr);
void run() override;
int32_t _padData[8];
int32_t _padRank;
- DataPtr _constantValueData;
+ ConstDataPtr _constantValueData;
};
} // namespace ops
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizeLayer.h"
+
+#include <cker/operation/Quantize.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+QuantizeLayer::QuantizeLayer() : _input(nullptr), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+template <typename InputT, typename OutputT> void QuantizeLayer::affineQuantize()
+{
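+ // Affine quantization: q = round(value / scale) + zero_point, clamped to the output type's range.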
+ nnfw::cker::Quantize(getTensorShape(_input), reinterpret_cast<const InputT *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<OutputT *>(_output->buffer()),
+ _output->data_scale(), _output->data_offset());
+}
+
+void QuantizeLayer::configure(const IPortableTensor *input, IPortableTensor *output)
+{
+ _input = input;
+ _output = output;
+}
+
+void QuantizeLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ affineQuantize<float, uint8_t>();
+ }
+ else
+ {
+ throw std::runtime_error{"Quantize: unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class QuantizeLayer : public ::onert::exec::IFunction
+{
+public:
+ QuantizeLayer();
+
+public:
+ template <typename InputT, typename OutputT> void affineQuantize();
+
+ void configure(const IPortableTensor *input, IPortableTensor *output);
+
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReLU6Layer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/ReLU6.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+ReLU6Layer::ReLU6Layer() : _input(nullptr), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+void ReLU6Layer::relu6Float32()
+{
+ nnfw::cker::ReLU6(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ reinterpret_cast<float *>(_output->buffer()));
+}
+
+void ReLU6Layer::relu6Quant8()
+{
+ // cker quant8 relu is not implemented yet
+ throw std::runtime_error{"NYI"};
+}
+
+void ReLU6Layer::configure(const IPortableTensor *input, IPortableTensor *output)
+{
+ _input = input;
+ _output = output;
+}
+
+void ReLU6Layer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ relu6Float32();
+ }
+ else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ relu6Quant8();
+ }
+ else
+ {
+ throw std::runtime_error{"ReLU6: unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class ReLU6Layer : public ::onert::exec::IFunction
+{
+public:
+ ReLU6Layer();
+
+public:
+ void relu6Float32();
+
+ void relu6Quant8();
+
+ void configure(const IPortableTensor *input, IPortableTensor *output);
+
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
throw std::runtime_error{"Reduce(generic): unsupported data type"};
}
}
+
+void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output,
+ const std::vector<int> &axes, bool keep_dims,
+ nnfw::cker::Reduce &reduce_kernel)
+{
+ const bool same_scale = (input->data_scale() == output->data_scale() &&
+ input->data_offset() == output->data_offset());
+
+ reduce_kernel.prepare(input->num_dimensions(), axes.size());
+
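+ // When the input and output quantization parameters differ, accumulate the sum in int32
+ // and requantize; otherwise fall back to the generic same-scale path below.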
+ if (!same_scale)
+ {
+ std::vector<int32_t> temp_sum(output->getShape().num_elements());
+ bool result = reduce_kernel.QuantizedMeanOrSum<uint8_t, int32_t>(
+ reinterpret_cast<const uint8_t *>(input->buffer()), input->data_offset(),
+ input->data_scale(), getTensorShape(input), reinterpret_cast<uint8_t *>(output->buffer()),
+ output->data_offset(), output->data_scale(), getTensorShape(output), axes, keep_dims,
+ temp_sum.data(), true, [](const int32_t current, const uint8_t in) -> int32_t {
+ const int32_t actual_in = static_cast<int32_t>(in);
+ return current + actual_in;
+ });
+
+ if (!result)
+ {
+ throw std::runtime_error{"Reduce: Fail to run"};
+ }
+
+ return;
+ }
+
+ evalGeneric<ReduceType::kSum>(input, output, axes, keep_dims, reduce_kernel);
+}
+
} // namespace
ReduceLayer::ReduceLayer()
switch (_reduceType)
{
case ReduceType::kSum:
+ if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ evalSumQuantized(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ return;
+ }
evalGeneric<ReduceType::kSum>(_input, _output, axes, _keep_dims, *_reduce_kernel);
break;
case ReduceType::kProd:
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "OperationUtils.h"
+#include "ResizeBilinearLayer.h"
+#include "cker/operation/ResizeBilinear.h"
+#include <cker/Types.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+ResizeBilinearLayer::ResizeBilinearLayer()
+ : _input(nullptr), _output(nullptr), _output_height(0), _output_width(0), _align_corners(false),
+ _half_pixel_centers(false)
+{
+ // DO NOTHING
+}
+
+void ResizeBilinearLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+ int32_t output_height, int32_t output_width, bool align_corners,
+ bool half_pixel_centers)
+{
+ _input = input;
+ _output = output;
+ _output_height = output_height;
+ _output_width = output_width;
+ _align_corners = align_corners;
+ _half_pixel_centers = half_pixel_centers;
+}
+
+void ResizeBilinearLayer::run()
+{
+ nnfw::cker::ResizeBilinearParams params;
+ params.align_corners = _align_corners;
+ params.half_pixel_centers = _half_pixel_centers;
+ params.output_height = _output_height;
+ params.output_width = _output_width;
+
+ switch (_input->data_type())
+ {
+ case OperandType::FLOAT32:
+ nnfw::cker::ResizeBilinear(
+ params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ break;
+
+ case OperandType::QUANT_UINT8_ASYMM:
+ nnfw::cker::ResizeBilinear(
+ params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ break;
+
+ case OperandType::UINT8:
+ case OperandType::BOOL8:
+ case OperandType::FLOAT16:
+ case OperandType::INT32:
+ case OperandType::INT64:
+ case OperandType::QUANT_INT8_SYMM:
+ throw std::runtime_error("ResizeBilinear NYI");
+ break;
+ default:
+ throw std::runtime_error("ResizeBilinear unsupported data type");
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_RESIZEBILINEAR_H__
+#define __ONERT_BACKEND_CPU_OPS_RESIZEBILINEAR_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class ResizeBilinearLayer : public ::onert::exec::IFunction
+{
+public:
+ ResizeBilinearLayer();
+
+public:
+ void configure(const IPortableTensor *input1, IPortableTensor *output, int32_t output_height,
+ int32_t output_width, bool align_corners, bool half_pixel_centers);
+
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ IPortableTensor *_output;
+ int32_t _output_height;
+ int32_t _output_width;
+ bool _align_corners;
+ bool _half_pixel_centers;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_RESIZEBILINEAR_H__
}
}
-void SliceLayer::sliceFloat32()
+template <typename T> void SliceLayer::sliceImpl()
{
const int kMaxDim = nnfw::cker::Shape::kMaxSmallSize;
}
nnfw::cker::Slice(op_params, getExtendedTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()),
- reinterpret_cast<float *>(_output->buffer()));
-}
-
-void SliceLayer::sliceQuant8()
-{
- // cker quant8 slice is not implemented yet
- throw std::runtime_error{"NYI"};
+ reinterpret_cast<const T *>(_input->buffer()),
+ reinterpret_cast<T *>(_output->buffer()));
}
void SliceLayer::configure(const IPortableTensor *input, const IPortableTensor *begin,
{
if (_input->data_type() == OperandType::FLOAT32)
{
- sliceFloat32();
+ sliceImpl<float>();
}
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- sliceQuant8();
+ sliceImpl<uint8_t>();
}
else
{
void run() override;
private:
- void sliceFloat32();
- void sliceQuant8();
+ template <typename T> void sliceImpl();
template <typename T>
void GetBeginAndSizeVectors(int dimensions, const IPortableTensor *begin,
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SpaceToDepthLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/SpaceToDepth.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+SpaceToDepthLayer::SpaceToDepthLayer() : _input(nullptr), _block_size(0), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+template <typename T> void SpaceToDepthLayer::spaceToDepth()
+{
+ nnfw::cker::SpaceToDepthParams params;
+ params.block_size = _block_size;
+
+ nnfw::cker::SpaceToDepth(params, getTensorShape(_input),
+ reinterpret_cast<const T *>(_input->buffer()), getTensorShape(_output),
+ reinterpret_cast<T *>(_output->buffer()));
+}
+
+void SpaceToDepthLayer::configure(const IPortableTensor *input, const int32_t block_size,
+ IPortableTensor *output)
+{
+ _input = input;
+ _block_size = block_size;
+ _output = output;
+}
+
+void SpaceToDepthLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ spaceToDepth<float>();
+ }
+ else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ spaceToDepth<uint8_t>();
+ }
+ else
+ {
+ throw std::runtime_error{"SpaceToDepth: unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_SPACE_TO_DEPTH_LAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_SPACE_TO_DEPTH_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+class SpaceToDepthLayer : public ::onert::exec::IFunction
+{
+public:
+ SpaceToDepthLayer();
+
+ void configure(const IPortableTensor *input, const int32_t block_size, IPortableTensor *output);
+
+ void run() override;
+
+private:
+ template <typename T> void spaceToDepth();
+
+ const IPortableTensor *_input;
+ int32_t _block_size;
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_SPACE_TO_DEPTH_LAYER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SplitVLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/SplitV.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+SplitVLayer::SplitVLayer()
+ : _input(nullptr), _size_splits(nullptr), _split_dim(nullptr), _num_splits(0), _outputs()
+{
+ // DO NOTHING
+}
+
+template <typename T> void SplitVLayer::splitV(void)
+{
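+ // The split axis is read at run time from the split_dim input tensor.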
+ nnfw::cker::SplitVParams op_params;
+ op_params.axis = *(reinterpret_cast<const int32_t *>(_split_dim->buffer()));
+ op_params.num_split = _num_splits;
+
+ std::vector<T *> outputPtrs;
+ std::vector<nnfw::cker::Shape> outshape;
+
+ for (const auto output : _outputs)
+ {
+ assert(output->total_size() == sizeOfData(output->data_type(), output->getShape().dims()));
+ outputPtrs.emplace_back(reinterpret_cast<T *>(output->buffer()));
+ outshape.emplace_back(getTensorShape(output));
+ }
+
+ assert(_input->total_size() == sizeOfData(_input->data_type(), _input->getShape().dims()));
+ nnfw::cker::SplitV<T>(op_params, getTensorShape(_input), reinterpret_cast<T *>(_input->buffer()),
+ outshape, outputPtrs.data());
+}
+
+void SplitVLayer::configure(const IPortableTensor *input, const IPortableTensor *size_splits,
+ const IPortableTensor *split_dim, uint16_t num_splits,
+ std::vector<IPortableTensor *> &outputs)
+{
+ assert(input != nullptr);
+
+ _num_splits = num_splits;
+ _size_splits = size_splits;
+ _input = input;
+ _split_dim = split_dim;
+ _outputs = outputs;
+}
+
+void SplitVLayer::run()
+{
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ splitV<float>();
+ }
+ else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ splitV<uint8_t>();
+ }
+ else if (_input->data_type() == OperandType::INT32)
+ {
+ splitV<int32_t>();
+ }
+ else if (_input->data_type() == OperandType::INT64)
+ {
+ splitV<int64_t>();
+ }
+ else
+ {
+ throw std::runtime_error{"SplitV: unsupported input type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_SPLIT_V_LAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_SPLIT_V_LAYER_H__
+
+#include <backend/IPortableTensor.h>
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class SplitVLayer : public ::onert::exec::IFunction
+{
+public:
+ SplitVLayer();
+
+public:
+ template <typename T> void splitV(void);
+
+ void configure(const IPortableTensor *input, const IPortableTensor *size_splits,
+ const IPortableTensor *split_dim, uint16_t num_splits,
+ std::vector<IPortableTensor *> &outputs);
+
+ void run() override;
+
+private:
+ const IPortableTensor *_input;
+ const IPortableTensor *_size_splits;
+ const IPortableTensor *_split_dim;
+ uint16_t _num_splits;
+ std::vector<IPortableTensor *> _outputs;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_SPLIT_V_LAYER_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "StatelessRandomUniformLayer.h"
+
+#include <cker/operation/StatelessRandomUniform.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+StatelessRandomUniformLayer::StatelessRandomUniformLayer()
+ : _shape(nullptr), _seed(nullptr), _output(nullptr)
+{
+ // DO NOTHING
+}
+
+void StatelessRandomUniformLayer::configure(const IPortableTensor *shape,
+ const IPortableTensor *seed, IPortableTensor *output)
+{
+ _shape = shape;
+ _seed = seed;
+ _output = output;
+}
+
+void StatelessRandomUniformLayer::StatelessRandomUniformFloat32()
+{
+ nnfw::cker::StatelessRandomUniform(
+ getTensorShape(_shape), reinterpret_cast<const int *>(_shape->buffer()),
+ getTensorShape(_seed), reinterpret_cast<const int *>(_seed->buffer()),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+}
+
+void StatelessRandomUniformLayer::run()
+{
+ switch (_output->data_type())
+ {
+ // TODO: Also support INT8 and UINT8 once quantization is applied.
+ case OperandType::FLOAT32:
+ StatelessRandomUniformFloat32();
+ break;
+ default:
+ throw std::runtime_error{"StatelessRandomUniformLayer: unsupported data type"};
+ }
+}
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_STATELESS_RANDOM_UNIFORM_H__
+#define __ONERT_BACKEND_CPU_OPS_STATELESS_RANDOM_UNIFORM_H__
+
+#include <backend/IPortableTensor.h>
+#include "OperationUtils.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class StatelessRandomUniformLayer : public ::onert::exec::IFunction
+{
+public:
+ StatelessRandomUniformLayer();
+
+public:
+ void configure(const IPortableTensor *shape, const IPortableTensor *seed,
+ IPortableTensor *output);
+
+ void StatelessRandomUniformFloat32();
+
+ void run() override;
+
+private:
+ const IPortableTensor *_shape;
+ const IPortableTensor *_seed;
+
+ IPortableTensor *_output;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_STATELESS_RANDOM_UNIFORM_H__
{
}
+ virtual ~BackendContext() = default;
+
void initialize(const std::vector<OperationInfo> &operation_list,
const std::vector<ir::OperandIndex> &operand_list);
void initConsts();
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_IEXTERNAL_CONTEXT_H__
+#define __ONERT_BACKEND_IEXTERNAL_CONTEXT_H__
+
+namespace onert
+{
+namespace backend
+{
+
+struct IExternalContext
+{
+ virtual ~IExternalContext() = default;
+ virtual void setMaxNumThreads(int) = 0;
+};
+
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_IEXTERNAL_CONTEXT_H__
{
public:
virtual ~IPortableTensor() = default;
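+ // Sparse-weight queries; dense tensors use these defaults (not sparse, no index data).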
+ virtual bool is_sparse() const { return false; }
+ virtual const uint16_t *w1_segments() const { return nullptr; }
+ virtual const uint16_t *w1_indices() const { return nullptr; }
public:
bool has_padding() const final { return false; }
namespace backend
{
+struct IDynamicTensorManager;
+
class ITensor
{
public:
virtual void access(const std::function<void(ITensor &tensor)> &fn) = 0;
/**
+ * @brief Return the dynamic tensor manager
+ *
+ * If dynamic tensors are not supported, it returns @c nullptr .
+ *
+ * @return IDynamicTensorManager* DynamicTensorManager
+ */
+ virtual IDynamicTensorManager *dynamic_tensor_manager() { return nullptr; }
+
+ /**
* @brief Return true if the tensor is constant
*/
virtual bool is_constant() const
virtual std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) = 0;
/**
- * @brief Set the External Tensor object
+ * @brief Set the migrant tensor object
*
* @return true if succeeded
* @return false if failed or unsupported
*/
- virtual bool setExternalTensor(const ir::OperandIndex &, const std::shared_ptr<IPortableTensor> &)
+ virtual bool setMigrantTensor(const ir::OperandIndex &, const std::shared_ptr<IPortableTensor> &)
{
return false;
}
virtual ~ITensorRegistry() = default;
/**
- * @brief Returns pointer of ITensor among managed and external tensors
+ * @brief Returns pointer of ITensor among native and migrant tensors
+ *
+ * A native tensor is a tensor that is managed by this backend.
+ * A migrant tensor is a tensor that is imported from another backend.
+ *
* @note Return tensor cannot be used longer than dynamic tensor manager
*/
virtual std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &) = 0;
/**
- * @brief Returns pointer of ITensor among managed tensors
+ * @brief Returns pointer of ITensor among native tensors
*
- * Unlike @c getITensor , this function only searches from managed tensors
- * @note Return tensor cannot be used longer than dynamic tensor manager
+ * Unlike @c getITensor , this function only searches from native tensors
+ *
+ * @note Returned tensor cannot be used longer than dynamic tensor manager
*/
- virtual std::shared_ptr<ITensor> getManagedITensor(const ir::OperandIndex &) = 0;
+ virtual std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &) = 0;
};
} // namespace backend
std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
{
static_assert(std::is_base_of<ITensor, T_Tensor>::value, "T_Tensor must derive from ITensor.");
- auto external_tensor = _external.find(ind);
- if (external_tensor != _external.end())
+ auto external_tensor = _migrant.find(ind);
+ if (external_tensor != _migrant.end())
return external_tensor->second;
- return getManagedTensor(ind);
+ return getNativeTensor(ind);
}
- std::shared_ptr<ITensor> getManagedITensor(const ir::OperandIndex &ind) override
+ std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override
{
- return getManagedTensor(ind);
+ return getNativeTensor(ind);
}
std::shared_ptr<IPortableTensor> getPortableTensor(const ir::OperandIndex &ind)
{
- auto external_tensor = _external.find(ind);
- if (external_tensor != _external.end())
+ auto external_tensor = _migrant.find(ind);
+ if (external_tensor != _migrant.end())
{
if (external_tensor->second)
return external_tensor->second;
}
- return getManagedTensor(ind);
+ return getNativeTensor(ind);
}
- std::shared_ptr<T_Tensor> getManagedTensor(const ir::OperandIndex &ind)
+ std::shared_ptr<T_Tensor> getNativeTensor(const ir::OperandIndex &ind)
{
- auto tensor = _managed.find(ind);
- if (tensor != _managed.end())
+ auto tensor = _native.find(ind);
+ if (tensor != _native.end())
return tensor->second;
return nullptr;
}
- bool setExternalTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor)
+ bool setMigrantTensor(const ir::OperandIndex &ind, const std::shared_ptr<IPortableTensor> &tensor)
{
// TODO Uncomment this as two tensors for an index is not allowed.
// But now it is temporarily allowed as a workaround. External one hides Managed one.
- // auto itr = _managed.find(ind);
- // if (itr != _managed.end() && itr->second != nullptr && tensor != nullptr)
+ // auto itr = _native.find(ind);
+ // if (itr != _native.end() && itr->second != nullptr && tensor != nullptr)
// throw std::runtime_error{
- // "Tried to set an external tensor but an managed tensor already exists."};
- _external[ind] = tensor;
+ // "Tried to set an migrant tensor but an native tensor already exists."};
+ _migrant[ind] = tensor;
return true;
}
- void setManagedTensor(const ir::OperandIndex &ind, const std::shared_ptr<T_Tensor> &tensor)
+ void setNativeTensor(const ir::OperandIndex &ind, const std::shared_ptr<T_Tensor> &tensor)
{
- auto itr = _external.find(ind);
- if (itr != _external.end() && itr->second != nullptr && tensor != nullptr)
+ auto itr = _migrant.find(ind);
+ if (itr != _migrant.end() && itr->second != nullptr && tensor != nullptr)
throw std::runtime_error{
- "Tried to set a managed tensor but an external tensor already exists."};
- _managed[ind] = tensor;
+ "Tried to set a native tensor but an migrant tensor already exists."};
+ _native[ind] = tensor;
}
- const ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &managed_tensors() { return _managed; }
+ const ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &native_tensors() { return _native; }
- const ir::OperandIndexMap<std::shared_ptr<IPortableTensor>> &external_tensors()
+ const ir::OperandIndexMap<std::shared_ptr<IPortableTensor>> &migrant_tensors()
{
- return _external;
+ return _migrant;
}
private:
- ir::OperandIndexMap<std::shared_ptr<IPortableTensor>> _external;
- ir::OperandIndexMap<std::shared_ptr<T_Tensor>> _managed;
+ ir::OperandIndexMap<std::shared_ptr<IPortableTensor>> _migrant;
+ ir::OperandIndexMap<std::shared_ptr<T_Tensor>> _native;
};
} // namespace backend
#include "MemoryManager.h"
-#include "backend/ITensorManager.h"
+#include "backend/IStaticTensorManager.h"
#include "ir/OperandIndexMap.h"
#include "ir/OperandInfo.h"
#include "TensorRegistry.h"
namespace cpu_common
{
-class StaticTensorManager : public backend::ITensorManager
+class StaticTensorManager : public backend::IStaticTensorManager
{
public:
StaticTensorManager(const std::shared_ptr<TensorRegistry> ®);
Tensor() = delete;
public:
- Tensor(const ir::OperandInfo &info, const ir::Layout layout)
- : _info(info), _layout(layout), _buffer(nullptr), _num_references(0), _allocator(nullptr)
+ Tensor(const ir::OperandInfo &info, const ir::Layout layout,
+ IDynamicTensorManager *dynamic_tensor_manager)
+ : _info(info), _layout(layout), _buffer(nullptr), _num_references(0),
+ _dynamic_tensor_manager(dynamic_tensor_manager), _allocator(nullptr)
{
// DO NOTHING
}
public:
// Only one of two method 'setBuffer' must be called once
+
+ /**
+ * @brief Set the Buffer object. This method is called for static and non-const tensor
+ */
void setBuffer(uint8_t *buffer)
{
- assert(_buffer == nullptr && _allocator == nullptr);
+ assert(_buffer == nullptr);
_buffer = buffer;
}
+
+ /**
+ * @brief Set the Buffer object. This method is called for dynamic or const tensor
+ */
void setBuffer(const std::shared_ptr<Allocator> &alloc)
{
- assert(_buffer == nullptr && _allocator == nullptr);
+ assert(_buffer == nullptr);
_allocator = alloc;
+ _buffer = alloc->base();
}
// This works just as setBuffer but it simply overwrite existing Allocator without nullptr check
- void overwriteBuffer(const std::shared_ptr<Allocator> &alloc) { _allocator = alloc; }
+ void overwriteBuffer(const std::shared_ptr<Allocator> &alloc)
+ {
+ _allocator = alloc;
+ _buffer = alloc->base();
+ }
/**
* @brief Mark this tensor does not have memory.
}
public:
- uint8_t *buffer() const override
- {
- if (_allocator != nullptr)
- return _allocator->base();
- else
- return _buffer;
- }
+ uint8_t *buffer() const override { return _buffer; }
/**
* @brief Get dimension by index
*
bool is_constant() const override { return _info.isConstant(); }
bool is_dynamic() const override { return _info.isDynamic(); }
void set_dynamic() override { _info.setDynamic(); }
+ IDynamicTensorManager *dynamic_tensor_manager() override { return _dynamic_tensor_manager; }
+ bool is_sparse() const override { return _info.typeInfo().sparse(); }
+ virtual const uint16_t *w1_segments() const override { return _info.typeInfo().w1_segments(); }
+ virtual const uint16_t *w1_indices() const override { return _info.typeInfo().w1_indices(); }
virtual void increase_ref()
{
assert(is_dynamic() ||
// when not dynamic
- (_buffer != nullptr || _allocator != nullptr));
+ (_buffer != nullptr));
++_num_references;
}
assert(_buffer != nullptr || _allocator != nullptr);
assert(_num_references > 0);
--_num_references;
- // Only constant tensor has allocator pointer
+ // Both constant tensors and dynamic tensors hold an _allocator
if (_num_references == 0)
{
if (_buffer != nullptr)
_buffer = nullptr;
- else
+ if (_allocator != nullptr)
{
_allocator->release();
_allocator = nullptr;
ir::Layout _layout;
uint8_t *_buffer;
int32_t _num_references;
+ IDynamicTensorManager *_dynamic_tensor_manager;
private:
+ /**
+   * @brief Memory allocator for dynamic tensors and constant tensors
+   *        Since maintaining both _allocator and _buffer is confusing,
+   *        this code mainly uses _buffer (not _allocator->base()) as the memory pointer.
+   *        _allocator (a shared_ptr) is kept only to guarantee that _buffer stays valid.
+ */
std::shared_ptr<Allocator> _allocator;
};
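
The buffer/allocator split above can be summarized with a small standalone sketch (illustrative only, not onert code): the tensor always reads through _buffer, and _allocator merely keeps the allocation alive for constant or dynamic tensors.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

class AllocatorSketch
{
public:
  explicit AllocatorSketch(size_t size) : _data(size) {}
  uint8_t *base() { return _data.data(); }

private:
  std::vector<uint8_t> _data;
};

class TensorSketch
{
public:
  // Static, non-const tensor: memory is owned elsewhere (e.g. by a memory planner).
  void setBuffer(uint8_t *buffer)
  {
    assert(_buffer == nullptr);
    _buffer = buffer;
  }
  // Dynamic or const tensor: memory is owned through the allocator.
  void setBuffer(const std::shared_ptr<AllocatorSketch> &alloc)
  {
    assert(_buffer == nullptr);
    _allocator = alloc;
    _buffer = alloc->base();
  }
  // Reads never need to consult _allocator anymore.
  uint8_t *buffer() const { return _buffer; }

private:
  uint8_t *_buffer = nullptr;
  std::shared_ptr<AllocatorSketch> _allocator;
};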
void visit(const ir::operation::LogicalNot &op) override;
void visit(const ir::operation::LogicalOr &op) override;
void visit(const ir::operation::Logistic &op) override;
+ void visit(const ir::operation::L2Normalization &op) override;
void visit(const ir::operation::MatrixBandPart &op) override;
void visit(const ir::operation::Max &op) override;
void visit(const ir::operation::Min &op) override;
void visit(const ir::operation::Reshape &op) override;
void visit(const ir::operation::Round &op) override;
void visit(const ir::operation::RSQRT &op) override;
+ void visit(const ir::operation::ResizeBilinear &op) override;
void visit(const ir::operation::Reverse &op) override;
void visit(const ir::operation::Select &op) override;
void visit(const ir::operation::Shape &op) override;
void visit(const ir::operation::LogicalNot &op) override;
void visit(const ir::operation::LogicalOr &op) override;
void visit(const ir::operation::Logistic &op) override;
+ void visit(const ir::operation::L2Normalization &op) override;
void visit(const ir::operation::MatrixBandPart &op) override;
void visit(const ir::operation::Max &op) override;
void visit(const ir::operation::Min &op) override;
void visit(const ir::operation::Reshape &op) override;
void visit(const ir::operation::Round &op) override;
void visit(const ir::operation::RSQRT &op) override;
+ void visit(const ir::operation::ResizeBilinear &op) override;
void visit(const ir::operation::Reverse &op) override;
void visit(const ir::operation::Select &op) override;
void visit(const ir::operation::Shape &op) override;
/**
* @brief To allocate memory for output tensor if needed
*/
+ // TODO Remove this, as it is no longer used
backend::IDynamicTensorManager *_dynamic_tensor_manager;
/**
* @brief To get tensor object and access tensor-level info, e.g., ITensor::buffer()
size_t operandSize(void) const;
const OperationIndexSet &getUses() const { return _uses; }
- const OperationIndexSet &getDef() const { return _def; }
+ OperationIndex getDef() const { return _def; }
void insertUse(const OperationIndex &idx);
void removeUse(const OperationIndex &idx);
- void insertDef(const OperationIndex &idx);
- void removeDef(const OperationIndex &idx);
+ void setDef(const OperationIndex &idx);
+ void unsetDef();
public:
void type(const DataType type) { _info.type(type); };
std::shared_ptr<Data> _data;
OperationIndexSet _uses;
- OperationIndexSet _def; // size is 0 (constant) or 1 (from def operation)
+ OperationIndex _def;
};
} // namespace ir
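
A brief usage sketch of the single-def API introduced above (assuming onert's ir/Operand.h is available): an operand now records at most one defining operation instead of a set.

#include "ir/Operand.h"

// Hypothetical helper, for illustration only.
void rewireDef(onert::ir::Operand &operand, const onert::ir::OperationIndex &new_def)
{
  if (operand.getDef().valid()) // previously: operand.getDef().size() > 0
    operand.unsetDef();         // previously: operand.removeDef(old_def)
  operand.setDef(new_def);      // previously: operand.insertDef(new_def)
}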
#include "ir/operation/Pack.h"
#include "ir/operation/Select.h"
#include "ir/operation/Split.h"
+#include "ir/operation/SplitV.h"
#include "ir/operation/Unpack.h"
#include "ir/operation/Pad.h"
#include "ir/operation/Min.h"
#include "ir/operation/BatchMatMul.h"
#include "ir/operation/FusedBatchNorm.h"
#include "ir/operation/LogSoftmax.h"
+#include "ir/operation/Quantize.h"
+#include "ir/operation/StatelessRandomUniform.h"
OP(Pack)
OP(Select)
OP(Split)
+OP(SplitV)
OP(Unpack)
OP(Pad)
OP(Custom)
OP(BatchMatMul)
OP(FusedBatchNorm)
OP(LogSoftmax)
+OP(Quantize)
+OP(StatelessRandomUniform)
#define __ONERT_IR_TYPEINFO_H__
#include <cstdint>
+#include <vector>
#include "ir/DataType.h"
TypeInfo() = delete;
explicit TypeInfo(DataType type, float scale = 0, int32_t offset = 0)
- : _type(type), _scale(scale), _offset(offset)
+ : _type(type), _scale(scale), _offset(offset), _sparse(false)
{
}
DataType type() const { return _type; }
float scale() const { return _scale; }
int32_t offset() const { return _offset; }
+ bool sparse() const { return _sparse; }
+ const uint16_t *w1_segments() const { return _w1_segments.data(); }
+ const uint16_t *w1_indices() const { return _w1_indices.data(); }
public:
void type(const DataType type) { _type = type; }
+ void sparse2DMetadata(std::vector<uint16_t> &&w1_segments, std::vector<uint16_t> &&w1_indices)
+ {
+ _sparse = true;
+ _w1_segments = w1_segments;
+ _w1_indices = w1_indices;
+ }
private:
DataType _type;
+ // for quantization
float _scale;
int32_t _offset;
+ // for sparsity
+ bool _sparse;
+ std::vector<uint16_t> _w1_segments;
+ std::vector<uint16_t> _w1_indices;
};
bool operator==(const TypeInfo &lhs, const TypeInfo &rhs);
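
A hedged usage sketch of the sparsity metadata added above (assuming onert's ir/TypeInfo.h; whether w1_segments/w1_indices follow a CSR-style layout is an assumption here, the diff only shows that TypeInfo stores and exposes them):

#include <cassert>
#include <cstdint>
#include <vector>
#include "ir/TypeInfo.h"

void markSparse(onert::ir::TypeInfo &type_info)
{
  // Example CSR-like data for a 3x4 matrix with non-zeros at (0,1), (1,0) and (1,3).
  std::vector<uint16_t> segments{0, 1, 3, 3}; // per-row start offsets into `indices`
  std::vector<uint16_t> indices{1, 0, 3};     // second-dimension index of each non-zero
  type_info.sparse2DMetadata(std::move(segments), std::move(indices));
  assert(type_info.sparse());
}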
enum Input
{
INPUT = 0,
- BLOCK_SIZE = 1
+ BLOCK_SIZE = 1,
+ CROPS_DATA = 2
};
public:
public:
void accept(OperationVisitor &v) const override;
- OpCode opcode() const final { return OpCode::Softmax; }
+ OpCode opcode() const final { return OpCode::LogSoftmax; }
public:
const Param ¶m() const { return _param; }
{
INPUT = 0,
PAD = 1,
- // VALUE = 2 Not allow padding value operand yet
+ VALUE = 2
};
public:
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_QUANTIZE_H__
+#define __ONERT_IR_OPERATION_QUANTIZE_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Quantize : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ };
+
+public:
+ Quantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::Quantize; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_QUANTIZE_H__
public:
enum Input
{
- INPUT = 0
+ INPUT = 0,
};
struct Param
{
int32_t height_out;
int32_t width_out;
+ bool align_corners;
+ bool half_pixel_centers;
};
public:
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __ONERT_IR_OPERATION_SPLIT_V_H__
+#define __ONERT_IR_OPERATION_SPLIT_V_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+class SplitV : public Operation
+{
+public:
+ enum Input
+ {
+ INPUT = 0,
+ SIZE_SPLITS = 1,
+ SPLIT_DIM = 2
+ };
+
+ struct Param
+ {
+ int num_splits;
+ };
+
+public:
+ SplitV(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param ¶m);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::SplitV; }
+
+public:
+ const Param ¶m() const { return _param; }
+
+private:
+ Param _param;
+};
+} // namespace operation
+} // namespace ir
+} // namespace onert
+#endif // __ONERT_IR_OPERATION_SPLIT_V_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_STATELESS_RANDOM_UNIFORM_H__
+#define __ONERT_IR_OPERATION_STATELESS_RANDOM_UNIFORM_H__
+
+#include <memory>
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class StatelessRandomUniform : public Operation
+{
+public:
+ enum Input
+ {
+ SHAPE = 0,
+ SEED = 1
+ };
+
+public:
+ StatelessRandomUniform(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+
+public:
+ void accept(OperationVisitor &v) const override;
+ OpCode opcode() const final { return OpCode::StatelessRandomUniform; }
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_STATELESS_RANDOM_UNIFORM_H__
template <typename T> ir::Shape inferRangeShape(T start_val, T limit_val, T delta_val);
+ir::Shape inferResizeBilinearShape(const ir::Shape &in_shape, const int32_t output_height,
+ const int32_t output_width);
+
ir::Shape inferSelectShape(const ir::Shape &input_cond_shape, const ir::Shape &input_true_shape,
const ir::Shape &input_false_shape);
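
A sketch of what inferResizeBilinearShape (declared above) is expected to compute for an NHWC input (an assumption for illustration, not the repository implementation): batch and channel dimensions are preserved while H and W come from the operation parameters.

#include <cassert>
#include <cstdint>
#include "ir/Shape.h"

// Assumes onert::ir::Shape exposes Shape(int rank) and a mutable dim(int) accessor.
onert::ir::Shape inferResizeBilinearShapeSketch(const onert::ir::Shape &in_shape,
                                                int32_t output_height, int32_t output_width)
{
  assert(in_shape.rank() == 4); // matches the validator's rank-4 requirement
  onert::ir::Shape out_shape(4);
  out_shape.dim(0) = in_shape.dim(0); // batch
  out_shape.dim(1) = output_height;   // Param::height_out
  out_shape.dim(2) = output_width;    // Param::width_out
  out_shape.dim(3) = in_shape.dim(3); // channels
  return out_shape;
}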
void DynamicTensorManager::applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape)
{
// NOTE Handle user tensors first
- auto user_tensor = _user_tensors->getManagedTensor(ind);
+ auto user_tensor = _user_tensors->getNativeTensor(ind);
if (user_tensor)
{
// User tensors cannot be reallocated.
if (buffer_size < new_size)
throw std::runtime_error{"ExecutorBase: output buffer size is less than output tensor size"};
user_tensor->setShape(new_shape);
+ return;
}
- // NOTE Then handle managed tensors
- auto tensor = _tensors->getManagedTensor(ind);
+ // NOTE Then handle native tensors
+ auto tensor = _tensors->getNativeTensor(ind);
assert(tensor);
bool previously_dynamic = tensor->is_dynamic();
const ir::OperandInfo &tensor_info,
ir::Layout backend_layout)
{
- assert(_tensors->getManagedTensor(ind) == nullptr);
- auto tensor = std::make_shared<cpu_common::Tensor>(tensor_info, backend_layout);
- _tensors->setManagedTensor(ind, tensor);
+ assert(_tensors->getNativeTensor(ind) == nullptr);
+ auto tensor = std::make_shared<cpu_common::Tensor>(tensor_info, backend_layout, this);
+ _tensors->setNativeTensor(ind, tensor);
}
void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind)
auto &input_set = find->second;
for (auto input_ind : input_set)
{
- if (!_tensors->getManagedTensor(input_ind)->is_dynamic())
+ if (!_tensors->getNativeTensor(input_ind)->is_dynamic())
continue;
_dynamic_mem_mgr->deallocate(input_ind);
void DynamicTensorManager::deallocSubgraphOutput(ir::OperandIndex output_ind)
{
- if (!_tensors->getManagedTensor(output_ind)->is_dynamic())
+ if (!_tensors->getNativeTensor(output_ind)->is_dynamic())
return;
_dynamic_mem_mgr->deallocate(output_ind);
* @todo DynamicMemoryManager is not optimized. Optimized one is needed
*/
std::shared_ptr<cpu_common::DynamicMemoryManager> _dynamic_mem_mgr;
+  // TODO Refactoring: Merge the two TensorRegistries into one
const std::shared_ptr<cpu_common::TensorRegistry> _tensors;
const std::shared_ptr<UserTensorRegistry> _user_tensors;
std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
for (const auto input_index : node.getInputs())
{
- auto input_alloc = getTensor(input_index);
+ auto input_tensor = getTensor(input_index);
- input_tensors.emplace_back(input_alloc);
+ input_tensors.emplace_back(input_tensor);
}
std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
exec::DynAllocInfoMap outputs_dyn_alloc_info;
for (const auto output_index : node.getOutputs())
{
- auto output_alloc = getTensor(output_index);
+ auto output_tensor = getTensor(output_index);
- output_tensors.emplace_back(output_alloc);
+ output_tensors.emplace_back(output_tensor);
const auto output_tensor_builder = getTensorBuilder(output_index);
if (output_tensor_builder->supportDynamicTensor())
{
auto output_dyn_manager = output_tensor_builder->dynamicTensorManager();
- outputs_dyn_alloc_info[output_alloc] = exec::DynAllocInfo{output_index, output_dyn_manager};
+ outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index, output_dyn_manager};
}
}
std::vector<std::shared_ptr<backend::ITensor>> input_tensors;
for (const auto input_index : node.getInputs())
{
- auto input_alloc = getTensor(input_index);
+ auto input_tensor = getTensor(input_index);
- input_tensors.emplace_back(input_alloc);
+ input_tensors.emplace_back(input_tensor);
}
std::vector<std::shared_ptr<backend::ITensor>> output_tensors;
std::unordered_map<std::shared_ptr<ITensor>, exec::DynAllocInfo> outputs_dyn_alloc_info;
for (const auto output_index : node.getOutputs())
{
- auto output_alloc = getTensor(output_index);
+ auto output_tensor = getTensor(output_index);
- output_tensors.emplace_back(output_alloc);
+ output_tensors.emplace_back(output_tensor);
const auto output_tensor_builder = getTensorBuilder(output_index);
if (output_tensor_builder->supportDynamicTensor())
{
auto output_dyn_manager = output_tensor_builder->dynamicTensorManager();
- outputs_dyn_alloc_info[output_alloc] = exec::DynAllocInfo{output_index, output_dyn_manager};
+ outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index, output_dyn_manager};
}
}
for (auto tensor_builder : _tensor_builder_set)
{
auto reg = tensor_builder->tensorRegistry();
- auto tensor = reg ? reg->getManagedITensor(index) : tensor_builder->tensorAt(index);
+ auto tensor = reg ? reg->getNativeITensor(index) : tensor_builder->tensorAt(index);
if (tensor)
{
ret = tensor_builder;
std::shared_ptr<ITensor> TensorBuilder::tensorAt(const ir::OperandIndex &ind)
{
// NOTE Find from User Tensor Registry first
- // FIXME There may be both user tensor and managed tensor for a `ind` which is a waste
+  // FIXME There may be both a user tensor and a native tensor for an `ind`, which is wasteful
auto user_tensor = _user_tensor_reg->getITensor(ind);
auto tensor = _tensor_reg->getITensor(ind);
if (user_tensor)
std::shared_ptr<cpu_common::Tensor> TensorBuilder::at(const ir::OperandIndex &ind)
{
- return _tensor_reg->getManagedTensor(ind);
+ return _tensor_reg->getNativeTensor(ind);
}
std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void)
void TensorBuilder::setUserTensor(const ir::OperandIndex &ind,
const std::shared_ptr<UserTensor> &tensor)
{
- _user_tensor_reg->setManagedTensor(ind, tensor);
+ _user_tensor_reg->setNativeTensor(ind, tensor);
}
} // namespace controlflow
class UserTensor : public IPortableTensor
{
public:
- UserTensor(const ir::OperandInfo &info, ir::Layout layout, uint8_t *buffer, size_t size)
- : _info{info}, _layout{layout}, _buffer{buffer}, _size{size}, _dynamic{false}
+ UserTensor(const ir::OperandInfo &info, ir::Layout layout, uint8_t *buffer, size_t size,
+ IDynamicTensorManager *dynamic_tensor_manager)
+ : _info{info}, _layout{layout}, _buffer{buffer}, _size{size}, _dynamic{false},
+ _dynamic_tensor_manager{dynamic_tensor_manager}
{
}
- UserTensor(const ir::OperandInfo &info, ir::Layout layout) : UserTensor{info, layout, nullptr, 0}
+ UserTensor(const ir::OperandInfo &info, ir::Layout layout,
+ IDynamicTensorManager *dynamic_tensor_manager)
+ : UserTensor{info, layout, nullptr, 0, dynamic_tensor_manager}
{
}
void set_dynamic() override { _dynamic = true; }
ir::Shape getShape() const override { return _info.shape(); }
void setShape(const ir::Shape &new_shape) override { _info.shape(new_shape); }
+ bool is_constant() const override { return false; }
+ IDynamicTensorManager *dynamic_tensor_manager() override { return _dynamic_tensor_manager; }
private:
ir::OperandInfo _info;
uint8_t *_buffer;
size_t _size;
bool _dynamic;
+ IDynamicTensorManager *_dynamic_tensor_manager;
};
} // namespace controlflow
{
VERBOSE_F() << ind << std::endl;
- auto tensor = _tensors->getManagedTensor(ind);
+ auto tensor = _tensors->getNativeTensor(ind);
assert(tensor);
bool previously_dynamic = tensor->is_dynamic();
const ir::OperandInfo &tensor_info,
ir::Layout backend_layout)
{
- assert(_tensors->getManagedTensor(ind) == nullptr);
- auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout);
- _tensors->setManagedTensor(ind, tensor);
+ assert(_tensors->getNativeTensor(ind) == nullptr);
+ auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, this);
+ _tensors->setNativeTensor(ind, tensor);
}
void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind)
auto &input_set = find->second;
for (auto input_ind : input_set)
{
- auto *tensor = _tensors->getManagedTensor(input_ind).get();
+ auto *tensor = _tensors->getNativeTensor(input_ind).get();
if (!tensor->is_dynamic())
continue;
void DynamicTensorManager::deallocSubgraphOutput(ir::OperandIndex output_ind)
{
- auto *tensor = _tensors->getManagedTensor(output_ind).get();
+ auto *tensor = _tensors->getNativeTensor(output_ind).get();
if (!tensor->is_dynamic())
return;
#include "backend/cpu_common/StaticTensorManager.h"
+#include "backend/cpu_common/DynamicTensorManager.h"
#include <util/logging.h>
namespace onert
void StaticTensorManager::allocateConsts(void)
{
- for (auto &pair : _tensors->managed_tensors())
+ for (auto &pair : _tensors->native_tensors())
{
const auto &ind = pair.first;
auto tensor = pair.second;
auto mem_alloc = _const_mgr->allocate(ind, tensor->total_size());
tensor->setBuffer(mem_alloc);
auto buffer = mem_alloc->base();
- VERBOSE(CPU_StaticTensorManager) << "CONSTANT TENSOR(#" << ind.value()
- << "): " << static_cast<void *>(buffer)
- << "size : " << tensor->total_size() << std::endl;
+ VERBOSE(CPU_COMMON_StaticTensorManager) << "CONSTANT TENSOR(#" << ind.value()
+ << "): " << static_cast<void *>(buffer)
+ << "size : " << tensor->total_size() << std::endl;
}
}
}
{
_nonconst_mgr->allocate();
- for (auto &pair : _tensors->managed_tensors())
+ for (auto &pair : _tensors->native_tensors())
{
const auto &ind = pair.first;
auto tensor = pair.second;
auto *buffer = _nonconst_mgr->getBuffer(ind);
tensor->setBuffer(buffer);
- VERBOSE(CPU_StaticTensorManager) << "TENSOR(#" << ind.value()
- << "): " << static_cast<void *>(buffer) << std::endl;
+ VERBOSE(CPU_COMMON_StaticTensorManager) << "TENSOR(#" << ind.value()
+ << "): " << static_cast<void *>(buffer) << std::endl;
}
}
}
const ir::OperandInfo &tensor_info, ir::Layout backend_layout,
bool as_const)
{
- assert(!_tensors->getManagedTensor(ind));
- auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout);
- _tensors->setManagedTensor(ind, tensor);
+ assert(!_tensors->getNativeTensor(ind));
+ auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, nullptr);
+ _tensors->setNativeTensor(ind, tensor);
_as_constants[ind] = as_const;
}
void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
{
- assert(_tensors->getManagedTensor(ind));
+ assert(_tensors->getNativeTensor(ind));
// This method is called only when a tensor has proper shape
- assert(!_tensors->getManagedTensor(ind)->is_dynamic());
+ assert(!_tensors->getNativeTensor(ind)->is_dynamic());
if (!_as_constants[ind])
_nonconst_mgr->claimPlan(ind, size);
void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
{
- assert(_tensors->getManagedTensor(ind));
+ assert(_tensors->getNativeTensor(ind));
// This method is called only when a tensor has proper shape
- assert(!_tensors->getManagedTensor(ind)->is_dynamic());
+ assert(!_tensors->getNativeTensor(ind)->is_dynamic());
if (!_as_constants[ind])
_nonconst_mgr->releasePlan(ind);
void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
{
- for (const auto &it : _tensors->managed_tensors())
+ for (const auto &it : _tensors->native_tensors())
fn(it.first);
}
const auto &operand = lowered_graph.graph().operands().at(ind);
auto tensor = std::make_shared<backend::controlflow::UserTensor>(
operand.info(),
- ir::Layout::NHWC /* FIXME find op_seq for this operand and use frontend_layout */);
+ ir::Layout::NHWC, /* FIXME find op_seq for this operand and use frontend_layout */
+ cf_tensor_builder->dynamicTensorManager());
// Add tensor to controlflow TensorRegistry.
cf_tensor_builder->setUserTensor(ind, tensor);
ret.push_back(tensor);
-
- // Set other tensors as external tensors
- for (auto &tensor_builder : tensor_builders)
- {
- // FIXME This is a workaround registering all user tensors to all backends
- // FIXME Handle when it is failed
- tensor_builder->setExternalTensor(ind, tensor);
- }
}
return ret;
}
+void ExecutorFactory::prepareExternalTensors(ir::LoweredGraph &lowered_graph,
+ TensorBuilders &tensor_builders)
+{
+ lowered_graph.op_seqs().iterate(
+ [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
+ auto lower_info = lowered_graph.getLowerInfo(op_seq_index);
+ auto &backend_ctx = lowered_graph.backend_contexts().at(lower_info->backend());
+ for (auto ind : (op_seq.getInputs() + op_seq.getOutputs()) | ir::Remove::DUPLICATED |
+ ir::Remove::UNDEFINED)
+ {
+        // If an OpSequence input/output tensor does not have its own tensor object,
+        // it must be using an external tensor, so find the tensor in the other tensor builders
+        // and register it with this tensor builder if it is portable
+ if (!backend_ctx->tensor_builder->tensorAt(ind))
+ {
+ auto tensor = tensor_builders.getITensor(ind);
+ assert(tensor); // The tensor must have been created in one of TensorBuilders
+ auto ptensor = std::dynamic_pointer_cast<backend::IPortableTensor>(tensor);
+ if (ptensor)
+ backend_ctx->tensor_builder->setMigrantTensor(ind, ptensor);
+ }
+ }
+ });
+}
+
exec::IExecutor *
ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
tensor_builder->prepare();
}
+ prepareExternalTensors(*lowered_graph, tensor_builders);
+
ExecutionBuilder builder;
// Generate kernels
tensor_builder->prepare();
}
+ prepareExternalTensors(*lowered_graph, tensor_builders);
+
ExecutionBuilder builder;
// Generate kernels
#include "backend/ITensor.h"
#include "exec/IExecutor.h"
#include "ir/LoweredGraph.h"
+#include "TensorBuilders.h"
namespace onert
{
static std::vector<std::shared_ptr<backend::ITensor>>
initializeModelIOTensors(ir::LoweredGraph &lowered_graph,
const ir::OperandIndexSequence &indices);
+ static void prepareExternalTensors(ir::LoweredGraph &lowered_graph,
+ TensorBuilders &tensor_builders);
static exec::IExecutor *
createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
const compiler::CompilerOptions &options,
// manipulate output of operation and op_seq
// - replace output of the last operation's output to new operand
- // with old operand's removeDef and new operand's appendDef()
+  //   with the old operand's unsetDef() and the new operand's setDef()
manipulateOutput(op_seq_ind, op_seq_output_ind, new_op_ind);
// new op
last_node.replaceOutputs(op_seq_output_ind, new_op_ind);
// op_seq_obj doesn't have uses/def
- output_obj.removeDef(last_node_ind);
- new_op_obj.insertDef(last_node_ind);
+ assert(output_obj.getDef() == last_node_ind);
+ output_obj.unsetDef();
+ new_op_obj.setDef(last_node_ind);
}
ir::OperationIndex
const auto new_node_ind = operations.push(std::move(new_node));
input_obj.insertUse(new_node_ind);
- new_op_obj.insertDef(new_node_ind);
+ new_op_obj.setDef(new_node_ind);
return new_node_ind;
}
const auto new_node_ind = operations.push(std::move(new_node));
new_op_obj.insertUse(new_node_ind);
- output_obj.insertDef(new_node_ind);
+ output_obj.setDef(new_node_ind);
return new_node_ind;
}
for (auto &ind : first_node.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
auto &obj = operands.at(ind);
- obj.removeDef(first_node_ind);
+ assert(obj.getDef() == first_node_ind);
+ obj.unsetDef();
VERBOSE(Fp32ToFp16Converter) << "Operand #" << ind.value() << "'s Def(Node#"
<< first_node_ind.value() << ") is removed" << std::endl;
}
continue;
// This operand is output of operation, not weight or bias
- if (operand.getDef().size() > 0)
+ if (operand.getDef().valid())
++prev_op_cnt;
// Current node has multiple inputs as concat or at the beginning of the separated branch
const auto &input_operand = _graph->operands().at(input_operand_idx);
const bool quant = input_operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM;
- for (const auto &input_node_idx : input_operand.getDef())
+ auto input_node_idx = input_operand.getDef();
+ if (input_node_idx.valid())
{
// Data transfer cost from parent's node backend to current node's backend:
auto parent_backend = _backend_resolver->getBackend(input_node_idx);
* @param[in] backend_resolver backend resolver
*/
HEScheduler(const backend::BackendContexts &backend_contexts, const CompilerOptions &options)
- : _backend_contexts{backend_contexts}, _is_supported{}, _backends_avail_time{}, _ops_eft{},
+ : _is_supported{}, _backends_avail_time{}, _ops_eft{},
_op_to_rank{std::make_shared<ir::OperationIndexMap<int64_t>>()},
_is_profiling_mode{options.he_profiling_mode},
_is_linear_exec{options.executor == "Linear"},
_is_parallel_exec{options.executor == "Parallel"}
{
- // Workaround to avoid unused-private-field warning
- // TODO use _backend_contexts and remove workaround
- (void)_backend_contexts;
-
for (auto &entry : backend_contexts)
{
_all_backends.push_back(entry.first);
// whether it should assign these backends to these nodes:
// * It stores false for unsupported nodes
// * During rank calculation with enabled profiling mode it stores true for supported nodes
- const backend::BackendContexts &_backend_contexts;
std::unordered_map<const backend::Backend *, std::unordered_map<std::string, bool>> _is_supported;
// Finishing and starting time of each backend
std::unordered_map<const backend::Backend *, std::map<int64_t, int64_t>> _backends_avail_time;
std::unique_ptr<compiler::BackendResolver> _backend_resolver;
std::unique_ptr<exec::ExecTime> _exec_time;
const ir::Graph *_graph{nullptr};
- std::vector<const backend::Backend *>
- _all_backends; // TODO Remove this and use _backend_contexts instead
+ std::vector<const backend::Backend *> _all_backends;
const backend::Backend *_cpu_backend{nullptr}; // TODO Change this to controlflow_backend
bool _is_profiling_mode;
bool _is_linear_exec;
}
uses_map[ind] = obj.getUses().size();
- def_map[ind] = obj.getDef().size(); // should be 1 or 0
+ def_map[ind] = obj.getDef().valid() ? 1 : 0;
bool is_const = obj.isConstant();
if (is_const)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_COMPILER_OPERAND_CONTEXT_H__
-#define __ONERT_COMPILER_OPERAND_CONTEXT_H__
-
-#include "backend/ITensor.h"
-#include "ir/OperandIndexMap.h"
-#include <unordered_map>
-#include <memory>
-
-namespace onert
-{
-namespace compiler
-{
-
-class OperandContext
-{
-public:
- OperandContext &set(const ir::OperandIndex &ind, const std::shared_ptr<backend::ITensor> &tensor);
-
-public:
- bool exist(const ir::OperandIndex &ind) const { return _tensors.find(ind) != _tensors.end(); }
-
-public:
- std::shared_ptr<backend::ITensor> at(const ir::OperandIndex &ind) const
- {
- return _tensors.at(ind);
- }
-
- std::shared_ptr<backend::ITensor> &at(const ir::OperandIndex &ind) { return _tensors.at(ind); }
-
- void iterate(const std::function<void(const ir::OperandIndex &, backend::ITensor &)> &fn);
-
-private:
- ir::OperandIndexMap<std::shared_ptr<backend::ITensor>> _tensors;
-};
-
-} // namespace compiler
-} // namespace onert
-
-#endif // __ONERT_COMPILER_OPERAND_CONTEXT_H__
{
}
+void OperationValidator::checkUnaryOp(const ir::Operation &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ // Check if I/O types match
+ OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
+
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ // Check if I/O shapes match
+ OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+}
+
void OperationValidator::operator()()
{
// There is no reason for each subgraph to have subgraphs since compiler has subgraphs when
[&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
}
-void OperationValidator::visit(const ir::operation::Abs &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
-
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
+void OperationValidator::visit(const ir::operation::Abs &node) { checkUnaryOp(node); }
void OperationValidator::visit(const ir::operation::AvgPool2D &node)
{
num_units == _ctx.at(hidden_state_out_index).shape().dim(1));
}
-void OperationValidator::visit(const ir::operation::Round &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Round::Input::INPUT)};
-
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
+void OperationValidator::visit(const ir::operation::Round &node) { checkUnaryOp(node); }
void OperationValidator::visit(const ir::operation::SpaceToBatchND &node)
{
}
}
-void OperationValidator::visit(const ir::operation::Exp &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
-
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
+void OperationValidator::visit(const ir::operation::Exp &node) { checkUnaryOp(node); }
void OperationValidator::visit(const ir::operation::ExpandDims &node)
{
OP_REQUIRES(_ctx.at(axis_index).shape().rank() <= 1);
}
-void OperationValidator::visit(const ir::operation::Floor &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
-
- OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
-
- if (_ctx.at(output_index).info().isDynamic())
- return;
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
+void OperationValidator::visit(const ir::operation::Floor &node) { checkUnaryOp(node); }
void OperationValidator::visit(const ir::operation::HashtableLookup &node)
{
}
}
+void OperationValidator::visit(const ir::operation::L2Normalization &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ if (_ctx.at(ofm_index).info().isDynamic())
+ return;
+
+ const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
+
+ auto ifm_shape = _ctx.at(ifm_index).shape();
+ auto ofm_shape = _ctx.at(ofm_index).shape();
+
+ OP_REQUIRES(ifm_shape.rank() == ofm_shape.rank());
+
+ for (auto i = 0; i < ifm_shape.rank(); i++)
+ {
+ OP_REQUIRES(ifm_shape.dim(i) == ofm_shape.dim(i));
+ }
+}
+
void OperationValidator::visit(const ir::operation::Unpack &node)
{
const auto num{node.param().num};
OP_REQUIRES(_ctx.at(input_index).shape().dim(axis) % num_splits == 0);
}
-void OperationValidator::visit(const ir::operation::Cos &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
+void OperationValidator::visit(const ir::operation::Cos &node) { checkUnaryOp(node); }
- const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
-
-void OperationValidator::visit(const ir::operation::Sin &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
+void OperationValidator::visit(const ir::operation::Sin &node) { checkUnaryOp(node); }
- const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
+void OperationValidator::visit(const ir::operation::RSQRT &node) { checkUnaryOp(node); }
-void OperationValidator::visit(const ir::operation::RSQRT &node)
+void OperationValidator::visit(const ir::operation::Shape &node)
{
const auto output_index{node.getOutputs().at(0)};
if (_ctx.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+ UNUSED_RELEASE(input_index);
+ OP_REQUIRES(_ctx.at(output_index).shape().rank() == 1);
}
-void OperationValidator::visit(const ir::operation::Shape &node)
+void OperationValidator::visit(const ir::operation::ResizeBilinear &node)
{
const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
+
if (_ctx.at(output_index).info().isDynamic())
+ {
return;
+ }
+ OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4);
+ OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4);
- const auto input_index{node.getInputs().at(0)};
- UNUSED_RELEASE(input_index);
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 1);
+ auto align_corners = node.param().align_corners;
+ auto half_pixel_centers = node.param().half_pixel_centers;
+
+ OP_REQUIRES(!align_corners || !half_pixel_centers);
}
void OperationValidator::visit(const ir::operation::Reverse &node)
// TODO Add to validate with subgraphs
}
-void OperationValidator::visit(const ir::operation::Neg &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
+void OperationValidator::visit(const ir::operation::Neg &node) { checkUnaryOp(node); }
- const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
+void OperationValidator::visit(const ir::operation::Log &node) { checkUnaryOp(node); }
-void OperationValidator::visit(const ir::operation::Log &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
-
-void OperationValidator::visit(const ir::operation::LogicalNot &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
- return;
-
- const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
+void OperationValidator::visit(const ir::operation::LogicalNot &node) { checkUnaryOp(node); }
void OperationValidator::visit(const ir::operation::SquaredDifference &node)
{
OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
}
+
+void OperationValidator::visit(const ir::operation::Quantize &node)
+{
+  VERBOSE(Quantize) << "Validate Quantize operation" << std::endl;
+
+ OP_REQUIRES(node.getInputs().size() == 1);
+ OP_REQUIRES(node.getOutputs().size() == 1);
+
+ const auto input_index{node.getInputs().at(0)};
+ const auto output_index{node.getOutputs().at(0)};
+
+ OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::FLOAT32);
+
+ if (_ctx.at(output_index).info().isDynamic())
+ return;
+
+ OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
+
+ OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
+}
} // namespace compiler
} // namespace onert
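
For context, a minimal sketch (a general illustration, not taken from this diff) of the float32 to QUANT_UINT8_ASYMM relationship the validator above assumes, using the usual asymmetric affine scheme:

#include <algorithm>
#include <cmath>
#include <cstdint>

// q = clamp(round(x / scale) + zero_point, 0, 255)
uint8_t quantizeAsymmU8(float x, float scale, int32_t zero_point)
{
  const int32_t q = static_cast<int32_t>(std::round(x / scale)) + zero_point;
  return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}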
void visit(const ir::operation::DepthToSpace &node) override;
void visit(const ir::operation::Pack &node) override;
void visit(const ir::operation::LSTM &node) override;
+ void visit(const ir::operation::L2Normalization &node) override;
void visit(const ir::operation::Unpack &node) override;
void visit(const ir::operation::Pad &node) override;
void visit(const ir::operation::Min &node) override;
void visit(const ir::operation::Sin &node) override;
void visit(const ir::operation::RSQRT &node) override;
void visit(const ir::operation::Shape &node) override;
+ void visit(const ir::operation::ResizeBilinear &node) override;
void visit(const ir::operation::Reverse &node) override;
void visit(const ir::operation::If &node) override;
void visit(const ir::operation::While &node) override;
void visit(const ir::operation::Range &node) override;
void visit(const ir::operation::MatrixBandPart &node) override;
void visit(const ir::operation::LogSoftmax &node) override;
+ void visit(const ir::operation::Quantize &node) override;
private:
- void checkReduceOp(const ir::OperandIndex input_index, const ir::OperandIndex output_index);
+ void checkUnaryOp(const ir::Operation &node);
private:
// TODO Remove _ctx field
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Logistic::Input::INPUT));
}
+void StaticShapeInferer::visit(const ir::operation::L2Normalization &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::Input::INPUT));
+}
+
void StaticShapeInferer::visit(const ir::operation::MatrixBandPart &op)
{
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT));
}
}
+void StaticShapeInferer::visit(const ir::operation::ResizeBilinear &op)
+{
+ const auto input_idx{op.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
+ const auto &input = _operands.at(input_idx);
+
+ // get mutable output operand
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = _operands.at(output_idx);
+
+ // if input is dynamic, output also becomes dynamic
+ if (input.info().isDynamic())
+ {
+ output.info().setDynamic();
+ _return_has_dynamic_tensor = true;
+ return;
+ }
+
+  // Shape inference logic based on the operation's Params
+ ir::Shape new_shape = shape_inference::inferResizeBilinearShape(
+ input.shape(), op.param().height_out, op.param().width_out);
+
+  // Update the output shape only when the inferred shape differs from the current one
+ if (new_shape != output.shape())
+ {
+ // change on output shape
+ output.info().shape(new_shape);
+ }
+}
+
void StaticShapeInferer::visit(const ir::operation::Reverse &op)
{
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::Input::INPUT));
#include "backend/Backend.h"
#include "backend/controlflow/Config.h"
#include "backend/controlflow/TensorBuilder.h"
+#include "util/logging.h"
namespace onert
{
return _cf_tensor_builder;
}
+ std::shared_ptr<backend::ITensor> getITensor(ir::OperandIndex ind)
+ {
+ for (auto &tensor_builder : _tensor_builders)
+ {
+ auto tensor = tensor_builder->tensorAt(ind);
+ if (tensor)
+ return tensor;
+ }
+ return nullptr;
+ }
+
private:
std::unordered_set<std::shared_ptr<backend::ITensorBuilder>> _tensor_builders;
std::shared_ptr<backend::controlflow::TensorBuilder> _cf_tensor_builder;
#include "exec/DynamicShapeInference.h"
#include "util/ShapeInference.h"
+#include <assert.h>
namespace onert
{
namespace exec
{
+inline backend::IDynamicTensorManager *
+dynamicTensorManagerOf(const std::shared_ptr<backend::ITensor> &tensor)
+{
+ if (!tensor->dynamic_tensor_manager())
+ throw std::runtime_error{"Dynamic Tensor Manager is not available for this tensor."};
+ return tensor->dynamic_tensor_manager();
+}
+
void DynamicShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
const ir::OperandIndex lhs_idx,
const ir::OperandIndex rhs_idx)
ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
- _dynamic_tensor_manager->applyShape(output_idx, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_idx, new_shape);
assert(output->buffer() != nullptr);
}
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
assert(output->buffer() != nullptr);
}
ir::Shape new_shape = shape_inference::inferArgMaxShape(input_shape, axis, rank);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
// TODO
auto new_shape = shape_inference::inferBatchMatMulShape(lhs_shape, rhs_shape, op.param());
- _dynamic_tensor_manager->applyShape(output_index, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_index, new_shape);
}
void DynamicShapeInferer::visit(const ir::operation::BroadcastTo &op)
shape->getShape(), reinterpret_cast<const int32_t *>(shape->buffer()));
// set output shape and output buffer
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
assert(output->buffer() != nullptr);
}
auto output = _tensor_registry->getITensor(output_ind);
auto output_shape = shape_inference::inferConcatShape(in_shapes, op.param());
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
}
void DynamicShapeInferer::visit(const ir::operation::Conv2D &op)
ir::Shape output_shape = shape_inference::inferConv2DShape(input_shape, ker_shape, op.param());
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
assert(output->buffer() != nullptr);
}
auto output_shape = shape_inference::inferExpandDimsShape(input_shape, axis_buf[0]);
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
assert(output->buffer() != nullptr);
}
auto output_shape = shape_inference::inferFillShape(input_shape, input_buf);
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
assert(output->buffer() != nullptr);
}
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Logistic::INPUT));
}
+void DynamicShapeInferer::visit(const ir::operation::L2Normalization &op)
+{
+ handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::INPUT));
+}
+
void DynamicShapeInferer::visit(const ir::operation::MatrixBandPart &op)
{
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::INPUT));
const auto axis_val = op.param().axis;
ir::Shape new_shape = shape_inference::inferOnehotShape(indices_shape, *depth_buf, axis_val);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
ir::Shape new_shape = shape_inference::inferPackShape(input_shape, axis, rank, num);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
shape_inference::inferPadShape(input->getShape(), pad_buf, pad->getShape().num_elements());
// change output shape and reallocate output tensor memory
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
assert(output->buffer() != nullptr);
}
*reinterpret_cast<int32_t *>(limit_tensor->buffer()),
*reinterpret_cast<int32_t *>(delta_tensor->buffer()));
}
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
ir::Shape new_shape = shape_inference::inferReduceShape(input_shape, axes_vec, keep_dims);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
if (output_shape != output->getShape() || output->buffer() == nullptr)
{
// change on output shape
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
}
assert(output->buffer() != nullptr);
}
if (output_shape != output->getShape() || output->buffer() == nullptr)
{
// change on output shape
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
}
assert(output->buffer() != nullptr);
}
}
}
+void DynamicShapeInferer::visit(const ir::operation::ResizeBilinear &op)
+{
+ // check if output is not dynamic
+ auto output_ind = op.getOutputs().at(0);
+ auto output = _tensor_registry->getITensor(output_ind);
+
+  auto input_ind = op.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT);
+ auto input = _tensor_registry->getITensor(input_ind);
+
+ if ((!input->is_dynamic()) && (!output->is_dynamic()))
+ return;
+
+ // getting output shape from input shape and Params
+ auto output_shape = shape_inference::inferResizeBilinearShape(
+ input->getShape(), op.param().height_out, op.param().width_out);
+
+ // if shape is changed, change output shape and reallocate output tensor memory
+ if (output_shape != output->getShape() || output->buffer() == nullptr)
+ {
+ // change on output shape
+    dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
+ }
+ assert(output->buffer() != nullptr);
+}
+
void DynamicShapeInferer::visit(const ir::operation::Reverse &op)
{
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::INPUT));
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
ir::Shape output_shape;
output_shape.append(input_shape.rank());
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
assert(output->buffer() != nullptr);
}
ir::Shape new_shape = shape_inference::inferSliceShape(input_shape, begins_buf, sizes_buf);
- _dynamic_tensor_manager->applyShape(output_index, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_index, new_shape);
assert(output->buffer() != nullptr);
}
ir::Shape new_shape = shape_inference::inferSpaceToBatchNDShape(
input_shape, block_shape_shape, padding_shape, block_shape_data, padding_data);
- _dynamic_tensor_manager->applyShape(output_idx, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_idx, new_shape);
assert(output->buffer() != nullptr);
}
auto output_ind = op.getOutputs().at(out_tensor_idx);
auto output = _tensor_registry->getITensor(output_ind);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
}
auto output_ind = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_ind);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
ir::Shape output_shape =
onert::shape_inference::inferStridedSliceShape(input_shape, op_params, rank);
- _dynamic_tensor_manager->applyShape(output_index, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_index, output_shape);
assert(output->buffer() != nullptr);
}
auto output_shape = shape_inference::inferTileShape(input_shape, multiplier_buffer);
// set output shape and output buffer
- _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
assert(output->buffer() != nullptr);
}
// set output shape, based on input and params
ir::Shape new_shape = shape_inference::inferTransposeShape(input_shape, perm);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
auto output_ind = op.getOutputs().at(out_tensor_idx);
auto output = _tensor_registry->getITensor(output_ind);
- _dynamic_tensor_manager->applyShape(output_ind, new_shape);
+ dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
assert(output->buffer() != nullptr);
}
{
auto tensor_registry = tensor_builder->tensorRegistry();
assert(tensor_registry);
- tensor = tensor_registry->getManagedITensor(ind);
+ tensor = tensor_registry->getNativeITensor(ind);
if (tensor != nullptr)
{
if (tensor_builder->supportDynamicTensor())
{
auto tensor_registry = tensor_builder->tensorRegistry();
assert(tensor_registry);
- tensor = tensor_registry->getManagedITensor(ind);
+ tensor = tensor_registry->getNativeITensor(ind);
if (tensor != nullptr)
{
if (tensor_builder->supportDynamicTensor())
#include "ir/LowerInfoMap.h"
#include "backend/IConfig.h"
#include "backend/Backend.h"
-#include "compiler/OperandContext.h"
#include "exec/ExecTime.h"
#include "exec/IFunction.h"
#include "backend/IDynamicTensorManager.h"
int32_t data_offset() const override { return _info.typeInfo().offset(); }
const ir::OperandInfo &tensorInfo() const override { return _info; }
uint64_t num_elements() const override { return _info.shape().num_elements(); };
+ backend::IDynamicTensorManager *dynamic_tensor_manager() override { return nullptr; }
private:
const ir::OperandInfo _info;
const int32_t *pad_ptr = reinterpret_cast<const int32_t *>(pad_buffer);
float *output_ptr = reinterpret_cast<float *>(output_buffer);
- nnfw::cker::Pad(pad_ptr, pad_rank, cker_input_shape, input_ptr, cker_output_shape, output_ptr,
- nullptr);
+ nnfw::cker::Pad<float>(pad_ptr, pad_rank, cker_input_shape, input_ptr, cker_output_shape,
+ output_ptr, nullptr);
}
void invokePad(const ExecEnv *env, const ir::Operation &node)
auto outputs = node.getOutputs();
for (auto output : outputs)
{
- operands().at(output).insertDef(index);
+ operands().at(output).setDef(index);
}
for (auto input : node.getInputs() | ir::Remove::UNDEFINED)
#include "pass/ConstantLoweringPass.h"
#include "pass/PermutationOperationPass.h"
#include "pass/PermutationInsertionPass.h"
+#include "pass/PermutationEliminationPass.h"
#include "ir/GraphIterator.h"
#include "verifier/Verifier.h"
#include "backend/Backend.h"
pass::PermutationInsertionPass pi_pass(*this);
pi_pass.run();
- // Implemented code no longer works.
- // pass::PermutationEliminationPass pe_pass(*this);
- // pe_pass.run();
+
+ pass::PermutationEliminationPass pe_pass(*this);
+ pe_pass.run();
_op_seqs.dump("merged and sorted operations with permutation", _graph.operations());
}
const auto lower_info = getLowerInfo(index);
const auto &shape = object.shape();
- std::string def_ops = operation_index_to_string(object.getDef());
+ std::string def_ops =
+ object.getDef().valid() ? std::to_string(object.getDef().value()) : "N/A";
std::string use_ops = operation_index_to_string(object.getUses());
std::string def_layouts = factors_to_string(lower_info->def_factors());
std::string use_layouts = factors_to_string(lower_info->use_factors());
for (const auto &input : op_seq.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED)
{
const auto &input_obj = _graph.operands().at(input);
- for (const auto &def : input_obj.getDef())
+ auto def = input_obj.getDef();
+ if (def.valid())
{
branched_set.insert(def);
if (branched_set.size() > 1)
// Check for branching down
for (const auto &output : node.getOutputs() | Remove::DUPLICATED)
{
+ // TODO Fix this workaround for the case of model outputs that are used by another operation
+      // This is needed since branching is decided per operation, but for model outputs
+      // there is a controlflow backend (a use backend) while no actual use operation exists
+ if (_graph.getOutputs().contains(output))
+ return false;
+
const auto &output_obj = _graph.operands().at(output);
for (const auto &use : output_obj.getUses())
{
void Operand::removeUse(const OperationIndex &idx) { _uses.remove(idx); }
-void Operand::insertDef(const OperationIndex &idx)
-{
- assert(!isConstant());
- assert(_def.size() == 0);
-
- _def.insert(idx);
-}
+void Operand::setDef(const OperationIndex &idx) { _def = idx; }
-void Operand::removeDef(const OperationIndex &idx)
-{
- assert(_def.contains(idx));
-
- _def.remove(idx);
-}
+void Operand::unsetDef() { _def = OperationIndex{}; }
} // namespace ir
} // namespace onert
VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
}
+void OperationDumper::visit(const StatelessRandomUniform &node)
+{
+ VERBOSE(LIR) << "* StatelessRandomUniform" << std::endl;
+ VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(StatelessRandomUniform::Input::SHAPE)
+ << ", " << node.getInputs().at(StatelessRandomUniform::Input::SEED) << ")"
+ << std::endl;
+ VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+}
+
void OperationDumper::visit(const Squeeze &node)
{
VERBOSE(LIR) << "* Squeeze" << std::endl;
void visit(const operation::Squeeze &) override;
void visit(const operation::Slice &) override;
void visit(const operation::StridedSlice &) override;
+ void visit(const operation::StatelessRandomUniform &) override;
void visit(const operation::Sub &) override;
void visit(const operation::Tanh &) override;
void visit(const operation::Tile &) override;
BatchToSpaceND::BatchToSpaceND(const OperandIndexSequence &inputs,
const OperandIndexSequence &outputs)
- : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}
{
}
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Quantize.h"
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Quantize::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Quantize::Quantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "ir/operation/SplitV.h"
+#include <cassert>
+#include "ir/OperationVisitor.h"
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+void SplitV::accept(OperationVisitor &v) const { v.visit(*this); }
+SplitV::SplitV(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param ¶m)
+ : Operation{OperandConstraint::createExact(3u), inputs, outputs}, _param{param}
+{
+}
+} // namespace operation
+} // namespace ir
+} // namespace onert
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#include "OperandContext.h"
+#include "ir/operation/StatelessRandomUniform.h"
#include <cassert>
+#include "ir/OperationVisitor.h"
+
namespace onert
{
-namespace compiler
+namespace ir
{
-
-OperandContext &OperandContext::set(const ir::OperandIndex &id,
- const std::shared_ptr<backend::ITensor> &tensor)
+namespace operation
{
- // Only one tensor for an id
- assert(_tensors.find(id) == _tensors.end());
- _tensors[id] = tensor;
- return (*this);
-}
+void StatelessRandomUniform::accept(OperationVisitor &v) const { v.visit(*this); }
-void OperandContext::iterate(
- const std::function<void(const ir::OperandIndex &, backend::ITensor &)> &fn)
+StatelessRandomUniform::StatelessRandomUniform(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs)
+ : Operation{OperandConstraint::createExact(2u), inputs, outputs}
{
- for (auto &e : _tensors)
- {
- fn(e.first, *e.second);
- }
}
-} // namespace compiler
+} // namespace operation
+} // namespace ir
} // namespace onert
if (_replace_operands_map.count(key) == 0)
{
auto new_object = object;
+ new_object.unsetDef();
 // TODO Remove const_cast
- const_cast<OperationIndexSet &>(new_object.getDef()).clear();
const_cast<OperationIndexSet &>(new_object.getUses()).clear();
const auto new_index = _graph.operands().emplace(new_object);
_replace_operands_map[key] = new_index;
// Remove this node from uses of origin operand
// Constant operand has no def.
- assert(object.getDef().size() == 0);
+ assert(!object.getDef().valid());
object.removeUse(node_index);
// Remove origin operand
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
*/
#include "PermutationEliminationPass.h"
+#include "backend/controlflow/Config.h"
-#include "ir/Operand.h"
-#include "ir/operand/LowerInfo.h"
-#include "ir/Graph.h"
-#include "backend/IConfig.h"
#include "util/logging.h"
namespace onert
{
namespace pass
{
-void PermutationEliminationPass::callback(const OperandIndex &inp_index, Operand &object)
-{
- if (_graph.getInputs().contains(inp_index))
- {
- eliminateInput(inp_index, object);
- }
- else if (_graph.getOutputs().contains(inp_index))
- {
- eliminateOutput(inp_index, object);
- }
-}
-void PermutationEliminationPass::eliminateInput(const OperandIndex &inp_index, Operand &object)
+void PermutationEliminationPass::callback(const OperationIndex &ind, Operation &node)
{
- auto &model_inputs = _graph.getInputs();
-
- // get uses of the model's given input
- auto uses = object.getUses();
+ _op_ind = ind;
+ node.accept(*this);
+};
- // input must be used just by permutation
- if (uses.size() != 1)
- {
- return;
- }
+void PermutationEliminationPass::visit(const operation::Permute &node)
+{
+ auto in_operand = node.getInputs().at(0);
+ auto out_operand = node.getOutputs().at(0);
- for (auto input_use : uses)
+ // Check if two tensors are both portable
+ // TODO Make this general, this is just a workaround to check two tensors are portable
{
- auto &perm_operation = _graph.operations().at(input_use);
- auto perm_inputs = perm_operation.getInputs();
+ auto in_def_factor = _lowered_graph.getLowerInfo(in_operand)->def_factors().getOnlyElement();
+ auto out_def_factor = _lowered_graph.getLowerInfo(out_operand)->def_factors().getOnlyElement();
- auto perm_outputs = perm_operation.getOutputs();
+ auto in_backend_id = in_def_factor.backend()->config()->id();
+ auto out_backend_id = out_def_factor.backend()->config()->id();
- if (!isPermuteLayerToEliminate(perm_inputs, perm_outputs, true))
- {
+ // TODO Fix this workaround that removes only Permute between cpu and controlflow backend.
+ // This should be general.
+ if (!((in_backend_id == backend::controlflow::Config::ID && out_backend_id == "cpu") ||
+ (in_backend_id == "cpu" && out_backend_id == backend::controlflow::Config::ID)))
return;
- }
-
- assert(perm_inputs.at(0) == inp_index);
-
- VERBOSE(PermutationEliminationPass::EliminateInput) << "remove NHWC_TO_NCHW permutation\n";
-
- // set model's new input, which was output of permutation
- model_inputs.replace(inp_index, perm_outputs.at(0));
-
- // remove model's input, which is also input of permutation
- _graph.removeOperand(inp_index);
-
- // remove permutation operation
- assert(_lowered_graph.op_seqs().containsOperation(input_use));
- auto op_seq_idx = _lowered_graph.op_seqs().getOperation(input_use);
- _lowered_graph.op_seqs().remove(op_seq_idx);
- _graph.operations().remove(input_use);
-
- VERBOSE(PermutationEliminationPass::EliminateInput)
- << inp_index.value() << " is model's input and is removed. New input is "
- << perm_outputs.at(0).value() << "\n"
- << input_use.value() << " is removed permutation operation\n";
- }
-}
-
-void PermutationEliminationPass::eliminateOutput(const OperandIndex &out_index, Operand &object)
-{
- auto &model_outputs = _graph.getOutputs();
-
- // get defs of the model's given output
- auto defs = object.getDef();
-
- // output must use just permutation
- if (defs.size() != 1)
- {
- return;
}
- for (auto output_def : defs)
+ if (_graph.getOutputs().contains(out_operand))
{
- auto &perm_operation = _graph.operations().at(output_def);
- auto perm_outputs = perm_operation.getOutputs();
-
- auto perm_inputs = perm_operation.getInputs();
- if (!isPermuteLayerToEliminate(perm_inputs, perm_outputs, false))
+ // Exceptional case : When the output operand is a model output
+ // In this case we keep the output and remove the input
+
+ auto &out_operand_obj = _graph.operands().at(out_operand);
+ assert(out_operand_obj.getDef() == _op_ind);
+ out_operand_obj.unsetDef();
+ _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
+ if (!op_seq.getOutputs().contains(in_operand))
+ return;
+
+ // Update OpSequence/Operation edges and Operand edges
+ op_seq.replaceOutputs(in_operand, out_operand);
+ for (auto op : op_seq.operations())
+ {
+ auto &operation_obj = _graph.operations().at(op);
+ if (operation_obj.getOutputs().contains(in_operand))
+ {
+ operation_obj.replaceOutputs(in_operand, out_operand);
+ out_operand_obj.setDef(op);
+ }
+ }
+ });
+
+ // Remove Permute operation, enclosing OpSequence and the operand
{
- return;
- }
-
- assert(perm_outputs.at(0) == out_index);
+ _graph.removeOperand(in_operand);
- VERBOSE(PermutationEliminationPass::EliminateOutput) << "remove NCHW_TO_NHWC permutation\n";
-
- // Update operations' output that is used by permute operand
- for (auto perm_input_index : perm_inputs)
- {
- auto &perm_input_operand = _graph.operands().at(perm_input_index);
- perm_input_operand.removeUse(output_def);
+ auto op_seq_ind = _lowered_graph.op_seqs().getOperation(_op_ind);
+ // Assumes the enclosing OpSequence contains just this Permute operation
+ assert(_lowered_graph.op_seqs().at(op_seq_ind).size() == 1);
+ _lowered_graph.op_seqs().remove(op_seq_ind);
+ _graph.operations().remove(_op_ind);
}
- // set model's new output, which was input of permutation
- model_outputs.replace(out_index, perm_inputs.at(0));
-
- // remove model's output, which is also output of permutation
- _graph.removeOperand(out_index);
-
- // remove permutation operation
- assert(_lowered_graph.op_seqs().containsOperation(output_def));
- auto op_seq_idx = _lowered_graph.op_seqs().getOperation(output_def);
- _lowered_graph.op_seqs().remove(op_seq_idx);
- _graph.operations().remove(output_def);
-
- VERBOSE(PermutationEliminationPass::EliminateOutput)
- << out_index.value() << " is model's output and is removed. New output is "
- << perm_inputs.at(0).value() << "\n"
- << output_def.value() << " is removed permutation operation\n";
+ _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
+ if (!op_seq.getInputs().contains(in_operand))
+ return;
+
+ op_seq.replaceInputs(in_operand, out_operand);
+ for (auto op : op_seq.operations())
+ {
+ auto &operation_obj = _graph.operations().at(op);
+ if (operation_obj.getInputs().contains(in_operand))
+ {
+ operation_obj.replaceInputs(in_operand, out_operand);
+ out_operand_obj.insertUse(op);
+ }
+ }
+ });
+
+ VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl;
+ VERBOSE(removePermute) << " - Input (removed) Operand : " << in_operand << std::endl;
+ VERBOSE(removePermute) << " - Output (kept) Operand : " << out_operand << std::endl;
}
-}
-
-bool PermutationEliminationPass::isPermuteLayerToEliminate(const OperandIndexSequence &inp_indexes,
- const OperandIndexSequence &out_indexes,
- bool is_for_model_input)
-{
- auto input_def_factors = _lowered_graph.getLowerInfo(inp_indexes.at(0))->def_factors();
- auto output_def_factors = _lowered_graph.getLowerInfo(out_indexes.at(0))->def_factors();
-
- auto input_layout = input_def_factors.getOnlyElement().layout();
- auto output_layout = output_def_factors.getOnlyElement().layout();
-
- if (input_def_factors.size() != 1 || output_def_factors.size() != 1)
- {
- return false;
- }
-
- // all operands' factor must be the same
- for (auto index : inp_indexes)
- {
- auto op_factor_set = _lowered_graph.getLowerInfo(index)->def_factors();
- if (op_factor_set.size() != 1 ||
- input_layout != _lowered_graph.getLowerInfo(index)->def_factors().getOnlyElement().layout())
- {
- return false;
- }
- }
- // all operands' factor must be the same
- for (auto index : out_indexes)
+ else
{
- auto op_factor_set = _lowered_graph.getLowerInfo(index)->def_factors();
- if (op_factor_set.size() != 1 ||
- output_layout !=
- _lowered_graph.getLowerInfo(index)->def_factors().getOnlyElement().layout())
+ // Otherwise keep the input and remove the output
+
+ auto &in_operand_obj = _graph.operands().at(in_operand);
+ in_operand_obj.removeUse(_op_ind);
+
+ // Make OpSequences(that use the output) use the input
+ _lowered_graph.op_seqs().iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
+ if (!op_seq.getInputs().contains(out_operand))
+ return;
+
+ op_seq.replaceInputs(out_operand, in_operand);
+ for (auto op : op_seq.operations())
+ {
+ auto &operation_obj = _graph.operations().at(op);
+ if (operation_obj.getInputs().contains(out_operand))
+ {
+ operation_obj.replaceInputs(out_operand, in_operand);
+ in_operand_obj.insertUse(op);
+ }
+ }
+ });
+
+ // Remove Permute operation, enclosing OpSequence and the operand
{
- return false;
+ _graph.removeOperand(out_operand);
+
+ auto op_seq_ind = _lowered_graph.op_seqs().getOperation(_op_ind);
+ // Assumes the enclosing OpSequence contains just this Permute operation
+ assert(_lowered_graph.op_seqs().at(op_seq_ind).size() == 1);
+ _lowered_graph.op_seqs().remove(op_seq_ind);
+ _graph.operations().remove(_op_ind);
}
- }
- if (is_for_model_input)
- {
- // check if this is NHWC_TO_NCHW permutation: must have single input, which is model's input
- return (inp_indexes.size() == 1 && input_layout == Layout::NHWC &&
- output_layout == Layout::NCHW);
+ VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl;
+ VERBOSE(removePermute) << " - Input (kept) Operand : " << in_operand << std::endl;
+ VERBOSE(removePermute) << " - Output (removed) Operand : " << out_operand << std::endl;
}
-
- // check if this is NCHW_TO_NHWC permutation: must have single output, which is model's output
- return (out_indexes.size() == 1 && input_layout == Layout::NCHW && output_layout == Layout::NHWC);
}
} // namespace pass
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#ifndef __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
#define __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
-#include "LoweredOperandPass.h"
-#include "ir/Operand.h"
-#include "ir/OperandIndexSequence.h"
+#include "ir/OperationVisitor.h"
+#include "LoweredOperationPass.h"
namespace onert
{
namespace pass
{
-class PermutationEliminationPass : public LoweredOperandPass
+/**
+ * @brief An optimization pass that removes Permute operations if possible
+ *
+ * There may be some Permute operations that are inserted by PermutationInsertionPass or other
+ * passes. This pass checks all Permute operations and eliminates them if Permute in/out tensors
+ * are compatible and layouts match.
+ *
+ * The Permute input tensor is kept and the output tensor is removed, except when the output is
+ * a model output. Model output tensors must be on the controlflow backend, so in that case the
+ * output is kept and the input is removed instead.
+ *
+ * @note This is an optimization pass, which means that everything should still work fine even
+ * if this pass is skipped.
+ */
+class PermutationEliminationPass : public LoweredOperationPass, public OperationVisitor
{
public:
- using LoweredOperandPass::LoweredOperandPass;
+ using LoweredOperationPass::LoweredOperationPass;
public:
- std::string id() override { return "PermutationEliminationPass"; }
+ std::string id() final { return "PermutationEliminationPass"; }
- void callback(const OperandIndex &index, Operand &object) override;
+public:
+ void callback(const OperationIndex &i, Operation &n) final;
private:
- /**
- * @brief Remove Permute operation that permutates input
- *
- * Note: This function aslo removes model's input and
- * sets output of permutation as model's new input
- *
- * @param inp_index is the target operand index for the elimination
- * @param object is the target operand object for the elimination
- *
- * @return
- */
- void eliminateInput(const OperandIndex &inp_index, Operand &object);
-
- /**
- * @brief Remove Permute operation that permutates output of a model
- *
- * Note: This function aslo removes model's output and
- * sets input of permutation as model's new output
- *
- * @param out_index is the target operand index for the elimination
- * @param object is the target operand object for the elimination
- *
- * @return
- */
- void eliminateOutput(const OperandIndex &out_index, Operand &object);
+ void visit(const operation::Permute &) final;
- /**
- * @brief Determine if passed operands are permute layer's input and output, that must be
- * eliminated
- *
- * @param inp_index indexes of the input operand to operation
- * @param out_index indexes of the output operand to operation
- * @param is_for_model_input checking for model's input or output
- *
- * @return if it is permutation layer
- */
- bool isPermuteLayerToEliminate(const OperandIndexSequence &inp_indexes,
- const OperandIndexSequence &out_indexes, bool is_for_model_input);
+private:
+ ir::OperationIndex _op_ind;
};
} // namespace pass
}
auto insert_set = operand_li->use_factors() - operand_li->def_factors();
- auto def_factor = operand_li->def_factors().getOnlyElement();
-
- auto compatible_backends = [](auto /* backend1 */, auto /* backend2 */) {
- // TODO If other issues for Permute elimination are resolved, enable this
- return false;
- /*
- // TODO This is a workaround for not inserting Permute between cpu and controlflow.
- // To be general, we need another way of checking they are compatible.
- const auto cf = backend::controlflow::Config::ID;
- const auto cpu = "cpu";
- const auto id1 = backend1->config()->id();
- const auto id2 = backend2->config()->id();
- return (id1 == cpu && id2 == cf) // Allows no-Permute for Model inputs
- || (id1 == cf && id2 == cpu); // Allows no-Permute for Model outputs
- */
- };
-
for (auto factor : insert_set)
{
- if (factor.layout() == def_factor.layout() &&
- compatible_backends(factor.backend(), def_factor.backend()))
- {
- // For this factor we can just reuse existing operand - Permute is not added.
- VERBOSE(PermutationInsertionPass) << "Permutation Insertion is skipped for operand "
- << index << " / as the tensor is compatible with backend "
- << factor.backend()->config()->id() << std::endl;
- factor_to_index.emplace(factor, index);
- continue;
- }
-
const auto permute_operation_index = insertPermute(index, factor);
permute_indexes.push_back(permute_operation_index);
const auto &permute_operation = _graph.operations().at(permute_operation_index);
// Update Use/Def info
{
_graph.operands().at(operand_index).insertUse(node_index);
- _graph.operands().at(out_operand_index).insertDef(node_index);
+ _graph.operands().at(out_operand_index).setDef(node_index);
}
return node_index;
}
std::string id() override { return "PermutationInsertionPass"; }
void callback(const OperandIndex &index, Operand &object) override;
+private:
/**
* @brief Insert Permute operation that has given operand as input
*
*/
OperationIndex insertPermute(const OperandIndex &operand_index,
const operand::PermuteFactor &factor);
-
-private:
};
} // namespace pass
const auto &output_ind = node.getOutputs().at(0);
const auto &output = _graph.operands().at(output_ind);
- assert(output.getDef().size() == 1);
- const auto &node_index = *output.getDef().begin();
+ assert(output.getDef().valid());
+ const auto node_index = output.getDef();
const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index);
const auto frontend_layout = _lowered_graph.op_seqs().at(op_seq_index).getLayout();
const auto backend_layout = _lowered_graph.getLowerInfo(op_seq_index)->layout();
const auto &output_ind = node.getOutputs().at(0);
const auto &output_obj = _graph.operands().at(output_ind);
- assert(output_obj.getDef().size() == 1);
- const auto &node_index = *output_obj.getDef().begin();
+ assert(output_obj.getDef().valid());
+ const auto node_index = output_obj.getDef();
const auto &op_seq_index = _lowered_graph.op_seqs().getOperation(node_index);
const auto frontend_layout = _lowered_graph.op_seqs().at(op_seq_index).getLayout();
lower_info->addUsePermuteFactor(new_factor);
 // Check whether the node's input is a model input or a constant
- if (_graph.operands().at(input).getDef().size() == 0 &&
+ if (!_graph.operands().at(input).getDef().valid() &&
(lower_info->def_factors().size() == 1 &&
lower_info->def_factors().getOnlyElement() == removed_factor))
{
// DAGChecker
//
-bool DAGChecker::verify(const Graph &graph) const
+bool DAGChecker::verify(const Graph &graph) const noexcept
{
auto &operations = graph.operations();
bool cyclic = false;
// EdgeConsistencyVerifier
//
-bool EdgeConsistencyChecker::verify(const Graph &graph) const
+bool EdgeConsistencyChecker::verify(const Graph &graph) const noexcept
{
auto &operations = graph.operations();
- uint32_t mismatches = 0;
+ uint32_t errors = 0;
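+ // Count every broken edge (and missing operand) so a single verification pass reports all inconsistencies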
operations.iterate([&](const OperationIndex &index, const Operation &node) {
for (auto operand_index : node.getInputs() | ir::Remove::UNDEFINED)
{
- auto &operand = graph.operands().at(operand_index);
- mismatches += (operand.getUses().contains(index) ? 0 : 1);
+ try
+ {
+ auto &operand = graph.operands().at(operand_index);
+ bool operand_has_use = operand.getUses().contains(index);
+ if (!operand_has_use)
+ {
+ VERBOSE(EdgeConsistencyChecker) << "[ERROR] EDGE MISMATCH : Missing USE edge - Operand "
+ << operand_index << " to Operation " << index
+ << std::endl;
+ errors += 1;
+ }
+ }
+ catch (const std::out_of_range &e)
+ {
+ VERBOSE(EdgeConsistencyChecker)
+ << "[ERROR] OPEARAND NOT FOUND : Operation " << index << " has Operand "
+ << operand_index << ", but the operand object is not present in the graph" << std::endl;
+ errors += 1;
+ }
}
for (auto operand_index : node.getOutputs())
{
- auto &operand = graph.operands().at(operand_index);
- mismatches += (operand.getDef().contains(index) ? 0 : 1);
+ try
+ {
+ auto &operand = graph.operands().at(operand_index);
+ if (operand.getDef() != index)
+ {
+ VERBOSE(EdgeConsistencyChecker) << "[ERROR] EDGE MISMATCH : Missing DEF edge - Operand "
+ << operand_index << " to Operation " << index
+ << std::endl;
+ errors += 1;
+ }
+ }
+ catch (const std::out_of_range &e)
+ {
+ VERBOSE(EdgeConsistencyChecker)
+ << "[ERROR] OPEARAND NOT FOUND : Operation " << index << " has Operand "
+ << operand_index << ", but the operand object is not present in the graph" << std::endl;
+ errors += 1;
+ }
}
});
- return mismatches == 0;
+
+ VERBOSE(EdgeConsistencyChecker) << "Total Number of errors : " << errors << std::endl;
+
+ return errors == 0;
}
} // namespace verifier
struct IVerifier
{
virtual ~IVerifier() = default;
- virtual bool verify(const Graph &graph) const = 0;
+ virtual bool verify(const Graph &graph) const noexcept = 0;
};
} // namespace verifier
class DAGChecker : public IVerifier
{
public:
- bool verify(const Graph &graph) const override;
+ bool verify(const Graph &graph) const noexcept override;
};
class EdgeConsistencyChecker : public IVerifier
{
public:
- bool verify(const Graph &graph) const override;
+ bool verify(const Graph &graph) const noexcept override;
};
} // namespace verifier
std::string _ts;
};
-void emit_rusage(EventRecorder *rec, const std::string &ts)
+#ifdef DEBUG
+inline void emit_rusage(EventRecorder *rec, const std::string &ts)
{
struct rusage ru;
rec->emit(evt);
}
}
+#endif
} // namespace
break;
}
- // Trace resource usage per each event notification
+// TODO: Add resource measurement (e.g. RSS)
+// when it can be done with low overhead in release builds
+#ifdef DEBUG
emit_rusage(_rec, ts);
+#endif
}
private:
std::mutex _mu;
- WriteFormat _write_format{WriteFormat::CHROME_TRACING};
+ // TODO: Allow user to control write_format
+ WriteFormat _write_format{WriteFormat::SNPE_BENCHMARK};
std::vector<DurationEvent> _duration_events;
std::vector<CounterEvent> _counter_events;
};
return ret;
}
+ir::Shape inferResizeBilinearShape(const ir::Shape &in_shape, const int32_t output_height,
+ const int32_t output_width)
+{
+ assert(in_shape.rank() == 4);
+ ir::Shape ret(in_shape.rank());
+
+ ret.dim(0) = in_shape.dim(0);
+ ret.dim(1) = output_height;
+ ret.dim(2) = output_width;
+ ret.dim(3) = in_shape.dim(3);
+
+ return ret;
+}
+
template <typename T> ir::Shape inferRangeShape(T start_val, T limit_val, T delta_val)
{
ir::Shape out_shape(static_cast<int>(1));
using SubGraph = typename LoaderDomain::SubGraph;
using Tensor = typename LoaderDomain::Tensor;
using TensorType = typename LoaderDomain::TensorType;
+ using DimensionType = typename LoaderDomain::DimensionType;
+ using SparseIndexVector = typename LoaderDomain::SparseIndexVector;
protected:
bool isOptionalInputTensor(std::int32_t idx) { return idx == -1; }
* @param file_path
*/
void loadFromFile(const char *file_path);
+ /**
+ * @brief Load a model from a buffer
+ *
+ * @param buffer buffer pointer
+ * @param size buffer size
+ */
+ void loadFromBuffer(uint8_t *buffer, size_t size);
protected:
~BaseLoader() = default;
void loadSoftmax(const Operator *op, ir::Graph &subg);
void loadMaxPool2D(const Operator *op, ir::Graph &subg);
void loadConcatenation(const Operator *op, ir::Graph &subg);
- void loadInstanceNorm(const Operator *op, ir::Graph &subg);
void loadFill(const Operator *op, ir::Graph &subg);
void loadFC(const Operator *op, ir::Graph &subg);
void loadAdd(const Operator *op, ir::Graph &subg);
void loadSqueeze(const Operator *op, ir::Graph &subg);
void loadPrelu(const Operator *op, ir::Graph &subg);
void loadSplit(const Operator *op, ir::Graph &subg);
+ void loadSplitV(const Operator *op, ir::Graph &subg);
void loadSlice(const Operator *op, ir::Graph &subg);
void loadStridedSlice(const Operator *op, ir::Graph &subg);
void loadUnpack(const Operator *op, ir::Graph &subg);
void loadTile(const Operator *op, ir::Graph &subg);
void loadLogicalOr(const Operator *op, ir::Graph &subg);
void loadRange(const Operator *op, ir::Graph &subg);
- void loadBCQFullyConnected(const Operator *op, ir::Graph &subg);
- void loadBCQGather(const Operator *op, ir::Graph &subg);
void loadMatrixBandPart(const Operator *op, ir::Graph &subg);
void loadBroadcastTo(const Operator *op, ir::Graph &subg);
void loadFusedBatchNorm(const Operator *op, ir::Graph &subg);
void loadLogSoftmax(const Operator *op, ir::Graph &subg);
+ void loadQuantize(const Operator *op, ir::Graph &subg);
+ void loadSpaceToDepth(const Operator *op, ir::Graph &subg);
+ void loadStatelessRandomUniform(const Operator *op, ir::Graph &subg);
protected:
// Base address for mapped region for loading (if needed)
_verifier = std::make_unique<Verifier>(reinterpret_cast<const std::uint8_t *>(_base), size);
loadModel();
- munmap(_base, size);
close(_fd);
}
template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::loadFromBuffer(uint8_t *buffer,
+ size_t size)
+{
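+ // Note: when loading from a buffer, constant operand data is wrapped as ExternalData
+ // (see loadOperand), so the buffer is expected to outlive the loaded model.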
+ _base = buffer;
+ _verifier = std::make_unique<Verifier>(reinterpret_cast<const std::uint8_t *>(_base), size);
+ loadModel();
+}
+
+template <typename LoaderDomain, typename SpecificLoader>
ir::Activation BaseLoader<LoaderDomain, SpecificLoader>::BaseLoader::convertActivation(
const ActivationFunctionType type)
{
}
}
+/* Copied from tensorflow lite. Need to append copyright */
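+// Copies the values of a flatbuffers index vector (Int32Vector/Uint16Vector/Uint8Vector)
+// into `arr` as uint16_t; returns false if the source vector has no values.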
+template <typename T> bool Copy(const T *data_ptr, std::vector<uint16_t> &arr)
+{
+ if (data_ptr->values() == nullptr)
+ {
+ return false;
+ }
+
+ int size = data_ptr->values()->size();
+ arr.reserve(size);
+ for (int i = 0; i < size; i++)
+ {
+ arr.emplace_back(static_cast<uint16_t>(data_ptr->values()->Get(i)));
+ }
+ return true;
+}
+
template <typename LoaderDomain, typename SpecificLoader>
ir::OperandIndex BaseLoader<LoaderDomain, SpecificLoader>::loadOperand(const Tensor *tensor,
ir::Graph &subg)
}
// Create TypeInfo
ir::TypeInfo type_info(data_type, scale, zero_point);
+ // Sparsity
+ auto src_sparsity = tensor->sparsity();
+ if (src_sparsity != nullptr)
+ {
+ std::vector<uint16_t> w1_segments;
+ std::vector<uint16_t> w1_indices;
+ // ignore traversal_order, block_map
+ // load metadata
+ const size_t dim_metadata_size = src_sparsity->dim_metadata()->size();
+ if (dim_metadata_size != 2)
+ throw std::runtime_error("sparse tensor is supported only for 2D");
+ const auto *src_metadata = src_sparsity->dim_metadata()->Get(0);
+ if (src_metadata->format() != DimensionType::DimensionType_DENSE)
+ throw std::runtime_error("sparse tensor dim[0] is not DENSE");
+ src_metadata = src_sparsity->dim_metadata()->Get(1);
+ if (src_metadata->format() != DimensionType::DimensionType_SPARSE_CSR)
+ throw std::runtime_error("sparse tensor dim[1] is not SPARSE_CSR");
+
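+ // Read the CSR segment/index arrays into 16-bit vectors, whichever integer width the flatbuffer uses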
+ auto ParseSparseIndexVector = [src_metadata, &w1_segments, &w1_indices]() {
+ if (src_metadata->array_segments() == nullptr || src_metadata->array_indices() == nullptr)
+ return false;
+ bool status = true;
+ switch (src_metadata->array_segments_type())
+ {
+ case SparseIndexVector::SparseIndexVector_Int32Vector:
+ status = Copy(src_metadata->array_segments_as_Int32Vector(), w1_segments);
+ break;
+ case SparseIndexVector::SparseIndexVector_Uint16Vector:
+ status = Copy(src_metadata->array_segments_as_Uint16Vector(), w1_segments);
+ break;
+ case SparseIndexVector::SparseIndexVector_Uint8Vector:
+ status = Copy(src_metadata->array_segments_as_Uint8Vector(), w1_segments);
+ break;
+ default:
+ return false;
+ }
+ if (status != true)
+ return false;
+ switch (src_metadata->array_indices_type())
+ {
+ case SparseIndexVector::SparseIndexVector_Int32Vector:
+ return Copy(src_metadata->array_indices_as_Int32Vector(), w1_indices);
+ case SparseIndexVector::SparseIndexVector_Uint16Vector:
+ return Copy(src_metadata->array_indices_as_Uint16Vector(), w1_indices);
+ case SparseIndexVector::SparseIndexVector_Uint8Vector:
+ return Copy(src_metadata->array_indices_as_Uint8Vector(), w1_indices);
+ default:
+ break;
+ }
+ return false;
+ };
+ if (ParseSparseIndexVector() == false)
+ throw std::runtime_error("Error during parsing sparsity index information");
+ type_info.sparse2DMetadata(std::move(w1_segments), std::move(w1_indices));
+ }
// Create operand
const auto operand_index = subg.addOperand(shape, type_info);
if (data != nullptr)
{
using std::ptrdiff_t;
- size_t data_size = data->size();
- ptrdiff_t unaligned_offset_start = data->data() - _base;
- ptrdiff_t offset_end = unaligned_offset_start + data_size;
-
- // Calculated aligned offset from base address of mapped region
- // munmap accepts memory address which is a multiple of the pagesize
- ptrdiff_t aligned_offset_start = (unaligned_offset_start / _pagesize) * _pagesize;
- size_t mmap_size = offset_end - aligned_offset_start;
-
- auto ptr = std::make_unique<ir::MMapedData>(_fd, aligned_offset_start, mmap_size,
- unaligned_offset_start, data_size);
- subg.setOperandValue(operand_index, std::move(ptr));
+ std::unique_ptr<ir::Data> data_obj;
+ if (_fd == -1) // Model is from memory
+ {
+ data_obj = std::make_unique<ir::ExternalData>(data->data(), data->size());
+ }
+ else // Model is loaded(mmap'd) from a file
+ {
+ data_obj = std::make_unique<ir::CachedData>(data->data(), data->size());
+ deallocateMmappedArea(const_cast<uint8_t *>(data->data()), data->size());
+ }
+ subg.setOperandValue(operand_index, std::move(data_obj));
}
// Name unused
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadInstanceNorm(const Operator *op, ir::Graph &subg)
-{
- ir::OperandIndexSequence inputs;
- ir::OperandIndexSequence outputs;
-
- loadOperationIO(op, inputs, outputs);
-
- ir::operation::InstanceNorm::Param param;
- const auto *options = op->builtin_options_as_InstanceNormOptions();
-
- param.activation = convertActivation(options->fused_activation_function());
- // Use default value 1e-5 if value of epsilon is zero
- param.epsilon = options->epsilon() == 0.f ? 1e-5 : options->epsilon();
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::InstanceNorm(inputs, outputs, param));
- subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadFill(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::operation::ResizeBilinear::Param param;
param.height_out = size_v[0];
param.width_out = size_v[1];
+ param.align_corners = op->builtin_options_as_ResizeBilinearOptions()->align_corners();
+ param.half_pixel_centers = op->builtin_options_as_ResizeBilinearOptions()->half_pixel_centers();
std::unique_ptr<ir::Operation> new_op(new ir::operation::ResizeBilinear({input}, outputs, param));
subg.addOperation(std::move(new_op));
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- auto input = inputs.at(0);
- auto block_shape = inputs.at(1);
- auto crops = inputs.at(2);
-
- if (!subg.operands().at(crops).isConstant())
- throw std::runtime_error("BatchToSpaceND: non-constant 'crops' is not supported.");
- std::vector<std::int32_t> crops_v = subg.operands().at(crops).template asVector<std::int32_t>();
- assert(crops_v.size() == 4);
- if (crops_v != std::vector<std::int32_t>{0, 0, 0, 0})
- throw std::runtime_error("BatchToSpaceND: 'crops' other than {0, 0, 0, 0} is not supported.");
-
- std::unique_ptr<ir::Operation> new_op{
- new ir::operation::BatchToSpaceND{{input, block_shape}, outputs}};
+ std::unique_ptr<ir::Operation> new_op{new ir::operation::BatchToSpaceND{inputs, outputs}};
subg.addOperation(std::move(new_op));
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadBCQGather(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadMatrixBandPart(const Operator *op,
+ ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- ir::operation::BCQGather::Param param;
- const auto *options = op->builtin_options_as_BCQGatherOptions();
- param.input_hidden_size = options->input_hidden_size();
- param.axis = options->axis();
-
- std::unique_ptr<ir::Operation> new_op(new ir::operation::BCQGather(inputs, outputs, param));
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::MatrixBandPart(inputs, outputs));
subg.addOperation(std::move(new_op));
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadBCQFullyConnected(const Operator *op,
- ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadBroadcastTo(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
loadOperationIO(op, inputs, outputs);
- ir::operation::BCQFullyConnected::Param param;
- const auto *options = op->builtin_options_as_BCQFullyConnectedOptions();
- param.weights_hidden_size = options->weights_hidden_size();
- param.activation = convertActivation(options->fused_activation_function());
-
- std::unique_ptr<ir::Operation> new_op(
- new ir::operation::BCQFullyConnected(inputs, outputs, param));
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::BroadcastTo(inputs, outputs));
subg.addOperation(std::move(new_op));
}
-
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMatrixBandPart(const Operator *op,
- ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadSpaceToDepth(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
+ ir::operation::SpaceToDepth::Param param;
+
+ const auto *options = op->builtin_options_as_SpaceToDepthOptions();
+
+ param.block_size = options->block_size();
loadOperationIO(op, inputs, outputs);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::MatrixBandPart(inputs, outputs));
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::SpaceToDepth(inputs, outputs, param));
subg.addOperation(std::move(new_op));
}
template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadBroadcastTo(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadStatelessRandomUniform(const Operator *op,
+ ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
-
loadOperationIO(op, inputs, outputs);
- std::unique_ptr<ir::Operation> new_op(new ir::operation::BroadcastTo(inputs, outputs));
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::StatelessRandomUniform(inputs, outputs));
subg.addOperation(std::move(new_op));
}
BatchMatMul,
Einsum,
BroadcastTo,
- FusedBatchNorm
+ FusedBatchNorm,
+ StatelessRandomUniform
};
// Mapping from custom op name string to BuiltinOP enum
{"Einsum", BuiltinOP::Einsum},
{"FusedBatchNormV3", BuiltinOP::FusedBatchNorm},
{"BroadcastTo", BuiltinOP::BroadcastTo},
+ {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform},
};
try
case BuiltinOP::FusedBatchNorm:
loadFusedBatchNorm(op, subg);
break;
+ case BuiltinOP::StatelessRandomUniform:
+ loadStatelessRandomUniform(op, subg);
+ break;
default:
throw std::runtime_error{
"Loader: Custom OP map is defined but operation loader function is not defined"};
}
template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadSplitV(const Operator *op, ir::Graph &subg)
+{
+ ir::OperandIndexSequence inputs;
+ ir::OperandIndexSequence outputs;
+
+ loadOperationIO(op, inputs, outputs);
+
+ ir::operation::SplitV::Param param{};
+
+ const auto *options = op->builtin_options_as_SplitVOptions();
+ param.num_splits = options->num_splits();
+
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::SplitV(inputs, outputs, param));
+ subg.addOperation(std::move(new_op));
+}
+
+template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadSlice(const Operator *op, ir::Graph &subg)
{
ir::OperandIndexSequence inputs;
}
template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadQuantize(const Operator *op, ir::Graph &subg)
+{
+ ir::OperandIndexSequence inputs;
+ ir::OperandIndexSequence outputs;
+
+ loadOperationIO(op, inputs, outputs);
+
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::Quantize(inputs, outputs));
+ subg.addOperation(std::move(new_op));
+}
+
+template <typename LoaderDomain, typename SpecificLoader>
void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, ir::Graph &subg)
{
const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
case BuiltinOperator::BuiltinOperator_SPLIT:
loadSplit(op, subg);
return;
+ case BuiltinOperator::BuiltinOperator_SPLIT_V:
+ loadSplitV(op, subg);
+ return;
case BuiltinOperator::BuiltinOperator_SLICE:
loadSlice(op, subg);
return;
case BuiltinOperator::BuiltinOperator_LOG_SOFTMAX:
loadLogSoftmax(op, subg);
return;
+ case BuiltinOperator::BuiltinOperator_QUANTIZE:
+ loadQuantize(op, subg);
+ return;
+ case BuiltinOperator::BuiltinOperator_SPACE_TO_DEPTH:
+ loadSpaceToDepth(op, subg);
+ return;
default:
throw std::runtime_error(
std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
target_link_libraries(circle_loader PUBLIC onert_core)
target_link_libraries(circle_loader PRIVATE base_loader nnfw_common nnfw_coverage)
+target_link_libraries(circle_loader PRIVATE circle_schema)
install(TARGETS circle_loader DESTINATION lib)
namespace circle_loader
{
std::unique_ptr<ir::Subgraphs> loadModel(const char *filename);
+std::unique_ptr<ir::Subgraphs> loadModel(uint8_t *buffer, size_t size);
} // namespace circle_loader
} // namespace onert
using Tensor = circle::Tensor;
using TensorType = circle::TensorType;
using SubGraph = circle::SubGraph;
+ using DimensionType = circle::DimensionType;
+ using SparseIndexVector = circle::SparseIndexVector;
static const char *EnumNameBuiltinOperator(BuiltinOperator e)
{
class CircleLoader final : public base_loader::BaseLoader<LoaderDomain, CircleLoader>
{
+protected:
+ void loadInstanceNorm(const Operator *op, ir::Graph &subg);
+ void loadBCQFullyConnected(const Operator *op, ir::Graph &subg);
+ void loadBCQGather(const Operator *op, ir::Graph &subg);
+
public:
using BaseLoader::BaseLoader;
}
};
+void CircleLoader::loadInstanceNorm(const Operator *op, ir::Graph &subg)
+{
+ ir::OperandIndexSequence inputs;
+ ir::OperandIndexSequence outputs;
+
+ loadOperationIO(op, inputs, outputs);
+
+ ir::operation::InstanceNorm::Param param;
+ const auto *options = op->builtin_options_as_InstanceNormOptions();
+
+ param.activation = convertActivation(options->fused_activation_function());
+ // Use default value 1e-5 if value of epsilon is zero
+ param.epsilon = options->epsilon() == 0.f ? 1e-5 : options->epsilon();
+
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::InstanceNorm(inputs, outputs, param));
+ subg.addOperation(std::move(new_op));
+}
+
+void CircleLoader::loadBCQGather(const Operator *op, ir::Graph &subg)
+{
+ ir::OperandIndexSequence inputs;
+ ir::OperandIndexSequence outputs;
+
+ loadOperationIO(op, inputs, outputs);
+
+ ir::operation::BCQGather::Param param;
+ const auto *options = op->builtin_options_as_BCQGatherOptions();
+ param.input_hidden_size = options->input_hidden_size();
+ param.axis = options->axis();
+
+ std::unique_ptr<ir::Operation> new_op(new ir::operation::BCQGather(inputs, outputs, param));
+ subg.addOperation(std::move(new_op));
+}
+
+void CircleLoader::loadBCQFullyConnected(const Operator *op, ir::Graph &subg)
+{
+ ir::OperandIndexSequence inputs;
+ ir::OperandIndexSequence outputs;
+
+ loadOperationIO(op, inputs, outputs);
+
+ ir::operation::BCQFullyConnected::Param param;
+ const auto *options = op->builtin_options_as_BCQFullyConnectedOptions();
+ param.weights_hidden_size = options->weights_hidden_size();
+ param.activation = convertActivation(options->fused_activation_function());
+
+ std::unique_ptr<ir::Operation> new_op(
+ new ir::operation::BCQFullyConnected(inputs, outputs, param));
+ subg.addOperation(std::move(new_op));
+}
+
} // namespace
std::unique_ptr<ir::Subgraphs> loadModel(const char *filename)
return subgraphs;
}
+std::unique_ptr<ir::Subgraphs> loadModel(uint8_t *buffer, size_t size)
+{
+ auto subgraphs = std::make_unique<ir::Subgraphs>();
+ CircleLoader loader(subgraphs);
+ loader.loadFromBuffer(buffer, size);
+ return subgraphs;
+}
+
} // namespace circle_loader
} // namespace onert
--- /dev/null
+add_library(circle_schema INTERFACE)
+
+nnfw_find_package(FlatBuffers REQUIRED)
+
+target_link_libraries(circle_schema INTERFACE flatbuffers::flatbuffers)
+
+target_include_directories(circle_schema INTERFACE include)
}
const ANeuralNetworksOperationTypeEx FIRST_OPERATION = ANEURALNETWORKS_CAST_EX;
- const ANeuralNetworksOperationTypeEx LAST_OPERATION = ANEURALNETWORKS_ADDV2_EX;
+ const ANeuralNetworksOperationTypeEx LAST_OPERATION = ANEURALNETWORKS_SPLIT_V_EX;
if ((type < FIRST_OPERATION) || (type > LAST_OPERATION))
{
VERBOSE(NNAPI::Model) << "addOperation: Invalid operation type" << std::endl;
};
}
-} // namespace
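+// A generator function for unary ops with no params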
+template <typename T>
+Operation *CreateSimpleUnaryOp(const OperationFactory::Param &init_param, Operands &)
+{
+ assert(init_param.input_count == 1 && init_param.output_count == 1);
-OperationFactory &OperationFactory::get()
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ // Each input should be interpreted as follows:
+ //
+ // 0 -> Input Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+
+ return new T{inputs, outputs};
+}
+
+// A generator function for binary ops with no params
+template <typename T>
+Operation *createSimpleBinaryOp(const OperationFactory::Param &init_param, Operands &)
{
- static OperationFactory factory;
- return factory;
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
+
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ return new T{inputs, outputs};
}
-OperationFactory::OperationFactory()
+// A generator function for Pool2D ops with implicit or explicit padding
+template <typename T>
+Operation *createPool2DOp(const OperationFactory::Param &init_param, Operands &operands)
{
- _map[ANEURALNETWORKS_BATCH_TO_SPACE_ND] = [](const OperationFactory::Param &init_param,
- Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
+ assert(init_param.input_count == 7 || init_param.input_count == 10);
+ assert(init_param.output_count == 1);
- OperandIndexSequence outputs{init_param.outputs[0]};
+ // In common
+ // 0 -> IFM Tensor Index
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+ typename T::Param param;
+ if (init_param.input_count == 7) // support implicit padding
+ {
// Each input should be interpreted as follows:
//
- // 0 -> Input Tensor Index
- // 1 -> Block size Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+ // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
+ // 2 -> Horizontal (over width) Stride Index
+ // 3 -> Vertical (over height) Stride Index
+ // 4 -> Filter Width Index
+ // 5 -> Filter Height Index
+ // 6 -> FuseCode (activation) Index
- return new operation::BatchToSpaceND{inputs, outputs};
- };
+ const auto padding_index = OperandIndex{init_param.inputs[1]};
+ const auto hstride_index = OperandIndex{init_param.inputs[2]};
+ const auto vstride_index = OperandIndex{init_param.inputs[3]};
+ const auto kw_index = OperandIndex{init_param.inputs[4]};
+ const auto kh_index = OperandIndex{init_param.inputs[5]};
+ const auto activation_index = OperandIndex{init_param.inputs[6]};
+
+ param.padding.type =
+ NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+ param.kw = getUint32Scalar(operands, kw_index);
+ param.kh = operands.at(kh_index).asScalar<uint32_t>();
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+ else // support explicit padding
+ {
+ // Each input should be interpreted as follows:
+ //
+ // 1 -> Padding_left index
+ // 2 -> Padding_right index
+ // 3 -> Padding_top index
+ // 4 -> Padding_bottom index
+ // 5 -> Horizontal (over width) Stride Index
+ // 6 -> Vertical (over height) Stride Index
+ // 7 -> Filter Width Index
+ // 8 -> Filter Height Index
+ // 9 -> FuseCode (activation) Index
+
+ const auto padding_left_index = OperandIndex{init_param.inputs[1]};
+ const auto padding_right_index = OperandIndex{init_param.inputs[2]};
+ const auto padding_top_index = OperandIndex{init_param.inputs[3]};
+ const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
+ const auto hstride_index = OperandIndex{init_param.inputs[5]};
+ const auto vstride_index = OperandIndex{init_param.inputs[6]};
+ const auto kw_index = OperandIndex{init_param.inputs[7]};
+ const auto kh_index = OperandIndex{init_param.inputs[8]};
+ const auto activation_index = OperandIndex{init_param.inputs[9]};
+
+ param.padding.type = PaddingType::EXPLICIT;
+ param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
+ padding_top_index, padding_bottom_index);
+ param.stride = makeStride(operands, hstride_index, vstride_index);
+ param.kw = getUint32Scalar(operands, kw_index);
+ param.kh = getUint32Scalar(operands, kh_index);
+ param.activation =
+ NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+ }
+ return new T{inputs, outputs, param};
+}
+
+} // namespace
+
+OperationFactory &OperationFactory::get()
+{
+ static OperationFactory factory;
+ return factory;
+}
+
+OperationFactory::OperationFactory()
+{
+ // Each input should be interpreted as follows:
+ // 0 -> Input Tensor Index
+ // 1 -> Block size Index
+ _map[ANEURALNETWORKS_BATCH_TO_SPACE_ND] = createSimpleBinaryOp<operation::BatchToSpaceND>;
_map[ANEURALNETWORKS_DEPTHWISE_CONV_2D] = [](const OperationFactory::Param &init_param,
Operands &operands) {
return new operation::DepthwiseConv2D{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_MAX_POOL_2D] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 7 || init_param.input_count == 10);
- assert(init_param.output_count == 1);
+ _map[ANEURALNETWORKS_MAX_POOL_2D] = createPool2DOp<operation::MaxPool2D>;
- // In common
- // 0 -> IFM Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- operation::MaxPool2D::Param param;
- if (init_param.input_count == 7) // support implicit padding
- {
- // Each input should be interpreted as follows:
- //
- // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
- // 2 -> Horizontal (over width) Stride Index
- // 3 -> Vertial (over height) Stride Index
- // 4 -> Filter Width Index
- // 5 -> Filter Height Index
- // 6 -> FuseCode (activation) Index
-
- const auto padding_index = OperandIndex{init_param.inputs[1]};
- const auto hstride_index = OperandIndex{init_param.inputs[2]};
- const auto vstride_index = OperandIndex{init_param.inputs[3]};
- const auto kw_index = OperandIndex{init_param.inputs[4]};
- const auto kh_index = OperandIndex{init_param.inputs[5]};
- const auto activation_index = OperandIndex{init_param.inputs[6]};
-
- param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = operands.at(kh_index).asScalar<uint32_t>();
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
- else if (init_param.input_count == 10) // support explicit padding
- {
- // Each input should be interpreted as follows:
- //
- // 1 -> Padding_left index
- // 2 -> Padding_right index
- // 3 -> Padding_top index
- // 4 -> Padding_bottom index
- // 5 -> Horizontal (over width) Stride Index
- // 6 -> Vertial (over height) Stride Index
- // 7 -> Filter Width Index
- // 8 -> Filter Height Index
- // 9 -> FuseCode (activation) Index
-
- const auto padding_left_index = OperandIndex{init_param.inputs[1]};
- const auto padding_right_index = OperandIndex{init_param.inputs[2]};
- const auto padding_top_index = OperandIndex{init_param.inputs[3]};
- const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
- const auto hstride_index = OperandIndex{init_param.inputs[5]};
- const auto vstride_index = OperandIndex{init_param.inputs[6]};
- const auto kw_index = OperandIndex{init_param.inputs[7]};
- const auto kh_index = OperandIndex{init_param.inputs[8]};
- const auto activation_index = OperandIndex{init_param.inputs[9]};
-
- param.padding.type = PaddingType::EXPLICIT;
- param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
- padding_top_index, padding_bottom_index);
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = getUint32Scalar(operands, kh_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
- return new operation::MaxPool2D{inputs, outputs, param};
- };
-
- _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- // TODO We may reuse code here for MAX_POOL_2D. Seems like these two are identical
- assert(init_param.input_count == 7 || init_param.input_count == 10);
- assert(init_param.output_count == 1);
-
- // In common
- // 0 -> IFM Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- operation::AvgPool2D::Param param;
- if (init_param.input_count == 7) // support implicit padding
- {
- // Each input should be interpreted as follows:
- //
- // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
- // 2 -> Horizontal (over width) Stride Index
- // 3 -> Vertial (over height) Stride Index
- // 4 -> Filter Width Index
- // 5 -> Filter Height Index
- // 6 -> FuseCode (activation) Index
-
- const auto padding_index = OperandIndex{init_param.inputs[1]};
- const auto hstride_index = OperandIndex{init_param.inputs[2]};
- const auto vstride_index = OperandIndex{init_param.inputs[3]};
- const auto kw_index = OperandIndex{init_param.inputs[4]};
- const auto kh_index = OperandIndex{init_param.inputs[5]};
- const auto activation_index = OperandIndex{init_param.inputs[6]};
-
- param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = getUint32Scalar(operands, kh_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
- else if (init_param.input_count == 10) // support explicit padding
- {
- // Each input should be interpreted as follows:
- //
- // 1 -> Padding_left index
- // 2 -> Padding_right index
- // 3 -> Padding_top index
- // 4 -> Padding_bottom index
- // 5 -> Horizontal (over width) Stride Index
- // 6 -> Vertial (over height) Stride Index
- // 7 -> Filter Width Index
- // 8 -> Filter Height Index
- // 9 -> FuseCode (activation) Index
-
- const auto padding_left_index = OperandIndex{init_param.inputs[1]};
- const auto padding_right_index = OperandIndex{init_param.inputs[2]};
- const auto padding_top_index = OperandIndex{init_param.inputs[3]};
- const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
- const auto hstride_index = OperandIndex{init_param.inputs[5]};
- const auto vstride_index = OperandIndex{init_param.inputs[6]};
- const auto kw_index = OperandIndex{init_param.inputs[7]};
- const auto kh_index = OperandIndex{init_param.inputs[8]};
- const auto activation_index = OperandIndex{init_param.inputs[9]};
-
- param.padding.type = PaddingType::EXPLICIT;
- param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
- padding_top_index, padding_bottom_index);
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = getUint32Scalar(operands, kh_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
-
- return new operation::AvgPool2D{inputs, outputs, param};
- };
+ _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = createPool2DOp<operation::AvgPool2D>;
_map[ANEURALNETWORKS_CONCATENATION] = [](const OperationFactory::Param &init_param,
Operands &operands) {
return new operation::Squeeze{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_TANH] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Tanh{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_LOG] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Log{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_LOGISTIC] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
+ _map[ANEURALNETWORKS_TANH] = CreateSimpleUnaryOp<operation::Tanh>;
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
+ _map[ANEURALNETWORKS_LOG] = CreateSimpleUnaryOp<operation::Log>;
- return new operation::Logistic{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_LOGISTIC] = CreateSimpleUnaryOp<operation::Logistic>;
_map[ANEURALNETWORKS_DIV] = [](const OperationFactory::Param &init_param, Operands &operands) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
return new operation::Div{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_EXP] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Exp{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_EXP] = CreateSimpleUnaryOp<operation::Exp>;
// ANEURALNETWORKS_EXP_EX is deprecated
// TODO Remove ANEURALNETWORKS_EXP_EX
_map[ANEURALNETWORKS_EXP_EX] = _map[ANEURALNETWORKS_EXP];
- _map[ANEURALNETWORKS_EXPAND_DIMS] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- // 1 -> Axis Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- return new operation::ExpandDims{inputs, outputs};
- };
+ // Each input should be interpreted as follows:
+ // 0 -> Input Tensor Index
+ // 1 -> Axis Tensor Index
+ _map[ANEURALNETWORKS_EXPAND_DIMS] = createSimpleBinaryOp<operation::ExpandDims>;
_map[ANEURALNETWORKS_GREATER] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 2 && init_param.output_count == 1);
return new operation::Comparison{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_LOGICAL_AND] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input0 Tensor Index
- // 1 -> input1 Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- return new operation::LogicalAnd{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_LOGICAL_AND] = createSimpleBinaryOp<operation::LogicalAnd>;
// ANEURALNETWORKS_LOGICAL_AND_EX is deprecated
// TODO Remove ANEURALNETWORKS_LOGICAL_AND_EX
return new operation::LogicalAnd{inputs, outputs};
};
- _map[ANEURALNETWORKS_RSQRT] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::RSQRT{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_RSQRT] = CreateSimpleUnaryOp<operation::RSQRT>;
_map[ANEURALNETWORKS_SELECT] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
// TODO Remove ANEURALNETWORKS_RSQRT_EX
_map[ANEURALNETWORKS_RSQRT_EX] = _map[ANEURALNETWORKS_RSQRT];
- _map[ANEURALNETWORKS_RELU] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::ReLU{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_RELU] = CreateSimpleUnaryOp<operation::ReLU>;
_map[ANEURALNETWORKS_RESIZE_BILINEAR] = [](const OperationFactory::Param &init_param,
Operands &operands) {
operation::ResizeBilinear::Param param;
param.height_out = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<int32_t>();
param.width_out = operands.at(OperandIndex{init_param.inputs[2]}).asScalar<int32_t>();
-
+ param.align_corners = false;
+ param.half_pixel_centers = false;
return new operation::ResizeBilinear{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_RELU1] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::ReLU1{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_RELU6] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
+ _map[ANEURALNETWORKS_RELU1] = CreateSimpleUnaryOp<operation::ReLU1>;
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::ReLU6{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_RELU6] = CreateSimpleUnaryOp<operation::ReLU6>;
_map[ANEURALNETWORKS_REVERSE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 2 && init_param.output_count == 1);
return new operation::SpaceToDepth{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_L2_POOL_2D] = [](const OperationFactory::Param &init_param,
- Operands &operands) {
- assert(init_param.input_count == 10 || init_param.input_count == 7);
- assert(init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> IFM Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- operation::L2Pool2D::Param param;
-
- if (init_param.input_count == 7) // Imlicit Padding case
- {
- // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
- // 2 -> Horizontal (over width) Stride Index
- // 3 -> Vertial (over height) Stride Index
- // 4 -> Filter Width Index
- // 5 -> Filter Height Index
- // 6 -> FuseCode (activation) Index
- const auto padding_index = OperandIndex{init_param.inputs[1]};
- const auto hstride_index = OperandIndex{init_param.inputs[2]};
- const auto vstride_index = OperandIndex{init_param.inputs[3]};
- const auto kw_index = OperandIndex{init_param.inputs[4]};
- const auto kh_index = OperandIndex{init_param.inputs[5]};
- const auto activation_index = OperandIndex{init_param.inputs[6]};
-
- param.padding.type =
- NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = getUint32Scalar(operands, kh_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
- else // Explicit Padding case
- {
- // 1 -> Padding_left index
- // 2 -> Padding_right index
- // 3 -> Padding_top index
- // 4 -> Padding_bottom index
- // 5 -> Horizontal (over width) Stride Index
- // 6 -> Vertial (over height) Stride Index
- // 7 -> Filter Width Index
- // 8 -> Filter Height Index
- // 9 -> FuseCode (activation) Index
- const auto padding_left_index = OperandIndex{init_param.inputs[1]};
- const auto padding_right_index = OperandIndex{init_param.inputs[2]};
- const auto padding_top_index = OperandIndex{init_param.inputs[3]};
- const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
- const auto hstride_index = OperandIndex{init_param.inputs[5]};
- const auto vstride_index = OperandIndex{init_param.inputs[6]};
- const auto kw_index = OperandIndex{init_param.inputs[7]};
- const auto kh_index = OperandIndex{init_param.inputs[8]};
- const auto activation_index = OperandIndex{init_param.inputs[9]};
-
- param.padding.type = PaddingType::EXPLICIT;
- param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
- padding_top_index, padding_bottom_index);
- param.stride = makeStride(operands, hstride_index, vstride_index);
- param.kw = getUint32Scalar(operands, kw_index);
- param.kh = getUint32Scalar(operands, kh_index);
- param.activation =
- NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
- }
-
- return new operation::L2Pool2D{inputs, outputs, param};
- };
+ _map[ANEURALNETWORKS_L2_POOL_2D] = createPool2DOp<operation::L2Pool2D>;
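Likewise, createPool2DOp is assumed to be a shared generator defined elsewhere in this patch. A condensed sketch, reconstructed from the removed L2Pool2D lambda above (the template form and names are assumptions):

template <typename T>
Operation *createPool2DOp(const OperationFactory::Param &init_param, Operands &operands)
{
  assert(init_param.input_count == 7 || init_param.input_count == 10);
  assert(init_param.output_count == 1);

  // 0 -> IFM Tensor Index
  OperandIndexSequence inputs{init_param.inputs[0]};
  OperandIndexSequence outputs{init_param.outputs[0]};

  typename T::Param param;
  if (init_param.input_count == 7) // Implicit padding: 1 -> padding code, 2-3 -> strides, 4-5 -> filter size, 6 -> activation
  {
    param.padding.type =
        NNAPIConvert::getPaddingType(operands.at(OperandIndex{init_param.inputs[1]}).asScalar<PaddingCode>());
    param.stride = makeStride(operands, OperandIndex{init_param.inputs[2]}, OperandIndex{init_param.inputs[3]});
    param.kw = getUint32Scalar(operands, OperandIndex{init_param.inputs[4]});
    param.kh = getUint32Scalar(operands, OperandIndex{init_param.inputs[5]});
    param.activation =
        NNAPIConvert::getFusedActivation(operands.at(OperandIndex{init_param.inputs[6]}).asScalar<FuseCode>());
  }
  else // Explicit padding: 1-4 -> left/right/top/bottom, 5-6 -> strides, 7-8 -> filter size, 9 -> activation
  {
    param.padding.type = PaddingType::EXPLICIT;
    param.padding.param = makeExplicitPadding(operands, OperandIndex{init_param.inputs[1]},
                                               OperandIndex{init_param.inputs[2]},
                                               OperandIndex{init_param.inputs[3]},
                                               OperandIndex{init_param.inputs[4]});
    param.stride = makeStride(operands, OperandIndex{init_param.inputs[5]}, OperandIndex{init_param.inputs[6]});
    param.kw = getUint32Scalar(operands, OperandIndex{init_param.inputs[7]});
    param.kh = getUint32Scalar(operands, OperandIndex{init_param.inputs[8]});
    param.activation =
        NNAPIConvert::getFusedActivation(operands.at(OperandIndex{init_param.inputs[9]}).asScalar<FuseCode>());
  }

  return new T{inputs, outputs, param};
}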
_map[ANEURALNETWORKS_EMBEDDING_LOOKUP] = [](const OperationFactory::Param &init_param,
Operands &) {
return new operation::LogicalOr{inputs, outputs};
};
- _map[ANEURALNETWORKS_LOGICAL_NOT] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::LogicalNot{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_LOGICAL_NOT] = CreateSimpleUnaryOp<operation::LogicalNot>;
// ANEURALNETWORKS_LOGICAL_NOT_EX is deprecated
// TODO Remove ANEURALNETWORKS_LOGICAL_NOT_EX
// TODO Remove ANEURALNETWORKS_GATHER_EX
_map[ANEURALNETWORKS_GATHER_EX] = _map[ANEURALNETWORKS_GATHER];
- _map[ANEURALNETWORKS_NEG] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Neg{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_NEG] = CreateSimpleUnaryOp<operation::Neg>;
// ANEURALNETWORKS_NEG_EX is deprecated
// TODO Remove ANEURALNETWORKS_NEG_EX
_map[ANEURALNETWORKS_NEG_EX] = _map[ANEURALNETWORKS_NEG];
- _map[ANEURALNETWORKS_ABS] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Abs{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_ABS] = CreateSimpleUnaryOp<operation::Abs>;
// ANEURALNETWORKS_ABS_EX is deprecated
// TODO Remove ANEURALNETWORKS_ABS_EX
// TODO Remove ANEURALNETWORKS_ARGMAX_EX
_map[ANEURALNETWORKS_ARGMAX_EX] = _map[ANEURALNETWORKS_ARGMAX];
- _map[ANEURALNETWORKS_DEQUANTIZE] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 1 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- OperandIndexSequence inputs{init_param.inputs[0]};
-
- return new operation::Dequantize{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_DEQUANTIZE] = CreateSimpleUnaryOp<operation::Dequantize>;
_map[ANEURALNETWORKS_MEAN] = [](const OperationFactory::Param &init_param, Operands &operands) {
assert(init_param.input_count == 3 && init_param.output_count == 1);
return new operation::Split{inputs, outputs, param};
};
+ _map[ANEURALNETWORKS_SPLIT_V_EX] = [](const OperationFactory::Param &init_param,
+ Operands &operands) {
+ assert(init_param.input_count == 4);
+    assert(init_param.output_count >= 1); // At least one output tensor
+
+ OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]};
+ OperandIndexSequence outputs;
+ for (uint32_t n = 0; n < init_param.output_count; ++n)
+ {
+ outputs.append(OperandIndex{init_param.outputs[n]});
+ }
+
+ operation::SplitV::Param param;
+ param.num_splits = operands.at(OperandIndex{init_param.inputs[3]}).asScalar<std::int32_t>();
+ return new operation::SplitV{inputs, outputs, param};
+ };
+
// ANEURALNETWORKS_SPLIT_EX is deprecated
// TODO Remove ANEURALNETWORKS_SPLIT_EX
_map[ANEURALNETWORKS_SPLIT_EX] = _map[ANEURALNETWORKS_SPLIT];
};
_map[ANEURALNETWORKS_PAD] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count >= 1);
+ assert(init_param.input_count >= 2 && init_param.input_count <= 3 &&
+ init_param.output_count >= 1);
OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+ if (init_param.input_count == 3)
+ {
+ inputs.append(OperandIndex{init_param.inputs[2]});
+ }
OperandIndexSequence outputs{init_param.outputs[0]};
return new operation::Pad{inputs, outputs};
};
- _map[ANEURALNETWORKS_MINIMUM] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- OperandIndexSequence outputs{init_param.outputs[0]};
+ _map[ANEURALNETWORKS_PAD_V2] = _map[ANEURALNETWORKS_PAD];
- return new operation::Min{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_MAXIMUM] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- OperandIndexSequence outputs{init_param.outputs[0]};
+ _map[ANEURALNETWORKS_MINIMUM] = createSimpleBinaryOp<operation::Min>;
- return new operation::Max{inputs, outputs};
- };
+ _map[ANEURALNETWORKS_MAXIMUM] = createSimpleBinaryOp<operation::Max>;
_map[ANEURALNETWORKS_ONE_HOT_EX] = [](const OperationFactory::Param &init_param,
Operands &operands) {
return new operation::Range{inputs, outputs};
};
- _map[ANEURALNETWORKS_POW] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> LHS Tensor Index
- // 1 -> RHS Tensor Index
+ // Each input should be interpreted as follows:
+ // 0 -> LHS Tensor Index
+ // 1 -> RHS Tensor Index
+ _map[ANEURALNETWORKS_POW] = createSimpleBinaryOp<operation::Pow>;
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- return new operation::Pow{inputs, outputs};
- };
-
- _map[ANEURALNETWORKS_FILL_EX] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- // Each input should be interpreted as follows:
- //
- // 0 -> A tensor, specifying the input.
- // 1 -> A 1-D tensor, specifying the value
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- return new operation::Fill{inputs, outputs};
- };
+ // Each input should be interpreted as follows:
+ // 0 -> A tensor, specifying the input.
+ // 1 -> A 1-D tensor, specifying the value
+ _map[ANEURALNETWORKS_FILL_EX] = createSimpleBinaryOp<operation::Fill>;
_map[ANEURALNETWORKS_ZEROS_LIKE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
assert(init_param.input_count == 1 && init_param.output_count == 1);
return new operation::ZerosLike{inputs, outputs};
};
- _map[ANEURALNETWORKS_TILE] = [](const OperationFactory::Param &init_param, Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
-
- OperandIndexSequence outputs{init_param.outputs[0]};
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Input Tensor Index
- // 1 -> Multiple Tensor Index
-
- OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
- return new operation::Tile{inputs, outputs};
- };
+ // Each input should be interpreted as follows:
+ // 0 -> Input Tensor Index
+ // 1 -> Multiple Tensor Index
+ _map[ANEURALNETWORKS_TILE] = createSimpleBinaryOp<operation::Tile>;
_map[ANEURALNETWORKS_MATRIX_BAND_PART_EX] = [](const OperationFactory::Param &init_param,
Operands &) {
return new operation::Einsum{inputs, outputs, param};
};
- _map[ANEURALNETWORKS_BROADCAST_TO_EX] = [](const OperationFactory::Param &init_param,
- Operands &) {
- assert(init_param.input_count == 2 && init_param.output_count == 1);
+ // 0 -> Input Tensor Index
+  // 1 -> A 1-D int tensor Index (int32 or int64)
+ _map[ANEURALNETWORKS_BROADCAST_TO_EX] = createSimpleBinaryOp<operation::BroadcastTo>;
+ _map[ANEURALNETWORKS_STATELESS_RANDOM_UNIFORM_EX] = [](const OperationFactory::Param &init_param,
+ Operands &) {
+ assert(init_param.input_count == 2 && init_param.output_count == 1);
OperandIndexSequence outputs{init_param.outputs[0]};
// Each input should be interpreted as follows:
//
- // 0 -> Input Tensor Index
+ // 0 -> Shape Tensor Index
// 1 -> int32, int64, An 1-D int tensor Index
OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
- return new operation::BroadcastTo{inputs, outputs};
+ return new operation::StatelessRandomUniform{inputs, outputs};
};
_map[ANEURALNETWORKS_FUSED_BATCH_NORM_V3_EX] = [](const OperationFactory::Param &init_param,
return new operation::LogSoftmax{inputs, outputs, param};
};
+
+ _map[ANEURALNETWORKS_QUANTIZE] = [](const OperationFactory::Param &init_param, Operands &) {
+ assert(init_param.input_count == 1 && init_param.output_count == 1);
+
+ OperandIndexSequence inputs{init_param.inputs[0]};
+ OperandIndexSequence outputs{init_param.outputs[0]};
+
+ return new operation::Quantize{inputs, outputs};
+ };
}
Operation *OperationFactory::create(ANeuralNetworksOperationType type,
using Tensor = onert_tflite::Tensor;
using TensorType = onert_tflite::TensorType;
using SubGraph = onert_tflite::SubGraph;
+ using DimensionType = onert_tflite::DimensionType;
+ using SparseIndexVector = onert_tflite::SparseIndexVector;
static const char *EnumNameBuiltinOperator(BuiltinOperator e)
{
list(APPEND MINIMAL_SRCS "src/minimal.cc")
-add_executable(minimal ${MINIMAL_SRCS})
-target_link_libraries(minimal nnfw-dev pthread dl)
+add_executable(onert-minimal-app ${MINIMAL_SRCS})
+target_link_libraries(onert-minimal-app nnfw-dev pthread dl)
-install(TARGETS minimal DESTINATION bin)
+install(TARGETS onert-minimal-app DESTINATION bin)
#include "nnfw.h"
#include <vector>
+#include <iostream>
uint64_t num_elems(const nnfw_tensorinfo *ti)
{
nnfw_close_session(session);
+ std::cout << "nnpackage " << argv[1] << " runs successfully." << std::endl;
return 0;
}
// Compile
auto subgs = std::make_shared<onert::ir::Subgraphs>();
subgs->push(onert::ir::SubgraphIndex{0}, graph);
- auto compiler = new onert::compiler::Compiler{subgs};
- executors = compiler->compile();
- delete compiler;
+ onert::compiler::Compiler compiler{subgs};
+ executors = compiler.compile();
}
public:
float output_buffer[4] = {};
const float output_expected[4] = {5, -2, 0, -1};
- auto execution = new onert::exec::Execution(executors);
+ onert::exec::Execution execution{executors};
- execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
- execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
- execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
- execution->execute();
+ execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+ execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+ execution.execute();
for (auto i = 0; i < 4; i++)
{
EXPECT_EQ(output_buffer[i], output_expected[i]);
}
-
- delete execution;
}
TEST(ExecInstance, twoCompile)
auto mockup = CompiledMockUpModel();
auto graph = mockup.graph;
auto executors1 = mockup.executors;
- auto execution1 = new onert::exec::Execution(executors1);
+ onert::exec::Execution execution1{executors1};
auto input1 = IOIndex{0};
auto input2 = IOIndex{1};
float exe1_output_buffer[4] = {};
const float exe1_output_expected[4] = {5, -2, 0, -1};
- execution1->setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
- execution1->setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
- execution1->setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16);
+ execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+ execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+ execution1.setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16);
// Make new executor: compile again
auto subgs = std::make_shared<onert::ir::Subgraphs>();
subgs->push(onert::ir::SubgraphIndex{0}, graph);
- auto compiler = new onert::compiler::Compiler{subgs};
- std::shared_ptr<onert::exec::ExecutorMap> executors2 = compiler->compile();
- auto execution2 = new onert::exec::Execution(executors2);
+ onert::compiler::Compiler compiler{subgs};
+ std::shared_ptr<onert::exec::ExecutorMap> executors2 = compiler.compile();
+ onert::exec::Execution execution2{executors2};
const float exe2_input1_buffer[4] = {2, 1, -2, 0};
const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
float exe2_output_buffer[4] = {};
const float exe2_output_expected[4] = {2, 5, -2, 7};
- execution2->setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
- execution2->setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
- execution2->setOutput(output, reinterpret_cast<void *>(exe2_output_buffer), 16);
+ execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+ execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+ execution2.setOutput(output, reinterpret_cast<void *>(exe2_output_buffer), 16);
- execution1->execute();
- execution2->execute();
+ execution1.execute();
+ execution2.execute();
for (auto i = 0; i < 4; i++)
{
EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
}
-
- delete compiler;
- delete execution1;
- delete execution2;
}
// Support two initialized execution instance then ordered execution
const float exe1_output_expected[4] = {5, -2, 0, -1};
const float exe2_output_expected[4] = {2, 5, -2, 7};
- auto execution1 = new onert::exec::Execution(executors);
- execution1->setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
- execution1->setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
- execution1->setOutput(output1, reinterpret_cast<void *>(exe1_output_buffer), 16);
+ onert::exec::Execution execution1{executors};
+ execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+ execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+ execution1.setOutput(output1, reinterpret_cast<void *>(exe1_output_buffer), 16);
const float exe2_input1_buffer[4] = {2, 1, -2, 0};
const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
float exe2_output_buffer[4] = {};
// Make new execution
- auto execution2 = new onert::exec::Execution(executors);
- execution2->setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
- execution2->setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
- execution2->setOutput(output1, reinterpret_cast<void *>(exe2_output_buffer), 16);
+ onert::exec::Execution execution2{executors};
+ execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+ execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+ execution2.setOutput(output1, reinterpret_cast<void *>(exe2_output_buffer), 16);
- execution1->execute();
- execution2->execute();
+ execution1.execute();
+ execution2.execute();
for (auto i = 0; i < 4; i++)
{
EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
}
-
- delete execution1;
- delete execution2;
}
class Inference
auto input2 = IOIndex{1};
auto output1 = IOIndex{0};
- auto execution = new onert::exec::Execution(_executors);
- execution->setInput(input1, reinterpret_cast<const void *>(_input1), 16);
- execution->setInput(input2, reinterpret_cast<const void *>(_input2), 16);
- execution->setOutput(output1, reinterpret_cast<void *>(_output), 16);
+ onert::exec::Execution execution{_executors};
+ execution.setInput(input1, reinterpret_cast<const void *>(_input1), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(_input2), 16);
+ execution.setOutput(output1, reinterpret_cast<void *>(_output), 16);
- execution->execute();
-
- delete execution;
+ execution.execute();
}
private:
float output_buffer[4] = {};
const float output_expected[4] = {5, -2, 0, -1};
- auto execution = new onert::exec::Execution(executors);
+ onert::exec::Execution execution{executors};
- execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
- execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
- execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
- execution->startExecute();
- execution->waitFinish();
+ execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+ execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+ execution.startExecute();
+ execution.waitFinish();
for (auto i = 0; i < 4; i++)
{
EXPECT_EQ(output_buffer[i], output_expected[i]);
}
-
- delete execution;
}
} // namespace
ASSERT_EQ(verifier.verify(graph), true);
// Check def
- ASSERT_EQ(graph.operands().at(operand_index1).getDef().contains(mocknode_index1), true);
- ASSERT_EQ(graph.operands().at(operand_index2).getDef().contains(mocknode_index2), true);
- ASSERT_EQ(graph.operands().at(output_operand).getDef().contains(multiinput_index), true);
+ ASSERT_EQ(graph.operands().at(operand_index1).getDef(), mocknode_index1);
+ ASSERT_EQ(graph.operands().at(operand_index2).getDef(), mocknode_index2);
+ ASSERT_EQ(graph.operands().at(output_operand).getDef(), multiinput_index);
- ASSERT_EQ(graph.operands().at(operand_index1).getDef().contains(mocknode_index2), false);
- ASSERT_EQ(graph.operands().at(operand_index1).getDef().contains(multiinput_index), false);
+ ASSERT_NE(graph.operands().at(operand_index1).getDef(), mocknode_index2);
+ ASSERT_NE(graph.operands().at(operand_index1).getDef(), multiinput_index);
// Check use
ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index1), true);
add_nnfw_custom_op_app(FillFrom_runner
SOURCES FillFrom_runner.cc
KERNELS FillFrom)
-install(TARGETS FillFrom_runner DESTINATION tests)
-install(DIRECTORY nnpkgs/FillFrom DESTINATION tests/nnpkgs)
-install_nnfw_custom_op_kernel(FillFrom tests/nnpkgs/FillFrom)
+install(TARGETS FillFrom_runner DESTINATION test)
+install(DIRECTORY nnpkgs/FillFrom DESTINATION test/nnpkgs)
+install_nnfw_custom_op_kernel(FillFrom test/nnpkgs/FillFrom)
*/
#include "nnfw.h"
-#include "nnfw_dev.h"
+#include "nnfw_experimental.h"
#include <cassert>
#include <iostream>
* limitations under the License.
*/
-#include "nnfw_dev.h"
+#include "nnfw_experimental.h"
#include "flatbuffers/flexbuffers.h"
GeneratedTests.cast_float16_to_quant8_overflow
GeneratedTests.cast_float32_to_float16
GeneratedTests.cast_float32_to_float16_relaxed
+GeneratedTests.cast_float32_to_int32_nnfw
GeneratedTests.cast_int32_to_float16
-GeneratedTests.cast_int32_to_quant8_overflow
GeneratedTests.cast_quant8_to_float16
GeneratedTests.concat_dynamic_nnfw
GeneratedTests.conv_dynamic_nnfw
GeneratedTests.gather_float16_8
GeneratedTests.greater_dynamic_float_nnfw
GeneratedTests.greater_equal_dynamic_float_nnfw
+GeneratedTests.l2_normalization_quant8_nnfw
GeneratedTests.less_dynamic_float_nnfw
GeneratedTests.less_equal_dynamic_float_nnfw
GeneratedTests.log_4D_float_nnfw
GeneratedTests.one_hot_ex_dynamic_nnfw
GeneratedTests.pack_ex_dynamic_nnfw
GeneratedTests.pad_dynamic_nnfw
+GeneratedTests.pad_v2_1_float
+GeneratedTests.pad_v2_1_quant8
+GeneratedTests.pad_v2_all_dims
+GeneratedTests.pad_v2_all_dims_quant8
+GeneratedTests.pad_v2_low_rank
+GeneratedTests.pad_v2_low_rank_quant8
GeneratedTests.pow_2D_float_nnfw
GeneratedTests.pow_broadcast_float_nnfw
GeneratedTests.pow_broadcast_float_nnfw_2
GeneratedTests.pow_broadcast_float_nnfw_3
GeneratedTests.pow_dynamic_nnfw
+GeneratedTests.quantize_quant8
+GeneratedTests.quantize_quant8_2
+GeneratedTests.quantize_quant8_3
+GeneratedTests.quantize_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
GeneratedTests.range_ex_float_1
GeneratedTests.range_ex_float_1_all_constant_inputs
GeneratedTests.range_ex_float_1_dynamic_nnfw
GeneratedTests.softmax_dynamic_nnfw
GeneratedTests.space_to_batch_dynamic_float_nnfw
GeneratedTests.split_dynamic_float_nnfw
+GeneratedTests.split_v_ex_1D_float_1_nnfw
+GeneratedTests.split_v_ex_1D_float_2_nnfw
+GeneratedTests.split_v_ex_1D_int32_nnfw
+GeneratedTests.split_v_ex_4D_float_1_nnfw
+GeneratedTests.split_v_ex_4D_float_2_nnfw
+GeneratedTests.split_v_ex_4D_float_3_nnfw
+GeneratedTests.split_v_ex_4D_float_4_nnfw
+GeneratedTests.split_v_ex_4D_int32_1_nnfw
+GeneratedTests.split_v_ex_4D_int32_2_nnfw
+GeneratedTests.split_v_ex_4D_int32_3_nnfw
+GeneratedTests.split_v_ex_4D_int32_4_nnfw
GeneratedTests.sqrt_
GeneratedTests.squared_difference_ex_dynamic_nnfw
GeneratedTests.squeeze_dynamic_float_nnfw
+GeneratedTests.stateless_random_uniform_ex_nnfw
GeneratedTests.strided_slice_dynamic_nnfw
GeneratedTests.sub_dynamic_nnfw
GeneratedTests.sub_v1_2_zero_sized
GeneratedTests.cast_float16_to_quant8_overflow
GeneratedTests.cast_float32_to_float16
GeneratedTests.cast_float32_to_float16_relaxed
-GeneratedTests.cast_float32_to_quant8_overflow
-GeneratedTests.cast_float32_to_quant8_overflow_relaxed
GeneratedTests.cast_int32_to_float16
-GeneratedTests.cast_int32_to_quant8_overflow
GeneratedTests.cast_quant8_to_float16
GeneratedTests.concat_dynamic_nnfw
GeneratedTests.conv_dynamic_nnfw
GeneratedTests.greater_dynamic_float_nnfw
GeneratedTests.greater_equal_boolean
GeneratedTests.greater_equal_dynamic_float_nnfw
+GeneratedTests.l2_normalization_quant8_nnfw
GeneratedTests.less_boolean
GeneratedTests.less_dynamic_float_nnfw
GeneratedTests.less_equal_dynamic_float_nnfw
GeneratedTests.one_hot_ex_dynamic_nnfw
GeneratedTests.pack_ex_dynamic_nnfw
GeneratedTests.pad_dynamic_nnfw
+GeneratedTests.pad_v2_1_float
+GeneratedTests.pad_v2_1_quant8
+GeneratedTests.pad_v2_all_dims
+GeneratedTests.pad_v2_all_dims_quant8
+GeneratedTests.pad_v2_low_rank
+GeneratedTests.pad_v2_low_rank_quant8
GeneratedTests.pow_2D_float_nnfw
GeneratedTests.pow_broadcast_float_nnfw
GeneratedTests.pow_broadcast_float_nnfw_2
GeneratedTests.pow_broadcast_float_nnfw_3
GeneratedTests.pow_dynamic_nnfw
+GeneratedTests.quantize_quant8
+GeneratedTests.quantize_quant8_2
+GeneratedTests.quantize_quant8_3
+GeneratedTests.quantize_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
GeneratedTests.range_ex_float_1
GeneratedTests.range_ex_float_1_all_constant_inputs
GeneratedTests.range_ex_float_1_dynamic_nnfw
GeneratedTests.space_to_batch_quant8_2_nnfw
GeneratedTests.space_to_batch_quant8_3
GeneratedTests.split_dynamic_float_nnfw
+GeneratedTests.split_v_ex_1D_float_1_nnfw
+GeneratedTests.split_v_ex_1D_float_2_nnfw
+GeneratedTests.split_v_ex_1D_int32_nnfw
+GeneratedTests.split_v_ex_4D_float_1_nnfw
+GeneratedTests.split_v_ex_4D_float_2_nnfw
+GeneratedTests.split_v_ex_4D_float_3_nnfw
+GeneratedTests.split_v_ex_4D_float_4_nnfw
+GeneratedTests.split_v_ex_4D_int32_1_nnfw
+GeneratedTests.split_v_ex_4D_int32_2_nnfw
+GeneratedTests.split_v_ex_4D_int32_3_nnfw
+GeneratedTests.split_v_ex_4D_int32_4_nnfw
GeneratedTests.sqrt_
GeneratedTests.squared_difference_ex_dynamic_nnfw
GeneratedTests.squeeze_dynamic_float_nnfw
+GeneratedTests.stateless_random_uniform_ex_nnfw
GeneratedTests.strided_slice_dynamic_nnfw
GeneratedTests.sub_dynamic_nnfw
GeneratedTests.sub_v1_2_zero_sized
GeneratedTests.abs_
-GeneratedTests.batch_to_space
-GeneratedTests.batch_to_space_float_1
-GeneratedTests.batch_to_space_quant8_1
GeneratedTests.cast_float16_to_float16
GeneratedTests.cast_float16_to_float32
GeneratedTests.cast_float16_to_float32_relaxed
GeneratedTests.hashtable_lookup_float
GeneratedTests.hashtable_lookup_float_4D_nnfw
GeneratedTests.hashtable_lookup_quant8
-GeneratedTests.l2_normalization
-GeneratedTests.l2_normalization_2
-GeneratedTests.l2_normalization_large
GeneratedTests.l2_pool_float
GeneratedTests.l2_pool_float_2
GeneratedTests.l2_pool_float_large
GeneratedTests.neg
GeneratedTests.neg_3D_int_nnfw
GeneratedTests.neg_4D_int_nnfw
-GeneratedTests.pad_quant8_nnfw
GeneratedTests.prelu
GeneratedTests.prelu_broadcast_float_1_nnfw
GeneratedTests.prelu_broadcast_quant8_1_nnfw
GeneratedTests.prelu_weight_as_input_quant8_2
GeneratedTests.prelu_weight_as_input_quant8_3
GeneratedTests.prelu_weight_as_input_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
GeneratedTests.reduce_max_quant8
GeneratedTests.reduce_max_quant8_1_nnfw
GeneratedTests.reduce_max_quant8_2
GeneratedTests.relu1_float_2
GeneratedTests.relu1_quant8_1
GeneratedTests.relu1_quant8_2
-GeneratedTests.relu6_float_1
-GeneratedTests.relu6_float_2
GeneratedTests.relu6_quant8_1
GeneratedTests.relu6_quant8_2
GeneratedTests.relu_quant8_1
GeneratedTests.relu_quant8_2
-GeneratedTests.resize_bilinear
-GeneratedTests.resize_bilinear_2
GeneratedTests.rnn
GeneratedTests.rnn_state
GeneratedTests.rsqrt
GeneratedTests.select_v1_2_two_dim_quant8
GeneratedTests.slice_5
GeneratedTests.slice_6
-GeneratedTests.slice_7
GeneratedTests.slice_8
GeneratedTests.slice_zero_sized
GeneratedTests.slice_zero_sized_quant8
-GeneratedTests.space_to_depth_float_1
-GeneratedTests.space_to_depth_float_2
-GeneratedTests.space_to_depth_float_3
-GeneratedTests.space_to_depth_quant8_1
-GeneratedTests.space_to_depth_quant8_2
GeneratedTests.sqrt_
GeneratedTests.sqrt_1D_float_nnfw
GeneratedTests.sqrt_2D_float_nnfw
GeneratedTests.cast_float16_to_quant8_overflow
GeneratedTests.cast_float32_to_float16
GeneratedTests.cast_float32_to_float16_relaxed
+GeneratedTests.cast_float32_to_int32_nnfw
GeneratedTests.cast_int32_to_float16
-GeneratedTests.cast_int32_to_quant8_overflow
GeneratedTests.cast_quant8_to_float16
GeneratedTests.concat_dynamic_nnfw
GeneratedTests.conv_dynamic_nnfw
GeneratedTests.gather_float16_8
GeneratedTests.greater_dynamic_float_nnfw
GeneratedTests.greater_equal_dynamic_float_nnfw
+GeneratedTests.l2_normalization_quant8_nnfw
GeneratedTests.less_dynamic_float_nnfw
GeneratedTests.less_equal_dynamic_float_nnfw
GeneratedTests.log_4D_float_nnfw
GeneratedTests.one_hot_ex_dynamic_nnfw
GeneratedTests.pack_ex_dynamic_nnfw
GeneratedTests.pad_dynamic_nnfw
+GeneratedTests.pad_v2_1_float
+GeneratedTests.pad_v2_1_quant8
+GeneratedTests.pad_v2_all_dims
+GeneratedTests.pad_v2_all_dims_quant8
+GeneratedTests.pad_v2_low_rank
+GeneratedTests.pad_v2_low_rank_quant8
GeneratedTests.pow_2D_float_nnfw
GeneratedTests.pow_broadcast_float_nnfw
GeneratedTests.pow_broadcast_float_nnfw_2
GeneratedTests.pow_broadcast_float_nnfw_3
GeneratedTests.pow_dynamic_nnfw
+GeneratedTests.quantize_quant8
+GeneratedTests.quantize_quant8_2
+GeneratedTests.quantize_quant8_3
+GeneratedTests.quantize_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
GeneratedTests.range_ex_float_1
GeneratedTests.range_ex_float_1_all_constant_inputs
GeneratedTests.range_ex_float_1_dynamic_nnfw
GeneratedTests.softmax_dynamic_nnfw
GeneratedTests.space_to_batch_dynamic_float_nnfw
GeneratedTests.split_dynamic_float_nnfw
+GeneratedTests.split_v_ex_1D_float_1_nnfw
+GeneratedTests.split_v_ex_1D_float_2_nnfw
+GeneratedTests.split_v_ex_1D_int32_nnfw
+GeneratedTests.split_v_ex_4D_float_1_nnfw
+GeneratedTests.split_v_ex_4D_float_2_nnfw
+GeneratedTests.split_v_ex_4D_float_3_nnfw
+GeneratedTests.split_v_ex_4D_float_4_nnfw
+GeneratedTests.split_v_ex_4D_int32_1_nnfw
+GeneratedTests.split_v_ex_4D_int32_2_nnfw
+GeneratedTests.split_v_ex_4D_int32_3_nnfw
+GeneratedTests.split_v_ex_4D_int32_4_nnfw
GeneratedTests.sqrt_
GeneratedTests.squared_difference_ex_dynamic_nnfw
GeneratedTests.squeeze_dynamic_float_nnfw
+GeneratedTests.stateless_random_uniform_ex_nnfw
GeneratedTests.strided_slice_dynamic_nnfw
GeneratedTests.sub_dynamic_nnfw
GeneratedTests.sub_v1_2_zero_sized
GeneratedTests.cast_float16_to_quant8_overflow
GeneratedTests.cast_float32_to_float16
GeneratedTests.cast_float32_to_float16_relaxed
-GeneratedTests.cast_float32_to_quant8_overflow
-GeneratedTests.cast_float32_to_quant8_overflow_relaxed
GeneratedTests.cast_int32_to_float16
-GeneratedTests.cast_int32_to_quant8_overflow
GeneratedTests.cast_quant8_to_float16
GeneratedTests.concat_dynamic_nnfw
GeneratedTests.conv_dynamic_nnfw
GeneratedTests.greater_equal_boolean
GeneratedTests.greater_equal_dynamic_float_nnfw
GeneratedTests.less_boolean
+GeneratedTests.l2_normalization_quant8_nnfw
GeneratedTests.less_dynamic_float_nnfw
GeneratedTests.less_equal_dynamic_float_nnfw
GeneratedTests.log_4D_float_nnfw
GeneratedTests.one_hot_ex_dynamic_nnfw
GeneratedTests.pack_ex_dynamic_nnfw
GeneratedTests.pad_dynamic_nnfw
+GeneratedTests.pad_v2_1_float
+GeneratedTests.pad_v2_1_quant8
+GeneratedTests.pad_v2_all_dims
+GeneratedTests.pad_v2_all_dims_quant8
+GeneratedTests.pad_v2_low_rank
+GeneratedTests.pad_v2_low_rank_quant8
GeneratedTests.pow_2D_float_nnfw
GeneratedTests.pow_broadcast_float_nnfw
GeneratedTests.pow_broadcast_float_nnfw_2
GeneratedTests.pow_broadcast_float_nnfw_3
GeneratedTests.pow_dynamic_nnfw
+GeneratedTests.quantize_quant8
+GeneratedTests.quantize_quant8_2
+GeneratedTests.quantize_quant8_3
+GeneratedTests.quantize_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
GeneratedTests.range_ex_float_1
GeneratedTests.range_ex_float_1_all_constant_inputs
GeneratedTests.range_ex_float_1_dynamic_nnfw
GeneratedTests.softmax_dynamic_nnfw
GeneratedTests.space_to_batch_dynamic_float_nnfw
GeneratedTests.split_dynamic_float_nnfw
+GeneratedTests.split_v_ex_1D_float_1_nnfw
+GeneratedTests.split_v_ex_1D_float_2_nnfw
+GeneratedTests.split_v_ex_1D_int32_nnfw
+GeneratedTests.split_v_ex_4D_float_1_nnfw
+GeneratedTests.split_v_ex_4D_float_2_nnfw
+GeneratedTests.split_v_ex_4D_float_3_nnfw
+GeneratedTests.split_v_ex_4D_float_4_nnfw
+GeneratedTests.split_v_ex_4D_int32_1_nnfw
+GeneratedTests.split_v_ex_4D_int32_2_nnfw
+GeneratedTests.split_v_ex_4D_int32_3_nnfw
+GeneratedTests.split_v_ex_4D_int32_4_nnfw
GeneratedTests.sqrt_
GeneratedTests.squared_difference_ex_dynamic_nnfw
GeneratedTests.squeeze_dynamic_float_nnfw
+GeneratedTests.stateless_random_uniform_ex_nnfw
GeneratedTests.strided_slice_dynamic_nnfw
GeneratedTests.sub_dynamic_nnfw
GeneratedTests.sub_v1_2_zero_sized
GeneratedTests.abs_
-GeneratedTests.batch_to_space
-GeneratedTests.batch_to_space_float_1
-GeneratedTests.batch_to_space_quant8_1
GeneratedTests.cast_float16_to_float16
GeneratedTests.cast_float16_to_float32
GeneratedTests.cast_float16_to_float32_relaxed
GeneratedTests.hashtable_lookup_float
GeneratedTests.hashtable_lookup_float_4D_nnfw
GeneratedTests.hashtable_lookup_quant8
-GeneratedTests.l2_normalization
-GeneratedTests.l2_normalization_2
-GeneratedTests.l2_normalization_large
GeneratedTests.l2_pool_float
GeneratedTests.l2_pool_float_2
GeneratedTests.l2_pool_float_large
GeneratedTests.neg
GeneratedTests.neg_3D_int_nnfw
GeneratedTests.neg_4D_int_nnfw
-GeneratedTests.pad_quant8_nnfw
GeneratedTests.prelu
GeneratedTests.prelu_broadcast_float_1_nnfw
GeneratedTests.prelu_broadcast_quant8_1_nnfw
GeneratedTests.prelu_weight_as_input_quant8_2
GeneratedTests.prelu_weight_as_input_quant8_3
GeneratedTests.prelu_weight_as_input_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
GeneratedTests.reduce_max_quant8
GeneratedTests.reduce_max_quant8_1_nnfw
GeneratedTests.reduce_max_quant8_2
GeneratedTests.relu1_float_2
GeneratedTests.relu1_quant8_1
GeneratedTests.relu1_quant8_2
-GeneratedTests.relu6_float_1
-GeneratedTests.relu6_float_2
GeneratedTests.relu6_quant8_1
GeneratedTests.relu6_quant8_2
GeneratedTests.relu_quant8_1
GeneratedTests.relu_quant8_2
-GeneratedTests.resize_bilinear
-GeneratedTests.resize_bilinear_2
GeneratedTests.rnn
GeneratedTests.rnn_state
GeneratedTests.rsqrt
GeneratedTests.select_v1_2_two_dim_quant8
GeneratedTests.slice_5
GeneratedTests.slice_6
-GeneratedTests.slice_7
GeneratedTests.slice_8
GeneratedTests.slice_zero_sized
GeneratedTests.slice_zero_sized_quant8
-GeneratedTests.space_to_depth_float_1
-GeneratedTests.space_to_depth_float_2
-GeneratedTests.space_to_depth_float_3
-GeneratedTests.space_to_depth_quant8_1
-GeneratedTests.space_to_depth_quant8_2
GeneratedTests.sqrt_
GeneratedTests.sqrt_1D_float_nnfw
GeneratedTests.sqrt_2D_float_nnfw
GeneratedTests.l2_normalization
GeneratedTests.l2_normalization_2
GeneratedTests.l2_normalization_large
+GeneratedTests.l2_normalization_quant8_nnfw
GeneratedTests.l2_pool_float
GeneratedTests.l2_pool_float_2
GeneratedTests.l2_pool_float_large
GeneratedTests.pack_ex_dynamic_nnfw
GeneratedTests.pad_dynamic_nnfw
GeneratedTests.pad_quant8_nnfw
+GeneratedTests.pad_v2_1_float
+GeneratedTests.pad_v2_1_quant8
+GeneratedTests.pad_v2_all_dims
+GeneratedTests.pad_v2_all_dims_quant8
+GeneratedTests.pad_v2_low_rank
+GeneratedTests.pad_v2_low_rank_quant8
GeneratedTests.pow_2D_float_nnfw
GeneratedTests.pow_broadcast_float_nnfw
GeneratedTests.pow_broadcast_float_nnfw_2
GeneratedTests.prelu_weight_as_input_quant8_2
GeneratedTests.prelu_weight_as_input_quant8_3
GeneratedTests.prelu_weight_as_input_quant8_4
+GeneratedTests.quantize_quant8
+GeneratedTests.quantize_quant8_2
+GeneratedTests.quantize_quant8_3
+GeneratedTests.quantize_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
GeneratedTests.range_ex_float_1
GeneratedTests.range_ex_float_1_all_constant_inputs
GeneratedTests.range_ex_float_1_dynamic_nnfw
GeneratedTests.reshape_dynamic_nnfw
GeneratedTests.resize_bilinear
GeneratedTests.resize_bilinear_2
+GeneratedTests.resize_bilinear_quant8_nnfw
GeneratedTests.reverse_ex_1d
GeneratedTests.reverse_ex_3d
GeneratedTests.reverse_ex_dynamic_1D
GeneratedTests.split_quant8_2_relaxed
GeneratedTests.split_quant8_3
GeneratedTests.split_quant8_4
+GeneratedTests.split_v_ex_1D_float_1_nnfw
+GeneratedTests.split_v_ex_1D_float_2_nnfw
+GeneratedTests.split_v_ex_1D_int32_nnfw
+GeneratedTests.split_v_ex_4D_float_1_nnfw
+GeneratedTests.split_v_ex_4D_float_2_nnfw
+GeneratedTests.split_v_ex_4D_float_3_nnfw
+GeneratedTests.split_v_ex_4D_float_4_nnfw
+GeneratedTests.split_v_ex_4D_int32_1_nnfw
+GeneratedTests.split_v_ex_4D_int32_2_nnfw
+GeneratedTests.split_v_ex_4D_int32_3_nnfw
+GeneratedTests.split_v_ex_4D_int32_4_nnfw
GeneratedTests.sqrt_
GeneratedTests.sqrt_1D_float_nnfw
GeneratedTests.sqrt_2D_float_nnfw
GeneratedTests.squeeze_float_1_relaxed
GeneratedTests.squeeze_quant8_1
GeneratedTests.squeeze_relaxed
+GeneratedTests.stateless_random_uniform_ex_nnfw
GeneratedTests.strided_slice
GeneratedTests.strided_slice_dynamic_nnfw
GeneratedTests.strided_slice_float_1
GeneratedTests.abs_
-GeneratedTests.batch_to_space
-GeneratedTests.batch_to_space_float_1
-GeneratedTests.batch_to_space_quant8_1
GeneratedTests.cast_float16_to_float16
GeneratedTests.cast_float16_to_float32
GeneratedTests.cast_float16_to_float32_relaxed
GeneratedTests.hashtable_lookup_float
GeneratedTests.hashtable_lookup_float_4D_nnfw
GeneratedTests.hashtable_lookup_quant8
-GeneratedTests.l2_normalization
-GeneratedTests.l2_normalization_2
-GeneratedTests.l2_normalization_large
GeneratedTests.l2_pool_float
GeneratedTests.l2_pool_float_2
GeneratedTests.l2_pool_float_large
GeneratedTests.neg
GeneratedTests.neg_3D_int_nnfw
GeneratedTests.neg_4D_int_nnfw
-GeneratedTests.pad_quant8_nnfw
GeneratedTests.prelu
GeneratedTests.prelu_broadcast_float_1_nnfw
GeneratedTests.prelu_broadcast_quant8_1_nnfw
GeneratedTests.prelu_weight_as_input_quant8_2
GeneratedTests.prelu_weight_as_input_quant8_3
GeneratedTests.prelu_weight_as_input_quant8_4
+GeneratedTests.quantize_quant8_5
+GeneratedTests.quantize_quant8_6
+GeneratedTests.quantize_quant8_7
+GeneratedTests.quantize_quant8_8
+GeneratedTests.quantize_zero_sized
GeneratedTests.reduce_max_quant8
GeneratedTests.reduce_max_quant8_1_nnfw
GeneratedTests.reduce_max_quant8_2
GeneratedTests.relu1_float_2
GeneratedTests.relu1_quant8_1
GeneratedTests.relu1_quant8_2
-GeneratedTests.relu6_float_1
-GeneratedTests.relu6_float_2
GeneratedTests.relu6_quant8_1
GeneratedTests.relu6_quant8_2
GeneratedTests.relu_quant8_1
GeneratedTests.relu_quant8_2
-GeneratedTests.resize_bilinear
-GeneratedTests.resize_bilinear_2
GeneratedTests.rnn
GeneratedTests.rnn_state
GeneratedTests.rsqrt
GeneratedTests.select_v1_2_two_dim_quant8
GeneratedTests.slice_5
GeneratedTests.slice_6
-GeneratedTests.slice_7
GeneratedTests.slice_8
GeneratedTests.slice_zero_sized
GeneratedTests.slice_zero_sized_quant8
-GeneratedTests.space_to_depth_float_1
-GeneratedTests.space_to_depth_float_2
-GeneratedTests.space_to_depth_float_3
-GeneratedTests.space_to_depth_quant8_1
-GeneratedTests.space_to_depth_quant8_2
GeneratedTests.sqrt_
GeneratedTests.sqrt_1D_float_nnfw
GeneratedTests.sqrt_2D_float_nnfw
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{8}")
+size_splits = Input("size_splits", "TENSOR_INT32", "{8}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 8)
+
+i2 = Output("op2", "TENSOR_FLOAT32", "{1}")
+i3 = Output("op3", "TENSOR_FLOAT32", "{1}")
+i4 = Output("op4", "TENSOR_FLOAT32", "{1}")
+i5 = Output("op5", "TENSOR_FLOAT32", "{1}")
+i6 = Output("op6", "TENSOR_FLOAT32", "{1}")
+i7 = Output("op7", "TENSOR_FLOAT32", "{1}")
+i8 = Output("op8", "TENSOR_FLOAT32", "{1}")
+i9 = Output("op9", "TENSOR_FLOAT32", "{1}")
+
+model = model.Operation("SPLIT_V_EX", i1, size_splits, split_dim, num_splits).To([i2, i3, i4, i5, i6, i7, i8, i9])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
+ size_splits:
+ [1, 1, 1, 1, 1, 1, 1, 1],
+ split_dim:
+ [0]
+ }
+
+output0 = {
+ i2: # output 0
+ [1.0],
+ i3: # output 1
+ [2.0],
+ i4: # output 2
+ [3.0],
+ i5: # output 3
+ [4.0],
+ i6: # output 4
+ [5.0],
+ i7: # output 5
+ [6.0],
+ i8: # output 6
+ [7.0],
+ i9: # output 7
+ [8.0]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+# model
+input0 = Input("input0", "TENSOR_FLOAT32", "{12}")
+size_splits = Input("size_splits", "TENSOR_INT32", "{3}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 3)
+
+output0 = Output("output0", "TENSOR_FLOAT32", "{3}")
+output1 = Output("output1", "TENSOR_FLOAT32", "{5}")
+output2 = Output("output2", "TENSOR_FLOAT32", "{4}")
+
+model = Model().Operation("SPLIT_V_EX", input0, size_splits, split_dim, num_splits).To((output0, output1, output2))
+
+# Example 1.
+input_dict = {
+ input0: [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0],
+ size_splits: [3, 5, 4],
+ split_dim: [0]
+}
+output_dict = {
+ output0: [1.0, 2.0, 3.0],
+ output1: [4.0, 5.0, 6.0, 7.0, 8.0],
+ output2: [9.0, 10.0, 11.0, 12.0]
+}
+
+Example((input_dict, output_dict))
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_INT32", "{8}")
+size_splits = Input("size_splits", "TENSOR_INT32", "{8}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 8)
+
+i2 = Output("op2", "TENSOR_INT32", "{1}")
+i3 = Output("op3", "TENSOR_INT32", "{1}")
+i4 = Output("op4", "TENSOR_INT32", "{1}")
+i5 = Output("op5", "TENSOR_INT32", "{1}")
+i6 = Output("op6", "TENSOR_INT32", "{1}")
+i7 = Output("op7", "TENSOR_INT32", "{1}")
+i8 = Output("op8", "TENSOR_INT32", "{1}")
+i9 = Output("op9", "TENSOR_INT32", "{1}")
+
+model = model.Operation("SPLIT_V_EX", i1, size_splits, split_dim, num_splits).To([i2, i3, i4, i5, i6, i7, i8, i9])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 2, 3, 4, 5, 6, 7, 8],
+ size_splits:
+ [1, 1, 1, 1, 1, 1, 1, 1],
+ split_dim:
+ [0]
+ }
+
+output0 = {
+ i2: # output 0
+ [1],
+ i3: # output 1
+ [2],
+ i4: # output 2
+ [3],
+ i5: # output 3
+ [4],
+ i6: # output 4
+ [5],
+ i7: # output 5
+ [6],
+ i8: # output 6
+ [7],
+ i9: # output 7
+ [8]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{2,2,2,2}")
+size_splits = Input("size_splits", "TENSOR_INT32", "{2}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 2)
+
+i2 = Output("op2", "TENSOR_FLOAT32", "{1,2,2,2}")
+i3 = Output("op3", "TENSOR_FLOAT32", "{1,2,2,2}")
+model = model.Operation("SPLIT_V_EX", i1, size_splits, split_dim, num_splits).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0],
+ size_splits:
+ [8, 8],
+ split_dim:
+ [0]
+ }
+
+output0 = {
+ i2: # output 0
+ [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
+ i3: # output 1
+ [9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{2,2,2,2}")
+size_splits = Input("size_splits", "TENSOR_INT32", "{2}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 2)
+
+i2 = Output("op2", "TENSOR_FLOAT32", "{2,2,2,1}")
+i3 = Output("op3", "TENSOR_FLOAT32", "{2,2,2,1}")
+model = model.Operation("SPLIT_V_EX", i1, size_splits, split_dim, num_splits).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0],
+ size_splits:
+ [8, 8],
+ split_dim:
+ [3]}
+
+output0 = {
+ i2: # output 0
+ [1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0],
+ i3: # output 1
+ [2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{2,2,2,2}")
+size_splits = Input("size_splits", "TENSOR_INT32", "{2}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 2)
+
+i2 = Output("op2", "TENSOR_FLOAT32", "{1,2,2,2}")
+i3 = Output("op3", "TENSOR_FLOAT32", "{1,2,2,2}")
+model = model.Operation("SPLIT_V_EX", i1, size_splits, split_dim, num_splits).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0],
+ size_splits:
+ [8, 8],
+ split_dim:
+ [-4]
+ }
+
+output0 = {
+ i2: # output 0
+ [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
+ i3: # output 1
+ [9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_FLOAT32", "{4,1,1,8}")
+size_splits = Input("size_splits", "TENSOR_INT32", "{3}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 3)
+
+i2 = Output("op2", "TENSOR_FLOAT32", "{4,1,1,2}")
+i3 = Output("op3", "TENSOR_FLOAT32", "{4,1,1,4}")
+i4 = Output("op4", "TENSOR_FLOAT32", "{4,1,1,2}")
+
+model = model.Operation("SPLIT_V_EX", i1, size_splits, split_dim, num_splits).To([i2, i3, i4])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0],
+ size_splits:
+ [2,4,2],
+ split_dim:
+ [3]
+ }
+
+output0 = {
+ i2: # output 0
+ [1.0, 2.0, 9.0, 10.0, 17.0, 18.0, 25.0, 26.0],
+ i3: # output 1
+ [3.0, 4.0, 5.0, 6.0, 11.0, 12.0, 13.0, 14.0, 19.0, 20.0, 21.0, 22.0, 27.0, 28.0, 29.0, 30.0],
+ i4: [7.0, 8.0, 15.0, 16.0, 23.0, 24.0, 31.0, 32.0]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_INT32", "{2,2,2,2}")
+size_splits = Input("size_splits", "TENSOR_INT32", "{2}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 2)
+
+i2 = Output("op2", "TENSOR_INT32", "{1,2,2,2}")
+i3 = Output("op3", "TENSOR_INT32", "{1,2,2,2}")
+model = model.Operation("SPLIT_V_EX", i1, size_splits, split_dim, num_splits).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ size_splits:
+ [8, 8],
+ split_dim:
+ [0]}
+
+output0 = {
+ i2: # output 0
+ [1, 2, 3, 4, 5, 6, 7, 8],
+ i3: # output 1
+ [9, 10, 11, 12, 13, 14, 15, 16]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_INT32", "{2,2,2,2}")
+size_splits = Input("size_splits", "TENSOR_INT32", "{2}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 2)
+
+i2 = Output("op2", "TENSOR_INT32", "{2,1,2,2}")
+i3 = Output("op3", "TENSOR_INT32", "{2,1,2,2}")
+
+model = model.Operation("SPLIT_V_EX", i1, size_splits, split_dim, num_splits).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ size_splits:
+ [8, 8],
+ split_dim:
+ [1]}
+
+output0 = {
+ i2: # output 0
+ [1, 2, 3, 4, 9, 10, 11, 12],
+ i3: # output 1
+ [5, 6, 7, 8, 13, 14, 15, 16]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_INT32", "{2,2,2,2}")
+
+size_splits = Input("size_splits", "TENSOR_INT32", "{2}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 2)
+
+i2 = Output("op2", "TENSOR_INT32", "{2,2,1,2}")
+i3 = Output("op3", "TENSOR_INT32", "{2,2,1,2}")
+model = model.Operation("SPLIT_V_EX", i1, size_splits, split_dim, num_splits).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ size_splits:
+ [8, 8],
+ split_dim:
+ [2]}
+
+output0 = {
+ i2: # output 0
+ [1, 2, 5, 6, 9, 10, 13, 14],
+ i3: # output 1
+ [3, 4, 7, 8, 11, 12, 15, 16]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_INT32", "{2,2,2,2}")
+
+size_splits = Input("size_splits", "TENSOR_INT32", "{2}")
+split_dim = Input("split_dim", "TENSOR_INT32", "{1}")
+num_splits = Int32Scalar("num_splits", 2)
+
+i2 = Output("op2", "TENSOR_INT32", "{2,2,2,1}")
+i3 = Output("op3", "TENSOR_INT32", "{2,2,2,1}")
+model = model.Operation("SPLIT_V_EX", i1, size_splits, split_dim, num_splits).To([i2, i3])
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
+ size_splits:
+ [8, 8],
+ split_dim:
+ [3]}
+
+output0 = {
+ i2: # output 0
+ [1, 3, 5, 7, 9, 11, 13, 15],
+ i3: # output 1
+ [2, 4, 6, 8, 10, 12, 14, 16]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+#
+# Copyright (C) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+model = Model()
+
+i1 = Input("input1", "TENSOR_INT32", "{1}")
+i2 = Input("input2", "TENSOR_INT32", "{2}")
+
+o1 = Output("output0", "TENSOR_FLOAT32", "{10}")
+
+model = model.Operation("STATELESS_RANDOM_UNIFORM_EX", i1, i2).To(o1)
+
+# Example.
+input0 = {
+ i1 : [10], #input1
+ i2 : [1, 1] #input2
+}
+
+output0 = {
+ o1: [0.09827709, 0.14063823, 0.4553436,
+ 0.10658443, 0.2075988, 0.30841374,
+ 0.7489233, 0.90613365, 0.63342273,
+ 0.37854457]
+}
+
+Example((input0, output0))
--- /dev/null
+#
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+model = Model()
+in0 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 1, 1, 3}, 2e-7, 128")
+out0 = Output("op2", "TENSOR_QUANT8_ASYMM", "{1, 1, 1, 3}, 2e-7, 128")
+model = model.Operation("L2_NORMALIZATION", in0).To(out0)
+
+# Example 1. Input in operand 0,
+input0 = {in0: # input 0
+ [0, 5, 12]}
+output0 = {out0: # output 0
+ [51, 54, 58]}
+
+# Instantiate an example
+Example((input0, output0))
--- /dev/null
+# model
+model = Model()
+i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 1}, 0.8, 5")
+i2 = Output("op2", "TENSOR_QUANT8_ASYMM", "{1, 3, 3, 1}, 0.8, 5")
+w = Int32Scalar("width", 3)
+h = Int32Scalar("height", 3)
+model = model.Operation("RESIZE_BILINEAR", i1, w, h).To(i2)
+
+# Example 1. Input in operand 0,
+input0 = {i1: # input 0
+ [1, 1, 2, 2]}
+output0 = {i2: # output 0
+ [1, 1, 1,
+ 2, 2, 2,
+ 2, 2, 2]}
+
+# Instantiate an example
+Example((input0, output0))
target_link_libraries(${RUNTIME_NNFW_API_TEST} nnfw-dev)
target_link_libraries(${RUNTIME_NNFW_API_TEST} gtest gmock)
target_link_libraries(${RUNTIME_NNFW_API_TEST} ${LIB_PTHREAD} dl)
+target_link_libraries(${RUNTIME_NNFW_API_TEST} circle_schema)
install(TARGETS ${RUNTIME_NNFW_API_TEST} DESTINATION unittest_standalone)
- Validation Tests (fixture format `ValidationTest???`)
- Basic positive/negative tests with simple nnpackages
+- Generated Model Tests (fixture format `GenModelTest`)
+  - One-time inference tests with a variety of generated models (a rough usage sketch follows this list)
- Regression Tests (fixture format `RegressionTest`, test format `GitHub###`)
- When you see bugs/crashes while using those API
- Must refer a github issue
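As a rough illustration (not part of the patch), a GenModelTest case would assemble its model with the CircleGen helper added below, using only the methods that appear in this excerpt; how the fixture then feeds the serialized buffer into an nnfw session is not shown here, and the single-ADD model and its shapes are purely illustrative:

CircleGen cgen;
int in1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
int in2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
cgen.addOperatorAdd({{in1, in2}, {out}}, circle::ActivationFunctionType::ActivationFunctionType_NONE);
cgen.setInputsAndOutputs({in1, in2}, {out});
CircleBuffer cbuf = cgen.finish();
// cbuf.buffer() and cbuf.size() now describe a serialized circle model holding one ADD operator.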
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_API_TEST_CIRCLE_GEN_H__
+#define __NNFW_API_TEST_CIRCLE_GEN_H__
+
+#include <circle_schema_generated.h>
+
+#include <vector>
+
+/**
+ * @brief Class for storing flatbuffer buffer
+ *
+ * This is a simple wrapper for a finished FlatBufferBuilder. It owns the buffer and a user can
+ * get the buffer pointer and size.
+ */
+class CircleBuffer
+{
+public:
+ CircleBuffer() = default;
+ explicit CircleBuffer(flatbuffers::FlatBufferBuilder &&fbb) : _fbb{std::move(fbb)}
+ {
+ _fbb.Finished(); // The build must have been finished, so check that here
+ }
+
+ uint8_t *buffer() { return _fbb.GetBufferPointer(); }
+ size_t size() { return _fbb.GetSize(); }
+
+private:
+ flatbuffers::FlatBufferBuilder _fbb;
+};
+
+/**
+ * @brief Circle flatbuffer file generator
+ *
+ * This is a helper class for generating a circle file.
+ *
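+ * A rough usage sketch (using the add-operator methods defined below):
+ * @code
+ *   CircleGen cgen;
+ *   int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ *   int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ *   cgen.addOperatorAdd({{in, in}, {out}}, circle::ActivationFunctionType_NONE);
+ *   cgen.setInputsAndOutputs({in}, {out});
+ *   CircleBuffer cbuf = cgen.finish();
+ * @endcode
+ *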
+ */
+class CircleGen
+{
+public:
+ struct TensorParams
+ {
+ std::vector<int32_t> shape;
+ circle::TensorType tensor_type = circle::TensorType::TensorType_FLOAT32;
+ uint32_t buffer = 0;
+ std::string name;
+ };
+
+ struct OperatorParams
+ {
+ std::vector<int32_t> inputs;
+ std::vector<int32_t> outputs;
+ int version = 1;
+ };
+
+public:
+ CircleGen()
+ {
+ // 0th buffer is always the empty buffer for non-const tensors
+ addBuffer(nullptr, 0);
+ }
+
+ template <typename T> uint32_t addBuffer(const std::vector<T> &buf_vec)
+ {
+ auto buf = reinterpret_cast<const uint8_t *>(buf_vec.data());
+ auto size = buf_vec.size() * sizeof(T);
+ return addBuffer(buf, size);
+ }
+
+ uint32_t addBuffer(const uint8_t *buf, size_t size)
+ {
+ uint32_t ind = _buffers.size();
+ _buffers.emplace_back(buildBuffer(buf, size));
+ return ind;
+ }
+
+ uint32_t addTensor(const TensorParams &params)
+ {
+ int ind = _tensors.size();
+ _tensors.emplace_back(buildTensor(params));
+ return ind;
+ }
+
+ void setInputsAndOutputs(const std::vector<int> &inputs, const std::vector<int> &outputs)
+ {
+ _inputs = inputs;
+ _outputs = outputs;
+ }
+
+ CircleBuffer finish()
+ {
+ // TODO Support multiple subgraphs; for now, only a single-subgraph model is supported.
+ std::vector<flatbuffers::Offset<circle::SubGraph>> subgraphs{buildSubGraph()};
+ auto model =
+ circle::CreateModelDirect(_fbb, 3, &_opcodes, &subgraphs, "CircleGen generated", &_buffers);
+ _fbb.Finish(model);
+ return CircleBuffer{std::move(_fbb)};
+ }
+
+ // ===== Add Operator methods begin =====
+
+ uint32_t addOperatorAdd(const OperatorParams &params, circle::ActivationFunctionType actfn)
+ {
+ auto options = circle::CreateAddOptions(_fbb, actfn).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_ADD,
+ circle::BuiltinOptions_AddOptions, options);
+ }
+
+ uint32_t addOperatorAveragePool2D(const OperatorParams &params, circle::Padding padding,
+ int stride_w, int stride_h, int filter_w, int filter_h,
+ circle::ActivationFunctionType actfn)
+ {
+ auto options =
+ circle::CreatePool2DOptions(_fbb, padding, stride_w, stride_h, filter_w, filter_h, actfn)
+ .Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_AVERAGE_POOL_2D,
+ circle::BuiltinOptions_Pool2DOptions, options);
+ }
+
+ // NOTE Please add addOperator functions ABOVE this line
+ //
+ // % How to add a new addOperatorXXX function
+ // 0. Copy code from one of the existing addOperatorXXX functions
+ // 1. Change the function signature (need BuiltinOperator params)
+ // 2. Change enum BuiltinOperator
+ // 3. Change enum BuiltinOptions
+ // 4. Change CreateXXXOptions accordingly
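+ //
+ // For instance, a hypothetical operator without options could look like the sketch below
+ // (RELU is not actually wired up in this helper; it only illustrates the steps above):
+ //
+ //   uint32_t addOperatorRelu(const OperatorParams &params)
+ //   {
+ //     return addOperatorWithOptions(params, circle::BuiltinOperator_RELU,
+ //                                   circle::BuiltinOptions_NONE, 0);
+ //   }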
+
+ // ===== Add Operator methods end =====
+
+private:
+ uint32_t addOperatorWithOptions(const OperatorParams &params, circle::BuiltinOperator opcode,
+ circle::BuiltinOptions options_type,
+ flatbuffers::Offset<void> options)
+ {
+ uint32_t opcode_ind = addOperatorCode(opcode);
+ auto op = circle::CreateOperatorDirect(_fbb, opcode_ind, &params.inputs, &params.outputs,
+ options_type, options);
+
+ uint32_t ind = _operators.size();
+ _operators.emplace_back(op);
+ return ind;
+ }
+
+ uint32_t addOperatorCode(circle::BuiltinOperator opcode)
+ {
+ // TODO If the same OperatorCode is registered already, just return it
+ uint32_t ind = _opcodes.size();
+ _opcodes.emplace_back(circle::CreateOperatorCode(_fbb, opcode));
+ return ind;
+ }
+
+ flatbuffers::Offset<circle::Buffer> buildBuffer(const uint8_t *buf, size_t size)
+ {
+ if (buf == nullptr && size == 0)
+ return circle::CreateBuffer(_fbb);
+ auto buffer = _fbb.CreateVector(buf, size);
+ return circle::CreateBuffer(_fbb, buffer);
+ }
+
+ flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams &params)
+ {
+ auto shape = _fbb.CreateVector(params.shape);
+ auto name = _fbb.CreateString(params.name);
+ return circle::CreateTensor(_fbb, shape, params.tensor_type, params.buffer, name,
+ 0 /* QuantParam */, false /* is_variable */, 0 /* sparsity */,
+ 0 /* shape_signature */);
+ }
+
+ flatbuffers::Offset<circle::SubGraph> buildSubGraph()
+ {
+ return circle::CreateSubGraphDirect(_fbb, &_tensors, &_inputs, &_outputs, &_operators, nullptr);
+ }
+
+private:
+ flatbuffers::FlatBufferBuilder _fbb{1024};
+ std::vector<flatbuffers::Offset<circle::Buffer>> _buffers;
+ std::vector<flatbuffers::Offset<circle::OperatorCode>> _opcodes;
+
+ // per-subgraph
+ std::vector<int> _inputs;
+ std::vector<int> _outputs;
+ std::vector<flatbuffers::Offset<circle::Tensor>> _tensors;
+ std::vector<flatbuffers::Offset<circle::Operator>> _operators;
+};
+
+#endif // __NNFW_API_TEST_CIRCLE_GEN_H__
--- /dev/null
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <nnfw_internal.h>
+
+#include <fstream>
+
+#include "CircleGen.h"
+#include "fixtures.h"
+
+/**
+ * @brief Generated Model test fixture for a one time inference
+ *
+ * This fixture is for a one-time inference test with a variety of generated models.
+ * It is the user's responsibility to create @c _cbuf , @c _ref_inputs and @c _ref_outputs in the
+ * test body, which are the generated circle buffer, the model input data, and the expected
+ * output data, respectively.
+ * The rest (calling API functions for execution) is done by @c SetUp and @c TearDown .
+ *
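+ * A minimal sketch of a test body (the tests below are complete, working examples):
+ * @code
+ *   CircleGen cgen;
+ *   // ... build a model with cgen ...
+ *   _cbuf = cgen.finish();
+ *   _ref_inputs = {{...}};  // one vector per model input
+ *   _ref_outputs = {{...}}; // one vector per model output
+ * @endcode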
+ */
+class GenModelTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { NNFW_ENSURE_SUCCESS(nnfw_create_session(&_so.session)); }
+
+ void TearDown() override
+ {
+ NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(_so.session, _cbuf.buffer(), _cbuf.size()));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_so.session));
+
+ // In/Out buffer settings
+ {
+ uint32_t num_inputs;
+ NNFW_ENSURE_SUCCESS(nnfw_input_size(_so.session, &num_inputs));
+ _so.inputs.resize(num_inputs);
+ for (uint32_t ind = 0; ind < _so.inputs.size(); ind++)
+ {
+ nnfw_tensorinfo ti;
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_so.session, ind, &ti));
+ uint64_t input_elements = num_elems(&ti);
+ _so.inputs[ind].resize(input_elements);
+
+ ASSERT_EQ(nnfw_set_input(_so.session, ind, ti.dtype, _so.inputs[ind].data(),
+ sizeof(float) * input_elements),
+ NNFW_STATUS_NO_ERROR);
+ }
+
+ uint32_t num_outputs;
+ NNFW_ENSURE_SUCCESS(nnfw_output_size(_so.session, &num_outputs));
+ _so.outputs.resize(num_outputs);
+ for (uint32_t ind = 0; ind < _so.outputs.size(); ind++)
+ {
+ nnfw_tensorinfo ti;
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_so.session, ind, &ti));
+ uint64_t output_elements = num_elems(&ti);
+ _so.outputs[ind].resize(output_elements);
+ ASSERT_EQ(nnfw_set_output(_so.session, ind, ti.dtype, _so.outputs[ind].data(),
+ sizeof(float) * output_elements),
+ NNFW_STATUS_NO_ERROR);
+ }
+ }
+
+ // Set input values, run, and check output values
+ {
+ ASSERT_EQ(_so.inputs.size(), _ref_inputs.size());
+ for (uint32_t i = 0; i < _so.inputs.size(); i++)
+ {
+ // Fill the values
+ ASSERT_EQ(_so.inputs[i].size(), _ref_inputs[i].size());
+ memcpy(_so.inputs[i].data(), _ref_inputs[i].data(), _so.inputs[i].size() * sizeof(float));
+ }
+
+ NNFW_ENSURE_SUCCESS(nnfw_run(_so.session));
+
+ ASSERT_EQ(_so.outputs.size(), _ref_outputs.size());
+ for (uint32_t i = 0; i < _so.outputs.size(); i++)
+ {
+ // Check output tensor values
+ auto &ref_output = _ref_outputs[i];
+ auto &output = _so.outputs[i];
+ ASSERT_EQ(output.size(), ref_output.size());
+ for (uint32_t e = 0; e < ref_output.size(); e++)
+ ASSERT_FLOAT_EQ(ref_output[e], output[e]);
+ }
+ }
+
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(_so.session));
+ }
+
+protected:
+ SessionObject _so;
+ CircleBuffer _cbuf;
+ std::vector<std::vector<float>> _ref_inputs;
+ std::vector<std::vector<float>> _ref_outputs;
+};
+
+TEST_F(GenModelTest, OneOp_Add_VarToConst)
+{
+ CircleGen cgen;
+ std::vector<float> rhs_data{5, 4, 7, 4};
+ uint32_t rhs_buf = cgen.addBuffer(rhs_data);
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs}, {out});
+ _cbuf = cgen.finish();
+
+ _ref_inputs = {{1, 3, 2, 4}};
+ _ref_outputs = {{6, 7, 9, 8}};
+}
+
+TEST_F(GenModelTest, OneOp_Add_VarToVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+ _cbuf = cgen.finish();
+
+ _ref_inputs = {{1, 3, 2, 4}, {5, 4, 7, 4}};
+ _ref_outputs = {{6, 7, 9, 8}};
+}
+
+TEST_F(GenModelTest, OneOp_AvgPool2D)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ _cbuf = cgen.finish();
+
+ _ref_inputs = {{1, 3, 2, 4}};
+ _ref_outputs = {{2.5}};
+}
*/
#include <gtest/gtest.h>
-#include <nnfw_debug.h>
+#include <nnfw_internal.h>
#include "common.h"
#include "fixtures.h"
{
NNFW_STATUS res = nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_INT32, new_shape.data(),
sizeof(int) * new_shape.size());
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
res = nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, actual_output->data(),
sizeof(float) * actual_output_size);
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
}
void prepare_and_set_input_output(const std::vector<int> &new_shape, int actual_output_size,
std::vector<float> *actual_output)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
NNFW_STATUS res = NNFW_STATUS_ERROR;
res = nnfw_prepare(_session);
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
set_input_output(new_shape, actual_output_size, actual_output);
// real test case should start from calling nnfw_run()
if (no_run_error)
{
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
// output shape check
nnfw_tensorinfo info;
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &info), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &info));
ASSERT_EQ(info.rank, new_shape.size());
for (uint32_t d = 0; d < info.rank; ++d)
ASSERT_EQ(info.dims[d], new_shape[d]);
// Do inference
NNFW_STATUS res = nnfw_run(_session);
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
// output value check
for (int i = 0; i < expected.size(); ++i)
TEST_F(TestDynamicTensorReshapeModelLoaded, reshape_multiple_executions)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
NNFW_STATUS res = nnfw_prepare(_session);
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
std::vector<int> new_shape;
std::vector<float> expected = {-1.5, -1.0, -0.5, 0.5, 1.0, 1.5};
TEST_F(TestDynamicTensorReshapeModelLoaded, neg_reshape_multiple_executions)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
NNFW_STATUS res = nnfw_prepare(_session);
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
std::vector<int> new_shape;
std::vector<float> expected = {-1.5, -1.0, -0.5, 0.5, 1.0, 1.5};
const std::vector<float> &input1,
std::vector<float> *actual_output, nnfw_tensorinfo input0_ti)
{
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_set_input_tensorinfo(_session, 0, &input0_ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &input0_ti));
ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input0.data(),
sizeof(float) * input0.size()),
*/
TEST_F(TestInputUnknownDimInputConcatModelLoaded, concat_input0_to_2x3)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
const std::vector<float> input0 = {1, 2, 3}; // of shape [1, 3]
const std::vector<float> input1 = {4, 5, 6, 7, 8, 9}; // of shape [2, 3]
// input reshaping to [1, 3]
nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 3}};
- ASSERT_EQ(nnfw_set_input_tensorinfo(_session, 0, &ti), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &ti));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
set_input_output(_session, input0, input1, actual_output);
// Do inference
NNFW_STATUS res = nnfw_run(_session);
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
// output value check
for (int i = 0; i < expected.size(); ++i)
*/
TEST_F(TestInputUnknownDimInputConcatModelLoaded, neg_concat_input0_to_wrong_shape)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
const std::vector<float> input0 = {1, 2, 3}; // of shape [3, 1], wrong shape
const std::vector<float> input1 = {4, 5, 6, 7, 8, 9}; // of shape [2, 3]
// input reshaping to [3, 1]
nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 2, {3, 1}};
- ASSERT_EQ(nnfw_set_input_tensorinfo(_session, 0, &ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &ti));
ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_ERROR);
}
TEST_F(TestDynamicTensorApplyTensorInfoBinaryOp, set_input_tensorinfo_after_compilation_add)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
// input reshaping to [2, 2, 3]
nnfw_tensorinfo input0_ti = {NNFW_TYPE_TENSOR_FLOAT32, 3, {2, 2, 3}};
std::vector<float> expected_output = {1.1 * 2, 2.1 * 2, 3.1 * 2, 4.1 * 2, 5.1 * 2, 6.1 * 2,
7.1 * 2, 8.1 * 2, 9.1 * 2, 10.1 * 2, 11.1 * 2, 12.1 * 2};
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
- ASSERT_EQ(nnfw_set_input_tensorinfo(_session, 0, &input0_ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &input0_ti));
set_input_output(_session, input0, input1, actual_output);
// Do inference
NNFW_STATUS res = nnfw_run(_session);
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
// output value check
for (int i = 0; i < expected_output.size(); ++i)
TEST_F(TestDynamicTensorApplyTensorInfoUnaryOp, set_input_tensorinfo_after_compilation_neg)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
nnfw_tensorinfo input0_ti_original = {NNFW_TYPE_TENSOR_FLOAT32, 2, {4, 4}};
expected_output[i] = -1 * input0[i];
}
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
// input shape check
{
nnfw_tensorinfo ti = {};
- ASSERT_EQ(nnfw_input_tensorinfo(_session, 0, &ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_session, 0, &ti));
ASSERT_TRUE(tensorInfoEqual(input0_ti_original, ti));
}
- ASSERT_EQ(nnfw_set_input_tensorinfo(_session, 0, &input0_ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &input0_ti));
// input shape check
{
nnfw_tensorinfo ti = {};
- ASSERT_EQ(nnfw_input_tensorinfo(_session, 0, &ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_session, 0, &ti));
ASSERT_TRUE(tensorInfoEqual(input0_ti, ti));
}
// Do inference
NNFW_STATUS res = nnfw_run(_session);
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
// output value check
for (int i = 0; i < expected_output.size(); ++i)
TEST_F(TestWhileDynamicModelLoaded, run_verify)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
std::vector<float> actual_output0(10);
nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 3, {1, 28, 28}};
- ASSERT_EQ(nnfw_set_input_tensorinfo(_session, 0, &ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &ti));
set_input_output(_session, while_dynamic_input0, actual_output0);
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
nnfw_tensorinfo ti_output0_expected = {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 10}};
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &ti));
ASSERT_TRUE(tensorInfoEqual(ti, ti_output0_expected));
// output value check
TEST_F(TestWhileDynamicModelLoaded, neg_run_verify)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 3, {1, 28, 28}};
- ASSERT_EQ(nnfw_set_input_tensorinfo(_session, 0, &ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &ti));
// Insufficient size of output (10 or more is sufficient)
std::vector<float> actual_output0(9);
TEST_F(TestIfDynamicModelLoaded, run_verify)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
nnfw_tensorinfo ti_output0_expected = {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 10}};
// Output tensor sizes are inferenced after `nnfw_prepare`
{
nnfw_tensorinfo ti;
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &ti));
ASSERT_TRUE(tensorInfoEqual(ti, ti_output0_expected));
}
std::vector<float> actual_output0(10);
set_input_output(_session, if_dynamic_input0, actual_output0);
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
// Check output tensor sizes again
{
nnfw_tensorinfo ti;
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &ti), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &ti));
ASSERT_TRUE(tensorInfoEqual(ti, ti_output0_expected));
}
*/
#include <gtest/gtest.h>
-#include <nnfw_debug.h>
+#include <nnfw_internal.h>
#include "fixtures.h"
#include "NNPackages.h"
{
NNFW_STATUS res = NNFW_STATUS_ERROR;
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_set_config(_session, "EXECUTOR", "Linear"), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_set_config(_session, "EXECUTOR", "Linear"));
// input and output values
const std::vector<float> input1 = {0, 1, 2, 3, 4, 5, 6, 7}; // of changed shape [4, 2]
res = nnfw_set_input_tensorinfo(_session, 0, &ti);
res = nnfw_prepare(_session);
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
nnfw_tensorinfo ti_input = {}; // Static inference result will be stored
nnfw_input_tensorinfo(_session, 0, &ti_input);
res = nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input1.data(),
sizeof(float) * input1.size());
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
res = nnfw_set_input(_session, 1, NNFW_TYPE_TENSOR_FLOAT32, input2.data(),
sizeof(float) * input2.size());
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
uint64_t output_num_elements = tensorInfoNumElements(ti_output);
ASSERT_EQ(output_num_elements, expected.size());
std::vector<float> actual_output(output_num_elements);
res = nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, actual_output.data(),
sizeof(float) * actual_output.size());
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
// Do inference
res = nnfw_run(_session);
- ASSERT_EQ(res, NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(res);
// compare
for (int i = 0; i < expected.size(); ++i)
auto package_path = NNPackages::get().getModelAbsolutePath(NNPackages::ADD);
nnfw_session *session1 = nullptr;
- ASSERT_EQ(nnfw_create_session(&session1), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_load_model_from_file(session1, package_path.c_str()), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_set_available_backends(session1, "cpu;acl_cl;acl_neon"), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_prepare(session1), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session1));
+ NNFW_ENSURE_SUCCESS(nnfw_load_model_from_file(session1, package_path.c_str()));
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session1, "cpu;acl_cl;acl_neon"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session1));
nnfw_session *session2 = nullptr;
- ASSERT_EQ(nnfw_create_session(&session2), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_load_model_from_file(session2, package_path.c_str()), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_set_available_backends(session2, "cpu"), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_prepare(session2), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&session2));
+ NNFW_ENSURE_SUCCESS(nnfw_load_model_from_file(session2, package_path.c_str()));
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session2, "cpu"));
+ NNFW_ENSURE_SUCCESS(nnfw_prepare(session2));
- ASSERT_EQ(nnfw_close_session(session1), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_close_session(session2), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(session1));
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(session2));
}
using ValidationTestAddModelLoaded = ValidationTestModelLoaded<NNPackages::ADD>;
-TEST_F(ValidationTestAddModelLoaded, prepare_001)
-{
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_NO_ERROR);
-}
+TEST_F(ValidationTestAddModelLoaded, prepare_001) { NNFW_ENSURE_SUCCESS(nnfw_prepare(_session)); }
TEST_F(ValidationTestAddModelLoaded, set_available_backends_001)
{
- ASSERT_EQ(nnfw_set_available_backends(_session, "cpu"), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
}
TEST_F(ValidationTestAddModelLoaded, get_input_size)
{
uint32_t size = 0;
- ASSERT_EQ(nnfw_input_size(_session, &size), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_input_size(_session, &size));
ASSERT_EQ(size, 1);
}
TEST_F(ValidationTestAddModelLoaded, get_output_size)
{
uint32_t size = 0;
- ASSERT_EQ(nnfw_output_size(_session, &size), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_output_size(_session, &size));
ASSERT_EQ(size, 1);
}
TEST_F(ValidationTestAddModelLoaded, output_tensorinfo)
{
nnfw_tensorinfo tensor_info;
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &tensor_info), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &tensor_info));
ASSERT_EQ(tensor_info.rank, 1);
ASSERT_EQ(tensor_info.dims[0], 1);
}
-TEST_F(ValidationTestAddModelLoaded, neg_run_001)
+TEST_F(ValidationTestAddModelLoaded, neg_run)
{
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_ERROR);
+ // nnfw_prepare is not called
+ ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_INVALID_STATE);
}
-TEST_F(ValidationTestAddModelLoaded, neg_set_input_001)
+TEST_F(ValidationTestAddModelLoaded, neg_set_input)
{
- ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0), NNFW_STATUS_ERROR);
+ // nnfw_prepare is not called
+ ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0),
+ NNFW_STATUS_INVALID_STATE);
}
-TEST_F(ValidationTestAddModelLoaded, neg_set_output_001)
+TEST_F(ValidationTestAddModelLoaded, neg_set_output)
{
- ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0), NNFW_STATUS_ERROR);
+ // nnfw_prepare is not called
+ ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0),
+ NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestAddModelLoaded, neg_get_input_size)
{
- ASSERT_EQ(nnfw_input_size(_session, nullptr), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_input_size(_session, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
}
TEST_F(ValidationTestAddModelLoaded, neg_get_output_size)
{
- ASSERT_EQ(nnfw_output_size(_session, nullptr), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_output_size(_session, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
}
TEST_F(ValidationTestAddModelLoaded, neg_load_model)
// load model twice
ASSERT_EQ(nnfw_load_model_from_file(
_session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()),
- NNFW_STATUS_ERROR);
+ NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestAddModelLoaded, neg_output_tensorinfo)
{
// tensor_info is null
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, nullptr), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
}
{
SetInOutBuffers();
_input[0] = 3.0;
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
ASSERT_FLOAT_EQ(_output[0], 5.0);
}
{
SetInOutBuffers();
_input[0] = 4.0;
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
ASSERT_FLOAT_EQ(_output[0], 6.0);
_input[0] = 5.0f;
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
ASSERT_FLOAT_EQ(_output[0], 7.0);
}
{
SetInOutBuffers();
_input[0] = 3.0;
- ASSERT_EQ(nnfw_run_async(_session), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_await(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(_session));
+ NNFW_ENSURE_SUCCESS(nnfw_await(_session));
ASSERT_FLOAT_EQ(_output[0], 5.0);
}
TEST_F(ValidationTestAddSessionPrepared, get_input_size)
{
uint32_t size = 0;
- ASSERT_EQ(nnfw_input_size(_session, &size), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_input_size(_session, &size));
ASSERT_EQ(size, 1);
}
TEST_F(ValidationTestAddSessionPrepared, get_output_size)
{
uint32_t size = 0;
- ASSERT_EQ(nnfw_output_size(_session, &size), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_output_size(_session, &size));
ASSERT_EQ(size, 1);
}
TEST_F(ValidationTestAddSessionPrepared, output_tensorinfo)
{
nnfw_tensorinfo tensor_info;
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &tensor_info), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_session, 0, &tensor_info));
ASSERT_EQ(tensor_info.rank, 1);
ASSERT_EQ(tensor_info.dims[0], 1);
}
TEST_F(ValidationTestAddSessionPrepared, neg_await_after_sync_run)
{
SetInOutBuffers();
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run(_session));
ASSERT_EQ(nnfw_await(_session), NNFW_STATUS_ERROR);
}
TEST_F(ValidationTestAddSessionPrepared, neg_await_twice)
{
SetInOutBuffers();
- ASSERT_EQ(nnfw_run_async(_session), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_await(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(_session));
+ NNFW_ENSURE_SUCCESS(nnfw_await(_session));
ASSERT_EQ(nnfw_await(_session), NNFW_STATUS_ERROR);
}
TEST_F(ValidationTestAddSessionPrepared, neg_run_during_async_run)
{
SetInOutBuffers();
- ASSERT_EQ(nnfw_run_async(_session), NNFW_STATUS_NO_ERROR);
- EXPECT_EQ(nnfw_run(_session), NNFW_STATUS_ERROR);
- ASSERT_EQ(nnfw_await(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(_session));
+ EXPECT_EQ(nnfw_run(_session), NNFW_STATUS_INVALID_STATE);
+ NNFW_ENSURE_SUCCESS(nnfw_await(_session));
}
TEST_F(ValidationTestAddSessionPrepared, neg_set_input_001)
TEST_F(ValidationTestAddSessionPrepared, neg_get_input_size)
{
- ASSERT_EQ(nnfw_input_size(_session, nullptr), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_input_size(_session, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
}
TEST_F(ValidationTestAddSessionPrepared, neg_get_output_size)
{
- ASSERT_EQ(nnfw_output_size(_session, nullptr), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_output_size(_session, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
}
TEST_F(ValidationTestAddSessionPrepared, neg_load_model)
// Load model twice
ASSERT_EQ(nnfw_load_model_from_file(
_session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD).c_str()),
- NNFW_STATUS_ERROR);
+ NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestAddSessionPrepared, neg_prepare)
{
// Call Prepare twice
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_INVALID_STATE);
}
// TODO Validation check when "nnfw_run" is called without input & output tensor setting
TEST_F(ValidationTestFourAddModelsSetInput, run_001)
{
- ASSERT_EQ(nnfw_run(_objects[0].session), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_run(_objects[1].session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run(_objects[0].session));
+ NNFW_ENSURE_SUCCESS(nnfw_run(_objects[1].session));
}
TEST_F(ValidationTestFourAddModelsSetInput, run_002)
while (rep--)
{
for (auto obj : _objects)
- ASSERT_EQ(nnfw_run(obj.session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run(obj.session));
}
}
TEST_F(ValidationTestFourAddModelsSetInput, run_async)
{
for (auto obj : _objects)
- ASSERT_EQ(nnfw_run_async(obj.session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_run_async(obj.session));
for (auto obj : _objects)
- ASSERT_EQ(nnfw_await(obj.session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_await(obj.session));
}
TEST_F(ValidationTestSessionCreated, close_and_create_again)
{
- ASSERT_EQ(nnfw_close_session(_session), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_create_session(&_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
}
TEST_F(ValidationTestSessionCreated, neg_load_session_1)
TEST_F(ValidationTestSessionCreated, neg_load_session_2)
{
- ASSERT_EQ(nnfw_load_model_from_file(_session, nullptr), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_load_model_from_file(_session, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
}
TEST_F(ValidationTestSessionCreated, neg_load_session_3)
nnfw_load_model_from_file(
_session, NNPackages::get().getModelAbsolutePath(NNPackages::ADD_NO_MANIFEST).c_str()),
NNFW_STATUS_ERROR);
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestSessionCreated, neg_load_invalid_package_2)
_session,
NNPackages::get().getModelAbsolutePath(NNPackages::ADD_INVALID_MANIFEST).c_str()),
NNFW_STATUS_ERROR);
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestSessionCreated, neg_prepare_001)
{
// nnfw_load_model_from_file was not called
- ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestSessionCreated, neg_run_001)
{
// nnfw_load_model_from_file and nnfw_prepare was not called
- ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestSessionCreated, neg_set_input_001)
{
- // Invalid state
- ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0),
+ NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestSessionCreated, neg_set_output_001)
{
- // Invalid state
- ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, nullptr, 0),
+ NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestSessionCreated, neg_get_input_size)
{
uint32_t size = 10000;
- ASSERT_EQ(nnfw_input_size(_session, &size), NNFW_STATUS_ERROR);
- ASSERT_EQ(size, 10000);
+ ASSERT_EQ(nnfw_input_size(_session, &size), NNFW_STATUS_INVALID_STATE);
+ ASSERT_EQ(size, 10000); // Remain unchanged
}
TEST_F(ValidationTestSessionCreated, neg_get_output_size)
{
uint32_t size = 10000;
- ASSERT_EQ(nnfw_output_size(_session, &size), NNFW_STATUS_ERROR);
- ASSERT_EQ(size, 10000);
+ ASSERT_EQ(nnfw_output_size(_session, &size), NNFW_STATUS_INVALID_STATE);
+ ASSERT_EQ(size, 10000); // Remain unchanged
}
TEST_F(ValidationTestSessionCreated, neg_output_tensorinfo)
{
nnfw_tensorinfo tensor_info;
// model is not loaded
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &tensor_info), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, &tensor_info), NNFW_STATUS_INVALID_STATE);
// model is not loaded and tensor_info is null
- ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, nullptr), NNFW_STATUS_ERROR);
+ ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, nullptr), NNFW_STATUS_INVALID_STATE);
}
TEST_F(ValidationTestSingleSession, create_001)
{
- ASSERT_EQ(nnfw_create_session(&_session), NNFW_STATUS_NO_ERROR);
- ASSERT_EQ(nnfw_close_session(_session), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
+ NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
}
TEST_F(ValidationTestSingleSession, query_info_u32)
{
uint32_t val = 0;
- ASSERT_EQ(nnfw_query_info_u32(nullptr, NNFW_INFO_ID_VERSION, &val), NNFW_STATUS_NO_ERROR);
+ NNFW_ENSURE_SUCCESS(nnfw_query_info_u32(nullptr, NNFW_INFO_ID_VERSION, &val));
}
TEST_F(ValidationTestSingleSession, neg_create_001)
#include "NNPackages.h"
+#define NNFW_ENSURE_SUCCESS(EXPR) ASSERT_EQ((EXPR), NNFW_STATUS_NO_ERROR)
+
inline uint64_t num_elems(const nnfw_tensorinfo *ti)
{
uint64_t n = 1;
return()
endif(NOT INSTALL_TEST_SCRIPTS)
-# Install test scripts
-file(GLOB TEST_SCRIPTS "*.sh")
-install(PROGRAMS ${TEST_SCRIPTS} DESTINATION tests/scripts)
+# Install test driver
+file(GLOB TEST_DRIVER_SCRIPT onert-test)
+install(PROGRAMS ${TEST_DRIVER_SCRIPT} DESTINATION test)
-# Install test list
-file(GLOB TEST_LISTS "list/*.txt")
-install(FILES ${TEST_LISTS} DESTINATION tests/scripts/list)
+# Command scripts are sourced by the test driver (not executed directly), so they don't need execute permission
+install(DIRECTORY command DESTINATION test)
+
+# Install models test script
+file(GLOB MODEL_TEST_SCRIPT "models/run_test.sh")
+install(PROGRAMS ${MODEL_TEST_SCRIPT} DESTINATION test/models)
-# Install framework test script
-file(GLOB FRAMEWORKTEST_SCRIPT "framework/run_test.sh")
-install(PROGRAMS ${FRAMEWORKTEST_SCRIPT} DESTINATION tests/scripts/framework)
+# Install models test list file
+file(GLOB MODEL_TEST_DIR models/config)
+install(DIRECTORY ${MODEL_TEST_DIR} DESTINATION test/models)
-# Install framework test list file
-file(GLOB FRAMEWORKTEST_DIR framework/tests)
-install(DIRECTORY ${FRAMEWORKTEST_DIR} DESTINATION tests/scripts/framework)
+# Install nnpackage test config
+file(GLOB MODEL_TEST_DIR LIST_DIRECTORIES true nnfw_api_gtest/models/*)
+install(DIRECTORY ${MODEL_TEST_DIR} DESTINATION test/models/nnpackage)
+
+# Install test list
+file(GLOB TEST_LIST_DIR list)
+install(DIRECTORY ${TEST_LIST_DIR} DESTINATION test)
source $MY_PATH/common.sh
-BENCHMARK_RUN_TEST_SH=
BENCHMARK_DRIVER_BIN=
BENCHMARK_REPORT_DIR=
BENCHMARK_MODELS_FILE=
function Usage()
{
- echo "Usage: ./$0 --reportdir=. --runtestsh=tests/scripts/framework/run_test.sh --driverbin=Product/out/bin/tflite_run"
+ echo "Usage: ./$0 --reportdir=. --driverbin=Product/out/bin/tflite_run"
}
for i in "$@"
--test_op)
TEST_OP="true"
;;
- --runtestsh=*)
- BENCHMARK_RUN_TEST_SH=${i#*=}
- ;;
--driverbin=*)
BENCHMARK_DRIVER_BIN=${i#*=}
;;
local REPORT_MODEL_DIR=$2
local PAUSE_TIME_IN_SEC=$3
local BENCHMARK_DRIVER_BIN=$4
- local BENCHMARK_RUN_TEST_SH=$5
- local EXECUTORS=$6
- local BACKEND_LIST=$7
+ local EXECUTORS=$5
+ local BACKEND_LIST=$6
export USE_NNAPI=1
done
export BACKENDS=$BACKENDS_TO_USE
if [ "$TEST_OP" == "false" ]; then
- profile_for_he_shed $REPORT_MODEL_DIR $BENCHMARK_RUN_TEST_SH $BENCHMARK_DRIVER_BIN $MODEL $PROFILING_RUN_CNT
+ profile_for_he_shed $REPORT_MODEL_DIR $BENCHMARK_DRIVER_BIN $MODEL $PROFILING_RUN_CNT
fi
for executor in $EXECUTORS; do
export EXECUTOR=$executor
if [ "$TEST_OP" == "false" ]; then
- run_with_he_scheduler $REPORT_MODEL_DIR $BENCHMARK_RUN_TEST_SH $BENCHMARK_DRIVER_BIN $MODEL $executor
+ run_with_he_scheduler $REPORT_MODEL_DIR $BENCHMARK_DRIVER_BIN $MODEL $executor
fi
for backend in $BACKEND_LIST; do
export OP_BACKEND_ALLOPS=$backend
run_benchmark_and_print "tflite_onert_"$executor"_executor_$backend" "TFLite onert $executor Executor $backend"\
- $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN $BENCHMARK_RUN_TEST_SH
+ $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN
done
done
unset USE_NNAPI EXECUTOR OP_BACKEND_ALLOPS BACKENDS
# TFLite+CPU
unset USE_NNAPI
- run_benchmark_and_print "tflite_cpu" "TFLite CPU" $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN $BENCHMARK_RUN_TEST_SH
+ run_benchmark_and_print "tflite_cpu" "TFLite CPU" $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN
# run onert
if [ "$TEST_OP" == "true" ]; then
# Operation test don't need to test each scheduler
- run_onert_with_all_config $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN $BENCHMARK_RUN_TEST_SH "Linear" "$BACKEND_LIST"
+ run_onert_with_all_config $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN "Linear" "$BACKEND_LIST"
else
- run_onert_with_all_config $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN $BENCHMARK_RUN_TEST_SH "$EXECUTORS" "$BACKEND_LIST"
+ run_onert_with_all_config $MODEL $REPORT_MODEL_DIR 0 $BENCHMARK_DRIVER_BIN "$EXECUTORS" "$BACKEND_LIST"
fi
if [[ $i -ne $(echo $BENCHMARK_MODEL_LIST | wc -w)-1 ]]; then
progname=$(basename "${BASH_SOURCE[0]}")
indir="."
outdir="."
-nnpkg_run=${nnpkg_run:-"Product/out/bin/nnpackage_run"}
+nnpkg_run=${nnpkg_run:-"nnpackage_run"}
difftool=${difftool:-"h5diff"}
delete_dumped_on_failure=0
usage() {
- echo "Usage: $progname [options] nnpackage_test"
+ echo "Usage: $0 $progname [options] nnpackage_test"
echo "Run an nnpackage testcase"
echo ""
echo "Returns"
echo " (dumped file are always deleted on success) (default=$delete_dumped_on_failure)"
echo ""
echo "Environment variables:"
- echo " nnpackage_run path to nnpackage_run (default=Product/out/bin/nnpackage_run)"
+ echo " nnpackage_run path to nnpackage_run (default=nnpackage_run)"
echo " difftool path to i5diff or h5diff (default=h5diff)"
echo ""
echo "Examples:"
- echo " $progname Add_000 => run $indir/Add_000 and check output"
- echo " $progname -i nnpkg-tcs Add_000 => run nnpkg-tcs/Add_000 and check output"
+ echo " $0 $progname Add_000 => run $indir/Add_000 and check output"
+ echo " $0 $progname -i nnpkg-tcs Add_000 => run nnpkg-tcs/Add_000 and check output"
exit 1
}
exit 1
fi
-if [ ! -e Product ]; then
- echo "error: please make sure to run this script in nnfw home."
- exit 1
-fi
-
tcname=$(basename "$1")
nnpkg="$indir/$tcname"
if ! command_exists $nnpkg_run; then
echo "error: runner "$nnpkg_run" does not exist."
+ echo " if $nnpkg_run exists, please set PATH to $nnpkg_run"
exit 1
fi
--- /dev/null
+#!/bin/bash
+#
+# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+COMMAND_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+INSTALL_DIR="$(dirname $(dirname $COMMAND_DIR))"
+
+MD5_CHECK="on"
+DOWNLOAD_MODEL="all"
+
+function Usage()
+{
+ echo "Usage: $0 $(basename ${BASH_SOURCE[0]}) [OPTIONS]"
+ echo ""
+ echo "Options:"
+ echo " --ignoremd5 Ignore MD5 check when download model files"
+ echo " --model=(all|nnpackage|tflite) Download test model (default=all)"
+}
+
+for i in "$@"
+do
+ case $i in
+ -h|--help|help)
+ Usage
+ exit 1
+ ;;
+ --ignoremd5)
+ MD5_CHECK="off"
+ ;;
+ --model=*)
+ DOWNLOAD_MODEL=${i#*=}
+ ;;
+ *)
+ echo "Unknown option: $i"
+ exit 1
+ ;;
+ esac
+ shift
+done
+
+if [[ $DOWNLOAD_MODEL == "all" ]] || [[ $DOWNLOAD_MODEL == "tflite" ]]; then
+ # Download tflite models
+ $INSTALL_DIR/test/models/run_test.sh --download=on --run=off --md5=$MD5_CHECK
+fi
+
+if [[ $DOWNLOAD_MODEL == "all" ]] || [[ $DOWNLOAD_MODEL == "nnpackage" ]]; then
+ # Download nnpackage model
+ NNPACKAGE_CONFIG_DIR=$INSTALL_DIR/test/models/nnpackage/
+ NNPACKAGE_CACHE_DIR=$INSTALL_DIR/unittest_standalone/nnfw_api_gtest_models/
+ $INSTALL_DIR/test/models/run_test.sh --download=on --run=off --md5=$MD5_CHECK \
+ --configdir=$NNPACKAGE_CONFIG_DIR --cachedir=$NNPACKAGE_CACHE_DIR
+fi
# See the License for the specific language governing permissions and
# limitations under the License.
+COMMAND_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+INSTALL_DIR="$(dirname $(dirname $COMMAND_DIR))"
UNITTEST_REPORT_DIR=
-UNITTEST_TEST_DIR=
+UNITTEST_TEST_DIR=$INSTALL_DIR/unittest
UNITTEST_RESULT=0
UNITTEST_RUN_ALL=""
function Usage()
{
# TODO: Fill this
- echo "Usage: LD_LIBRARY_PATH=Product/out/lib ./$0 --reportdir=report --unittestdir=Product/out/unittest"
+ echo "Usage: $0 $(basename ${BASH_SOURCE[0]}) [OPTIONS]"
+ echo ""
+ echo "Options:"
+ echo " --reportdir=PATH Path to write unittest report"
+ echo " --unittestdir=PATH Path to run unittest (default: $UNITTEST_TEST_DIR"
}
-get_gtest_option()
+function get_gtest_option()
{
local UNITTEST_REPORT_FILE=$(basename $TEST_BIN)
- local output_option="--gtest_output=xml:$UNITTEST_REPORT_DIR/$UNITTEST_REPORT_FILE.xml"
+ local output_option
local filter_option
+ if [ -n "$UNITTEST_REPORT_DIR" ]; then
+ output_option="--gtest_output=xml:$UNITTEST_REPORT_DIR/$UNITTEST_REPORT_FILE.xml"
+ fi
if [ -r "$TEST_BIN.skip" ]; then
filter_option="--gtest_filter=-$(grep -v '#' "$TEST_BIN.skip" | tr '\n' ':')"
fi
--unittestdir=*)
UNITTEST_TEST_DIR=${i#*=}
;;
- --runall)
- UNITTEST_RUN_ALL="true"
+ *)
+ echo "Unknown option: $i"
+ exit 1
+ ;;
esac
shift
done
-# TODO: handle exceptions for params
-
-if [ ! -e "$UNITTEST_REPORT_DIR" ]; then
+if [ -n "$UNITTEST_REPORT_DIR" ] && [ ! -e "$UNITTEST_REPORT_DIR" ]; then
mkdir -p $UNITTEST_REPORT_DIR
fi
echo "============================================"
echo "Starting set $num_unittest: $TEST_BIN..."
echo "============================================"
- TEMP_UNITTEST_RESULT=0
- if [ "$UNITTEST_RUN_ALL" == "true" ]; then
- for TEST_LIST_VERBOSE_LINE in $($TEST_BIN --gtest_list_tests); do
- if [[ $TEST_LIST_VERBOSE_LINE == *\. ]]; then
- TEST_LIST_CATEGORY=$TEST_LIST_VERBOSE_LINE
- else
- TEST_LIST_ITEM="$TEST_LIST_CATEGORY""$TEST_LIST_VERBOSE_LINE"
- $TEST_BIN --gtest_filter=$TEST_LIST_ITEM --gtest_output="xml:$UNITTEST_REPORT_DIR/$TEST_LIST_ITEM.xml"
- fi
- done
- else
- $TEST_BIN $(get_gtest_option)
- TEMP_UNITTEST_RESULT=$?
- fi
+ $TEST_BIN $(get_gtest_option)
+ TEMP_UNITTEST_RESULT=$?
if [[ $TEMP_UNITTEST_RESULT -ne 0 ]]; then
UNITTEST_RESULT=$TEMP_UNITTEST_RESULT
--- /dev/null
+#!/bin/bash
+#
+# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+COMMAND_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+INSTALL_DIR="$(dirname $(dirname $COMMAND_DIR))"
+
+MD5_CHECK="on"
+TFLITE_LOADER="nnapi"
+REPORT_DIR="report"
+TEST_LIST_FILE=
+
+function Usage()
+{
+ echo "Usage: $0 $(basename ${BASH_SOURCE[0]}) [OPTIONS]"
+ echo ""
+ echo "Options:"
+ echo " --ignoremd5 Ignore MD5 check when download model files"
+ echo " --api=(nnapi|loader) TFLite model file loading API (default=$TFLITE_LOADER)"
+ echo " --reportdir=PATH Path to write report (default=$REPORT_DIR)"
+ echo " --list=FILE List file to test. Test all if list option is not passed"
+}
+
+for i in "$@"
+do
+ case $i in
+ -h|--help|help)
+ Usage
+ exit 1
+ ;;
+ --ignoremd5)
+ MD5_CHECK="off"
+ ;;
+ --api=*)
+ TFLITE_LOADER=${i#*=}
+ ;;
+ --reportdir=*)
+ REPORT_DIR=${i#*=}
+ ;;
+ --list=*)
+ TEST_LIST_FILE=${i#*=}
+ ;;
+ *)
+ echo "Unknown option: $i"
+ exit 1
+ ;;
+ esac
+ shift
+done
+
+if [ ! -z "$TEST_LIST_FILE" ]; then
+ MODELLIST=$(cat "${TEST_LIST_FILE}")
+fi
+
+if [ ! -e "$REPORT_DIR" ]; then
+ mkdir -p $REPORT_DIR
+fi
+
+TEST_RESULT=0
+TAP_NAME=verification_test.tap
+TEST_NAME="Verification"
+TEST_DRIVER=
+
+if [[ $TFLITE_LOADER == "nnapi" ]]; then
+ TEST_NAME="NNAPI Verification"
+ TEST_DRIVER=nnapi_test
+elif [[ $TFLITE_LOADER == "loader" ]]; then
+ TEST_NAME="Loader Verification"
+ TEST_DRIVER=tflite_loader_test_tool
+else
+ Usage
+ exit 1
+fi
+
+$INSTALL_DIR/test/models/run_test.sh --driverbin=$TEST_DRIVER \
+ --reportdir=$REPORT_DIR \
+ --tapname=$TAP_NAME \
+ ${MODELLIST:-} > $REPORT_DIR/verification_test.log 2>&1
+TEST_RESULT=$?
+
+if [[ $TEST_RESULT -ne 0 ]]; then
+ echo ""
+ cat $REPORT_DIR/$TAP_NAME
+ echo ""
+ echo "$TEST_NAME failed... exit code: $TEST_RESULT"
+ echo "============================================"
+ echo ""
+ exit $TEST_RESULT
+fi
+
+echo ""
+cat $REPORT_DIR/$TAP_NAME
+echo "============================================"
+echo ""
function get_result_of_benchmark_test()
{
- local RUN_TEST_SH=$1
- local DRIVER_BIN=$2
- local MODEL=$3
- local LOG_FILE=$4
+ local DRIVER_BIN=$1
+ local MODEL=$2
+ local LOG_FILE=$3
local RET=0
- $RUN_TEST_SH --driverbin="$DRIVER_BIN -r 5 -w 3" $MODEL > $LOG_FILE 2>&1
+ $MY_PATH/models/run_test.sh --driverbin="$DRIVER_BIN -r 5 -w 3" $MODEL > $LOG_FILE 2>&1
RET=$?
if [[ $RET -ne 0 ]]; then
echo "Testing $MODEL aborted... exit code: $RET"
LOG_FILE=$REPORT_MODEL_DIR/$WRITE_FILE_NAME.txt
RESULT_FILE=$REPORT_MODEL_DIR/$WRITE_FILE_NAME.result
print_with_dots $MSG
- RESULT=$(get_result_of_benchmark_test $BENCHMARK_RUN_TEST_SH $DRIVER_BIN $MODEL $LOG_FILE)
+ RESULT=$(get_result_of_benchmark_test $DRIVER_BIN $MODEL $LOG_FILE)
echo "$RESULT ms"
print_result_of_benchmark_test "$MSG" "$RESULT" $RESULT_FILE
sleep $PAUSE_TIME_IN_SEC
pad
reduce_max
reduce_mean
-reduce_sum
+reduce_sum/float
relu
relu6
reshape
pad
reduce_max
reduce_mean
-reduce_sum
+reduce_sum/float
relu
relu6
reshape
pack
pad
reduce_max
-reduce_sum
+reduce_sum/float
relu
relu6
reshape/reshape1
pack
pad
reduce_max
-reduce_sum
+reduce_sum/float
relu
relu6
reshape/reshape1
--- /dev/null
+MODELFILE_NAME="reduce_sum_uint8.tflite"
MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
NNFW_HOME="$(dirname $(dirname $(dirname ${MY_PATH})))"
CACHE_ROOT_PATH=$MY_PATH/"cache"
-TEST_ROOT_PATH=$MY_PATH/"tests"
+TEST_ROOT_PATH=$MY_PATH/"config"
REPORT_DIR="report"
RUN_DISABLED="true"
+function command_exists() {
+ command -v "$@" > /dev/null 2>&1
+}
+
function Usage()
{
echo "Usage: ./$0 --driverbin={such as tflite_run} {tests to test or empty for all of tests}"
echo "Usage: ./$0 --driverbin=Product/out/bin/tflite_run --reportdir=report --tapname=verification.tap avgpool1 avgpool2"
echo ""
- echo "--download - (default=off) Download model files. Other options is ignored"
- echo "--driverbin - (default=../../Product/out/bin/tflite_run) runner for runnning framework tests"
- echo "--reportdir - (default=report) directory to place tap files"
- echo "--tapname - (default=framework_test.tap) file name to be written for tap"
+ echo "--download - (default=on) Download model files"
+ echo "--run - (default=on) Test model files"
+ echo "--driverbin - (default=../../Product/out/bin/tflite_run) Runner for runnning model tests"
+ echo "--reportdir - (default=report) Directory to place tap files"
+ echo "--tapname - (default=framework_test.tap) File name to be written for tap"
+ echo "--md5 - (default=on) MD5 check when download model files"
+ echo "--configdir - (default=$TEST_ROOT_PATH) Config directory to download and test model"
+ echo "--cachedir - (default=$CACHE_ROOT_PATH) Directory to download model"
echo ""
}
return 0;
fi
# Ignore checking md5 in cache
+ # TODO Use "--md5" option only and remove IGNORE_MD5 environment variable
if [ ! -z $IGNORE_MD5 ] && [ "$IGNORE_MD5" == "1" ]; then
return 1
fi
+ if [ "$MD5_CHECK" = "off" ]; then
+ return 1
+ fi
LOCAL_HASH=$(md5sum $LOCAL_PATH | awk '{ print $1 }')
REMOTE_HASH=$(curl -ss $REMOTE_URL | md5sum | awk '{ print $1 }')
DRIVER_BIN=""
TAP_NAME="framework_test.tap"
TEST_LIST=()
-DOWNLOAD_MODE="off"
+DOWNLOAD_MODEL="on"
+RUN_TEST="on"
+MD5_CHECK="on"
# Support environment variable setting for mirror server
FIXED_MODELFILE_SERVER="${MODELFILE_SERVER:-}"
--download=*)
DOWNLOAD_MODE=${i#*=}
;;
+ --md5=*)
+ MD5_CHECK=${i#*=}
+ ;;
+ --run=*)
+ RUN_TEST=${i#*=}
+ ;;
+ --configdir=*)
+ TEST_ROOT_PATH=${i#*=}
+ ;;
+ --cachedir=*)
+ CACHE_ROOT_PATH=${i#*=}
+ ;;
*)
TEST_LIST+=( $i )
;;
DRIVER_BIN="$NNFW_HOME/Product/out/bin/tflite_run"
fi
+if [ ! -d "$TEST_ROOT_PATH" ]; then
+ echo "Cannot find config directory for test: please set proper configdir"
+ exit 1
+fi
+
# Check test driver setting
-if [ ! -e $DRIVER_BIN ] && [ "$DOWNLOAD_MODE" != "on" ]; then
+if ! command_exists $DRIVER_BIN && [ "$RUN_TEST" = "on" ]; then
echo "Cannot find test driver" $DRIVER_BIN ": please set proper DRIVER_BIN"
exit 1
fi
TEST_CACHE_PATH=$CACHE_ROOT_PATH/$TEST_NAME
MODELFILE=$TEST_CACHE_PATH/$MODELFILE_NAME
- MODELFILE_URL="$MODELFILE_SERVER_PATH/$MODELFILE_NAME"
- if [ -n "$FIXED_MODELFILE_SERVER" ]; then
- MODELFILE_URL="$FIXED_MODELFILE_SERVER/$MODELFILE_NAME"
- fi
-
- # Download model file
- if [ ! -e $TEST_CACHE_PATH ]; then
- mkdir -p $TEST_CACHE_PATH
- fi
-
- # Download unless we have it in cache (Also check md5sum)
- if need_download "$MODELFILE" "$MODELFILE_URL"; then
- echo ""
- echo "Download test file for $TEST_NAME"
- echo "======================"
-
- rm -f $MODELFILE # Remove invalid file if exists
- pushd $TEST_CACHE_PATH
- wget -nv $MODELFILE_URL
- if [ "${MODELFILE_NAME##*.}" == "zip" ]; then
- unzip -o $MODELFILE_NAME
- fi
- popd
- fi
# Find model file for downloaded by zip
- if [ "${MODELFILE_NAME##*.}" == "zip" ]; then
+ if [ "${MODELFILE_NAME##*.}" = "zip" ]; then
pushd $TEST_CACHE_PATH
MODELFILE=$TEST_CACHE_PATH/$(ls *.tflite)
popd
# Run driver to test framework
$DRIVER_BIN $MODELFILE
- #$DRIVER_BIN $MODELFILE
if [[ $? -eq 0 ]]; then
echo "ok $i - $TEST_NAME" >> $REPORT_DIR/$TAP_NAME
else
mkdir -p $REPORT_DIR
TESTS_TO_RUN=$(find_tests ${TEST_LIST[@]})
-if [[ "$DOWNLOAD_MODE" == "on" ]]; then
+if [ "$DOWNLOAD_MODEL" = "on" ]; then
download_tests $TESTS_TO_RUN
- exit 0;
fi
-run_tests $TESTS_TO_RUN
+if [ "$RUN_TEST" = "on" ]; then
+ run_tests $TESTS_TO_RUN
+fi
exit $?
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+[[ "${BASH_SOURCE[0]}" != "${0}" ]] && echo "Please don't source ${BASH_SOURCE[0]}, execute it" && return
+
+DRIVER_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+INSTALL_PATH="$(dirname $DRIVER_PATH)"
+COMMAND_PATH=$INSTALL_PATH/test/command
+BIN_PATH=$INSTALL_PATH/bin
+
+export PATH=$BIN_PATH:$PATH
+
+function Usage()
+{
+ echo "Usage: $0 [COMMAND] ..."
+ echo "Command:"
+ for file in $COMMAND_PATH/*;
+ do
+ echo " $(basename "$file")"
+ done
+ exit 255
+}
+
+COMMAND=$1; shift
+if [[ -z $COMMAND ]] || [[ $COMMAND == "--help" ]]; then
+ Usage
+ exit 255
+fi
+
+COMMAND_FILE=$COMMAND_PATH/$COMMAND
+if [[ ! -f $COMMAND_FILE ]]; then
+ echo "ERROR: '$COMMAND' is not supported"
+ exit 255
+fi
+
+source $COMMAND_FILE $@
echo "--artifactpath - (default={test-driver.sh's path}/../../) it should contain tests/ and Product/"
echo ""
echo "Following options are needed when you want to tests of specific types. If you don't pass any one, unittest and verification will be run"
- echo "--unittest - (default=on) run unit test"
echo "--frameworktest - (default=off) run framework test"
echo "--verification - (default=on) run verification"
echo "--frameworktest_list_file - filepath of model list for test"
echo "etc."
echo "--framework_driverbin - (default=../../Product/out/bin/tflite_run) runner for runnning framework tests"
echo "--verification_driverbin - (default=../../Product/out/bin/nnapi_test) runner for runnning verification tests"
- echo "--runtestsh - (default=\$ARTIFACT_PATH/tests/scripts/framework/run_test.sh) run_test.sh with path where it is for framework test and verification"
- echo "--unittestdir - (default=\$ARTIFACT_PATH/Product/out/unittest) directory that has unittest binaries for unit test"
echo ""
echo "--reportdir - (default=\$ARTIFACT_PATH/report) directory to save report"
echo ""
ARTIFACT_PATH="$TEST_DRIVER_DIR/../../"
FRAMEWORK_DRIVER_BIN=""
VERIFICATION_DRIVER_BIN=""
-RUN_TEST_SH=""
-UNIT_TEST_DIR=""
ALLTEST_ON="true"
-UNITTEST_ON="false"
FRAMEWORKTEST_ON="false"
VERIFICATION_ON="false"
BENCHMARK_ONERT_OP_ON="false"
--verification_driverbin=*)
VERIFICATION_DRIVER_BIN=${i#*=}
;;
- --runtestsh=*)
- RUN_TEST_SH=${i#*=}
- ;;
- --unittestdir=*)
- UNIT_TEST_DIR=${i#*=}
- ;;
- --unittest)
- ALLTEST_ON="false"
- UNITTEST_ON="true"
- ;;
--frameworktest)
ALLTEST_ON="false"
FRAMEWORKTEST_ON="true"
ARTIFACT_PATH="$(readlink -f $ARTIFACT_PATH)"
-if [ -z "$RUN_TEST_SH" ]; then
- RUN_TEST_SH=$ARTIFACT_PATH/tests/scripts/framework/run_test.sh
-fi
-
-if [ ! -e "$RUN_TEST_SH" ]; then
- echo "Cannot find $RUN_TEST_SH"
- exit 1
-fi
-
if [ -z "$UNIT_TEST_DIR" ]; then
UNIT_TEST_DIR=$ARTIFACT_PATH/Product/out/unittest
fi
source $TEST_DRIVER_DIR/common.sh
-# Run unittest in each part such as Runtime
-if [ "$ALLTEST_ON" == "true" ] || [ "$UNITTEST_ON" == "true" ]; then
- $TEST_DRIVER_DIR/unittest.sh \
- --reportdir=$REPORT_DIR \
- --unittestdir=$UNIT_TEST_DIR
-fi
-
# Run tflite_run with various tflite models
if [ "$FRAMEWORKTEST_ON" == "true" ]; then
if [ -z "$FRAMEWORK_DRIVER_BIN" ]; then
fi
$TEST_DRIVER_DIR/test_framework.sh \
- --runtestsh=$RUN_TEST_SH \
--driverbin=$FRAMEWORK_DRIVER_BIN \
--reportdir=$REPORT_DIR \
--tapname=framework_test.tap \
# verification uses the same script as frameworktest does
$TEST_DRIVER_DIR/test_framework.sh \
- --runtestsh=$RUN_TEST_SH \
--driverbin=$VERIFICATION_DRIVER_BIN \
--reportdir=$REPORT_DIR \
--tapname=verification_test.tap \
$TEST_DRIVER_DIR/benchmark_nnapi.sh \
--test_op \
- --runtestsh=$RUN_TEST_SH \
--driverbin=$DRIVER_BIN \
--reportdir=$REPORT_DIR/benchmark_op \
- --modelfilepath=$ARTIFACT_PATH/tests/scripts/framework
+ --modelfilepath=$ARTIFACT_PATH/tests/scripts/models
fi
# Make json file. Actually, this process is only needed on CI. That's why it is in test-driver.sh.
# See the License for the specific language governing permissions and
# limitations under the License.
-FWTEST_RUN_TEST_SH=
+MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
FWTEST_DRIVER_BIN=
FWTEST_REPORT_DIR=
FWTEST_TAP_NAME=
{
echo "Usage Example:"
echo "./$0 \\"
- echo " --runtestsh=tests/scripts/framework/run_test.sh \\ # Test runner script path"
echo " --driverbin=Product/out/bin/tflite_run \\ # Test driver path"
echo " --frameworktest_list_file=tests/scripts/list/frameworktest_list.armv7l.cpu.txt \\"
echo " --reportdir=report \\ # Directory for the report files will be saved"
-h|--help|help)
Usage
;;
- --runtestsh=*)
- FWTEST_RUN_TEST_SH=${i#*=}
- ;;
--driverbin=*)
FWTEST_DRIVER_BIN=${i#*=}
;;
shift
done
-[ ! -z "$FWTEST_RUN_TEST_SH" ] || Usage
[ ! -z "$FWTEST_DRIVER_BIN" ] || Usage
[ ! -z "$FWTEST_REPORT_DIR" ] || Usage
[ ! -z "$FWTEST_TAP_NAME" ] || Usage
MODELLIST=$(cat "${FRAMEWORKTEST_LIST_FILE}")
fi
-$FWTEST_RUN_TEST_SH --driverbin=$FWTEST_DRIVER_BIN \
+$MY_PATH/models/run_test.sh --driverbin=$FWTEST_DRIVER_BIN \
--reportdir=$FWTEST_REPORT_DIR \
--tapname=$FWTEST_TAP_NAME \
${MODELLIST:-} \
ARTIFACT_PATH="$TEST_DRIVER_DIR/../.."
BENCHMARK_DRIVER_BIN=$ARTIFACT_PATH/Product/out/bin/tflite_run
REPORT_DIR=$ARTIFACT_PATH/report
-RUN_TEST_SH=$ARTIFACT_PATH/tests/scripts/framework/run_test.sh
+RUN_TEST_SH=$ARTIFACT_PATH/tests/scripts/models/run_test.sh
BENCHMARK_MODEL_LIST="MODELS/inception_nonslim MODELS/inception_slim MODELS/mobilenet"
if [ ! -e "$RUN_TEST_SH" ]; then
target_include_directories(nnpackage_run PRIVATE ${Boost_INCLUDE_DIRS})
target_link_libraries(nnpackage_run onert_core onert tflite_loader)
-target_link_libraries(nnpackage_run tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_tflite jsoncpp)
+target_link_libraries(nnpackage_run nnfw_lib_tflite jsoncpp)
target_link_libraries(nnpackage_run nnfw-dev)
target_link_libraries(nnpackage_run ${Boost_PROGRAM_OPTIONS_LIBRARY})
target_link_libraries(nnpackage_run nnfw_lib_benchmark)
#include "args.h"
+#include <functional>
#include <iostream>
#include <json/json.h>
void Args::Initialize(void)
{
+ auto process_nnpackage = [&](const std::string &package_filename) {
+ _package_filename = package_filename;
+
+ std::cerr << "Package Filename " << _package_filename << std::endl;
+ if (_package_filename.empty())
+ {
+ // TODO Print usage instead of the below message
+ std::cerr << "Please specify nnpackage file. Run with `--help` for usage."
+ << "\n";
+
+ exit(1);
+ }
+ else
+ {
+ if (access(_package_filename.c_str(), F_OK) == -1)
+ {
+ std::cerr << "nnpackage not found: " << _package_filename << "\n";
+ }
+ }
+ };
+
+ auto process_output_sizes = [&](const std::string &output_sizes_json_str) {
+ Json::Value root;
+ Json::Reader reader;
+ if (!reader.parse(output_sizes_json_str, root, false))
+ {
+ std::cerr << "Invalid JSON format for output_sizes \"" << output_sizes_json_str << "\"\n";
+ exit(1);
+ }
+
+ auto arg_map = argArrayToMap(root);
+ for (auto &pair : arg_map)
+ {
+ uint32_t key = pair.first;
+ Json::Value &val_json = pair.second;
+ if (!val_json.isUInt())
+ {
+ std::cerr << "All the values in `output_sizes` must be unsigned integers\n";
+ exit(1);
+ }
+ uint32_t val = val_json.asUInt();
+ _output_sizes[key] = val;
+ }
+ };
+
+ auto process_shape_prepare = [&](const std::string &shape_str) {
+ try
+ {
+ handleShapeParam(_shape_prepare, shape_str);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "error with '--shape_prepare' option: " << shape_str << std::endl;
+ exit(1);
+ }
+ };
+
+ auto process_shape_run = [&](const std::string &shape_str) {
+ try
+ {
+ handleShapeParam(_shape_run, shape_str);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "error with '--shape_run' option: " << shape_str << std::endl;
+ exit(1);
+ }
+ };
+
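+ // Each option below registers a notifier callback; po::notify(vm), called after
+ // parsing succeeds, runs these callbacks to fill in the corresponding members.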
// General options
po::options_description general("General options", 100);
general.add_options()
("help,h", "Print available options")
("version", "Print version and exit immediately")
- ("nnpackage", po::value<std::string>()->required())
+ ("nnpackage", po::value<std::string>()->required()->notifier(process_nnpackage))
#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- ("dump,d", po::value<std::string>()->default_value(""), "Output filename")
- ("load,l", po::value<std::string>()->default_value(""), "Input filename")
+ ("dump,d", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_filename = v; }), "Output filename")
+ ("load,l", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _load_filename = v; }), "Input filename")
#endif
- ("output_sizes", po::value<std::string>(),
+ ("output_sizes", po::value<std::string>()->notifier(process_output_sizes),
"The output buffer size in JSON 1D array\n"
"If not given, the model's output sizes are used\n"
"e.g. '[0, 40, 2, 80]' to set 0th tensor to 40 and 2nd tensor to 80.\n")
- ("num_runs,r", po::value<int>()->default_value(1), "The number of runs")
- ("warmup_runs,w", po::value<int>()->default_value(0), "The number of warmup runs")
- ("run_delay,t", po::value<int>()->default_value(-1), "Delay time(ms) between runs (as default no delay")
- ("gpumem_poll,g", po::value<bool>()->default_value(false), "Check gpu memory polling separately")
- ("mem_poll,m", po::value<bool>()->default_value(false), "Check memory polling")
- ("write_report,p", po::value<bool>()->default_value(false),
+ ("num_runs,r", po::value<int>()->default_value(1)->notifier([&](const auto &v) { _num_runs = v; }), "The number of runs")
+ ("warmup_runs,w", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _warmup_runs = v; }), "The number of warmup runs")
+ ("run_delay,t", po::value<int>()->default_value(-1)->notifier([&](const auto &v) { _run_delay = v; }), "Delay time(ms) between runs (as default no delay")
+ ("gpumem_poll,g", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _gpumem_poll = v; }), "Check gpu memory polling separately")
+ ("mem_poll,m", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _mem_poll = v; }), "Check memory polling")
+ ("write_report,p", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _write_report = v; }),
"Write report\n"
"{exec}-{nnpkg}-{backend}.csv will be generated.\n"
"e.g. nnpackage_run-UNIT_Add_000-acl_cl.csv.\n"
"{nnpkg} name may be changed to realpath if you use symbolic-link.")
- ("shape_prepare", po::value<std::string>()->default_value("[]"),
+ ("shape_prepare", po::value<std::string>()->default_value("[]")->notifier(process_shape_prepare),
"set shape of specified tensor before compilation\n"
"e.g. '[0, [1, 2], 2, []]' to set 0th tensor to [1, 2] and 2nd tensor to [].\n")
- ("shape_run", po::value<std::string>()->default_value("[]"),
+ ("shape_run", po::value<std::string>()->default_value("[]")->notifier(process_shape_run),
"set shape of specified tensor right before running\n"
"e.g. '[1, [1, 2]]` to set 1st tensor to [1, 2].\n")
- ("verbose_level,v", po::value<int>()->default_value(0), "Verbose level\n"
+ ("verbose_level,v", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _verbose_level = v; }),
+ "Verbose level\n"
"0: prints the only result. Messages btw run don't print\n"
"1: prints result and message btw run\n"
"2: prints all of messages to print\n")
return;
}
- po::notify(vm);
try
{
-#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
- if (vm.count("dump"))
- {
- _dump_filename = vm["dump"].as<std::string>();
- }
-
- if (vm.count("load"))
- {
- _load_filename = vm["load"].as<std::string>();
- }
-#endif
-
- if (vm.count("nnpackage"))
- {
- _package_filename = vm["nnpackage"].as<std::string>();
-
- if (_package_filename.empty())
- {
- // TODO Print usage instead of the below message
- std::cerr << "Please specify nnpackage file. Run with `--help` for usage."
- << "\n";
-
- exit(1);
- }
- else
- {
- if (access(_package_filename.c_str(), F_OK) == -1)
- {
- std::cerr << "nnpackage not found: " << _package_filename << "\n";
- }
- }
- }
-
- if (vm.count("output_sizes"))
- {
- auto output_sizes_json_str = vm["output_sizes"].as<std::string>();
-
- Json::Value root;
- Json::Reader reader;
- if (!reader.parse(output_sizes_json_str, root, false))
- {
- std::cerr << "Invalid JSON format for output_sizes \"" << output_sizes_json_str << "\"\n";
- exit(1);
- }
-
- auto arg_map = argArrayToMap(root);
- for (auto &pair : arg_map)
- {
- uint32_t key = pair.first;
- Json::Value &val_json = pair.second;
- if (!val_json.isUInt())
- {
- std::cerr << "All the values in `output_sizes` must be unsigned integers\n";
- exit(1);
- }
- uint32_t val = val_json.asUInt();
- _output_sizes[key] = val;
- }
- }
-
- if (vm.count("num_runs"))
- {
- _num_runs = vm["num_runs"].as<int>();
- }
-
- if (vm.count("warmup_runs"))
- {
- _warmup_runs = vm["warmup_runs"].as<int>();
- }
-
- if (vm.count("run_delay"))
- {
- _run_delay = vm["run_delay"].as<int>();
- }
-
- if (vm.count("gpumem_poll"))
- {
- _gpumem_poll = vm["gpumem_poll"].as<bool>();
- }
-
- if (vm.count("mem_poll"))
- {
- _mem_poll = vm["mem_poll"].as<bool>();
- // Instead of EXECUTE to avoid overhead, memory polling runs on WARMUP
- if (_mem_poll && _warmup_runs == 0)
- {
- _warmup_runs = 1;
- }
- }
-
- if (vm.count("write_report"))
- {
- _write_report = vm["write_report"].as<bool>();
- }
-
- if (vm.count("verbose_level"))
- {
- _verbose_level = vm["verbose_level"].as<int>();
- }
+ po::notify(vm);
}
catch (const std::bad_cast &e)
{
- std::cerr << "error by bad cast" << e.what() << '\n';
+ std::cerr << "Bad cast error - " << e.what() << '\n';
exit(1);
}
- if (vm.count("shape_prepare"))
- {
- std::string shape_str;
- try
- {
- shape_str = vm["shape_prepare"].as<std::string>();
- }
- catch (const std::bad_cast &e)
- {
- std::cerr << "error by bad cast with '--shape_prepare' option" << e.what() << '\n';
- exit(1);
- }
- try
- {
- handleShapeParam(_shape_prepare, shape_str);
- }
- catch (const std::exception &e)
- {
- std::cerr << "error with '--shape_prepare' option: " << shape_str << std::endl;
- exit(1);
- }
- }
-
- if (vm.count("shape_run"))
+ // This must be run after `notify` as `_warmup_runs` must have been processed before.
+ if (vm.count("mem_poll"))
{
- std::string shape_str;
- try
- {
- shape_str = vm["shape_run"].as<std::string>();
- }
- catch (const std::bad_cast &e)
+ // Instead of EXECUTE to avoid overhead, memory polling runs on WARMUP
+ if (_mem_poll && _warmup_runs == 0)
{
- std::cerr << "error by bad cast with '--shape_run' option" << e.what() << '\n';
- exit(1);
- }
- try
- {
- handleShapeParam(_shape_run, shape_str);
- }
- catch (const std::exception &e)
- {
- std::cerr << "error with '--shape_run' option: " << shape_str << std::endl;
- exit(1);
+ _warmup_runs = 1;
}
}
}
data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT64);
break;
}
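+ // UINT8 tensors reuse the QUANT8_ASYMM dump path below; both are written as 8-bit data.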
+ case NNFW_TYPE_TENSOR_UINT8:
case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
{
H5::DataSet data_set =
data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
break;
}
- case NNFW_TYPE_TENSOR_UINT8:
- {
- H5::DataSet data_set =
- value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8BE, data_space);
- data_set.write(outputs[i].data(), H5::PredType::NATIVE_UINT8);
- break;
- }
default:
throw std::runtime_error("nnpkg_run can dump f32, i32, qasymm8, bool and uint8.");
}
#endif
#include "nnfw.h"
#include "nnfw_util.h"
-#include "nnfw_debug.h"
+#include "nnfw_internal.h"
#include "randomgen.h"
#ifdef RUY_PROFILER
#include "ruy/profiler/profiler.h"
target_include_directories(tflite_loader_test_tool PRIVATE ${Boost_INCLUDE_DIRS})
target_link_libraries(tflite_loader_test_tool onert_core onert tflite_loader)
-target_link_libraries(tflite_loader_test_tool nnfw_lib_tflite tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_misc)
+target_link_libraries(tflite_loader_test_tool nnfw_lib_tflite nnfw_lib_misc)
target_link_libraries(tflite_loader_test_tool ${Boost_PROGRAM_OPTIONS_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY})
install(TARGETS tflite_loader_test_tool DESTINATION bin)
target_include_directories(tflite_run PRIVATE src)
target_include_directories(tflite_run PRIVATE ${Boost_INCLUDE_DIRS})
-target_link_libraries(tflite_run tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_tflite)
+target_link_libraries(tflite_run nnfw_lib_tflite)
target_link_libraries(tflite_run ${Boost_PROGRAM_OPTIONS_LIBRARY})
target_link_libraries(tflite_run nnfw_lib_benchmark)
void Args::Initialize(void)
{
+ auto process_input = [&](const std::string &v) {
+ _input_filename = v;
+
+ if (!_input_filename.empty())
+ {
+ if (access(_input_filename.c_str(), F_OK) == -1)
+ {
+ std::cerr << "input image file not found: " << _input_filename << "\n";
+ }
+ }
+ };
+
+ auto process_tflite = [&](const std::string &v) {
+ _tflite_filename = v;
+
+ if (_tflite_filename.empty())
+ {
+ // TODO Print usage instead of the below message
+ std::cerr << "Please specify tflite file. Run with `--help` for usage."
+ << "\n";
+
+ exit(1);
+ }
+ else
+ {
+ if (access(_tflite_filename.c_str(), F_OK) == -1)
+ {
+ std::cerr << "tflite file not found: " << _tflite_filename << "\n";
+ exit(1);
+ }
+ }
+ };
+
try
{
// General options
// clang-format off
general.add_options()
("help,h", "Display available options")
- ("input,i", po::value<std::string>()->default_value(""), "Input filename")
- ("dump,d", po::value<std::string>()->default_value(""), "Output filename")
- ("ishapes", po::value<std::vector<int>>()->multitoken(), "Input shapes")
- ("compare,c", po::value<std::string>()->default_value(""), "filename to be compared with")
- ("tflite", po::value<std::string>()->required())
- ("num_runs,r", po::value<int>()->default_value(1), "The number of runs")
- ("warmup_runs,w", po::value<int>()->default_value(0), "The number of warmup runs")
- ("run_delay,t", po::value<int>()->default_value(-1), "Delay time(ms) between runs (as default no delay")
- ("gpumem_poll,g", po::value<bool>()->default_value(false), "Check gpu memory polling separately")
+ ("input,i", po::value<std::string>()->default_value("")->notifier(process_input), "Input filename")
+ ("dump,d", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_filename = v; }), "Output filename")
+ ("ishapes", po::value<std::vector<int>>()->multitoken()->notifier([&](const auto &v) { _input_shapes = v; }), "Input shapes")
+ ("compare,c", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _compare_filename = v; }), "filename to be compared with")
+ ("tflite", po::value<std::string>()->required()->notifier(process_tflite))
+ ("num_runs,r", po::value<int>()->default_value(1)->notifier([&](const auto &v) { _num_runs = v; }), "The number of runs")
+ ("warmup_runs,w", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _warmup_runs = v; }), "The number of warmup runs")
+ ("run_delay,t", po::value<int>()->default_value(-1)->notifier([&](const auto &v) { _run_delay = v; }), "Delay time(ms) between runs (as default no delay)")
+ ("gpumem_poll,g", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _gpumem_poll = v; }), "Check gpu memory polling separately")
("mem_poll,m", po::value<bool>()->default_value(false), "Check memory polling")
- ("write_report,p", po::value<bool>()->default_value(false), "Write report")
- ("validate", po::value<bool>()->default_value(true), "Validate tflite model")
- ("verbose_level,v", po::value<int>()->default_value(0), "Verbose level\n"
+ ("write_report,p", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _write_report = v; }), "Write report")
+ ("validate", po::value<bool>()->default_value(true)->notifier([&](const auto &v) { _tflite_validate = v; }), "Validate tflite model")
+ ("verbose_level,v", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _verbose_level = v; }), "Verbose level\n"
"0: prints the only result. Messages btw run don't print\n"
"1: prints result and message btw run\n"
"2: prints all of messages to print\n")
po::notify(vm);
- if (vm.count("dump"))
- {
- _dump_filename = vm["dump"].as<std::string>();
- }
-
- if (vm.count("compare"))
- {
- _compare_filename = vm["compare"].as<std::string>();
- }
-
- if (vm.count("input"))
- {
- _input_filename = vm["input"].as<std::string>();
-
- if (!_input_filename.empty())
- {
- if (access(_input_filename.c_str(), F_OK) == -1)
- {
- std::cerr << "input image file not found: " << _input_filename << "\n";
- }
- }
- }
-
- if (vm.count("ishapes"))
- {
- _input_shapes.resize(vm["ishapes"].as<std::vector<int>>().size());
- for (auto i = 0; i < _input_shapes.size(); i++)
- {
- _input_shapes[i] = vm["ishapes"].as<std::vector<int>>()[i];
- }
- }
-
- if (vm.count("tflite"))
- {
- _tflite_filename = vm["tflite"].as<std::string>();
-
- if (_tflite_filename.empty())
- {
- // TODO Print usage instead of the below message
- std::cerr << "Please specify tflite file. Run with `--help` for usage."
- << "\n";
-
- exit(1);
- }
- else
- {
- if (access(_tflite_filename.c_str(), F_OK) == -1)
- {
- std::cerr << "tflite file not found: " << _tflite_filename << "\n";
- exit(1);
- }
- }
- }
-
- if (vm.count("num_runs"))
- {
- _num_runs = vm["num_runs"].as<int>();
- }
-
- if (vm.count("warmup_runs"))
- {
- _warmup_runs = vm["warmup_runs"].as<int>();
- }
-
- if (vm.count("run_delay"))
- {
- _run_delay = vm["run_delay"].as<int>();
- }
-
- if (vm.count("gpumem_poll"))
- {
- _gpumem_poll = vm["gpumem_poll"].as<bool>();
- }
-
+ // This must be run after `notify` as `_warmup_runs` must have been processed before.
if (vm.count("mem_poll"))
{
_mem_poll = vm["mem_poll"].as<bool>();
_warmup_runs = 1;
}
}
-
- if (vm.count("write_report"))
- {
- _write_report = vm["write_report"].as<bool>();
- }
-
- if (vm.count("validate"))
- {
- _tflite_validate = vm["validate"].as<bool>();
- }
-
- if (vm.count("verbose_level"))
- {
- _verbose_level = vm["verbose_level"].as<int>();
- }
}
} // end of namespace TFLiteRun
+++ /dev/null
-if(NOT BUILD_TFLITE_RUN_2_2_0)
- return()
-endif()
-
-if(NOT BUILD_TENSORFLOW_LITE_2_2_0)
- set(BUILD_TENSORFLOW_LITE_2_2_0 ON)
-endif()
-
-nnfw_find_package(TensorFlowLite-2.2.0 REQUIRED)
-nnfw_find_package(Boost REQUIRED)
-
-list(APPEND TFLITE_RUN_SRCS "src/tflite_run_2_2_0.cc")
-list(APPEND TFLITE_RUN_SRCS "src/args.cc")
-
-add_executable(tflite_run_2_2_0 ${TFLITE_RUN_SRCS})
-target_include_directories(tflite_run_2_2_0 PRIVATE src)
-target_include_directories(tflite_run_2_2_0 PRIVATE ${Boost_INCLUDE_DIRS})
-
-target_link_libraries(tflite_run_2_2_0 tensorflow-lite-2.2.0 ${LIB_PTHREAD} dl)
-target_link_libraries(tflite_run_2_2_0 ${Boost_PROGRAM_OPTIONS_LIBRARY})
-target_link_libraries(tflite_run_2_2_0 nnfw_lib_benchmark nnfw_lib_misc)
-
-install(TARGETS tflite_run_2_2_0 DESTINATION bin)
--- /dev/null
+if(NOT BUILD_TFLITE_VANILLA_RUN)
+ return()
+endif()
+
+if(NOT BUILD_TENSORFLOW_LITE_2_3_0)
+ set(BUILD_TENSORFLOW_LITE_2_3_0 ON)
+endif()
+
+nnfw_find_package(TensorFlowLite-2.3.0 REQUIRED)
+nnfw_find_package(Boost REQUIRED)
+
+list(APPEND TFLITE_RUN_SRCS "src/tflite_vanilla_run.cc")
+list(APPEND TFLITE_RUN_SRCS "src/args.cc")
+
+add_executable(tflite_vanilla_run ${TFLITE_RUN_SRCS})
+target_include_directories(tflite_vanilla_run PRIVATE src)
+target_include_directories(tflite_vanilla_run PRIVATE ${Boost_INCLUDE_DIRS})
+
+target_link_libraries(tflite_vanilla_run tensorflow-lite-2.3.0 ${LIB_PTHREAD} dl)
+target_link_libraries(tflite_vanilla_run ${Boost_PROGRAM_OPTIONS_LIBRARY})
+target_link_libraries(tflite_vanilla_run nnfw_lib_benchmark nnfw_lib_misc)
+
+install(TARGETS tflite_vanilla_run DESTINATION bin)
#include <iostream>
-namespace TFLiteRun220
+namespace TFLiteVanillaRun
{
Args::Args(const int argc, char **argv) noexcept
}
}
-} // end of namespace TFLiteRun220
+} // end of namespace TFLiteVanillaRun
* limitations under the License.
*/
-#ifndef __TFLITE_RUN_2_2_0_ARGS_H__
-#define __TFLITE_RUN_2_2_0_ARGS_H__
+#ifndef __TFLITE_VANILLA_RUN_ARGS_H__
+#define __TFLITE_VANILLA_RUN_ARGS_H__
#include <string>
#include <boost/program_options.hpp>
namespace po = boost::program_options;
-namespace TFLiteRun220
+namespace TFLiteVanillaRun
{
class Args
int _verbose_level;
};
-} // end of namespace TFLiteRun220
+} // end of namespace TFLiteVanillaRun
-#endif // __TFLITE_RUN_2_2_0_ARGS_H__
+#endif // __TFLITE_VANILLA_RUN_ARGS_H__
* @ingroup COM_AI_RUNTIME
*/
-#ifndef __TFLITE_RUN_2_2_0_TENSOR_VIEW_H__
-#define __TFLITE_RUN_2_2_0_TENSOR_VIEW_H__
+#ifndef __TFLITE_VANILLA_RUN_TENSOR_VIEW_H__
+#define __TFLITE_VANILLA_RUN_TENSOR_VIEW_H__
#include "tensorflow/lite/interpreter.h"
#include "misc/tensor/Reader.h"
#include "misc/tensor/NonIncreasingStride.h"
-namespace TFLiteRun220
+namespace TFLiteVanillaRun
{
/**
}
};
-} // namespace TFLiteRun220
+} // namespace TFLiteVanillaRun
-#endif // __TFLITE_RUN_2_2_0_TENSOR_VIEW_H__
+#endif // __TFLITE_VANILLA_RUN_TENSOR_VIEW_H__
{
tflite::StderrReporter error_reporter;
- TFLiteRun220::Args args(argc, argv);
+ TFLiteVanillaRun::Args args(argc, argv);
std::chrono::milliseconds t_model_load(0), t_prepare(0);
if (tensor->type == kTfLiteInt32)
{
// Generate singed 32-bit integer (s32) input
- auto tensor_view = TFLiteRun220::TensorView<int32_t>::make(*interpreter, o);
+ auto tensor_view = TFLiteVanillaRun::TensorView<int32_t>::make(*interpreter, o);
int32_t value = 0;
else if (tensor->type == kTfLiteUInt8)
{
// Generate unsigned 8-bit integer input
- auto tensor_view = TFLiteRun220::TensorView<uint8_t>::make(*interpreter, o);
+ auto tensor_view = TFLiteVanillaRun::TensorView<uint8_t>::make(*interpreter, o);
uint8_t value = 0;
else if (tensor->type == kTfLiteBool)
{
// Generate bool input
- auto tensor_view = TFLiteRun220::TensorView<bool>::make(*interpreter, o);
+ auto tensor_view = TFLiteVanillaRun::TensorView<bool>::make(*interpreter, o);
auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)(
const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
"
model_type=""
+tf_intf_version=""
for ext in $supported_model_types; do
[ -e "$indir/$tcname"."$ext" ] && model_type=$ext
fi
if [[ "$model_type" == "pb" ]]; then
- $tf2nnpkg --info "$indir/$tcname".info --graphdef "$indir/$tcname"."$model_type" -o "$outdir"
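+ # Pass --v2 to tf2nnpkg when a "<tcname>.v2" marker file exists next to the model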
+ [ -f "$indir/$tcname"."v2" ] && tf_intf_version="--v2"
+ $tf2nnpkg --info "$indir/$tcname".info --graphdef "$indir/$tcname"."$model_type" \
+ "$tf_intf_version" -o "$outdir"
else
$model2nnpkg -o "$outdir" "$indir/$tcname"."$model_type"
fi
# @ target
$ OP_BACKEND_ALLOPS=cpu \
-tests/scripts/nnpkg_test.sh test_model.conv2d_transpose
+onert/test/onert-test nnpkg-test test_model.conv2d_transpose
[ Run ] ./test_model.out Pass
[Compare] ./test_model.out Pass
```
# @ target
# run nnpkg with nnpackage_run and compare with h5diff
-$ tests/scripts/nnpkg_test.sh -i nnpkg-tcs cast
+$ onert/test/onert-test nnpkg-test -i nnpkg-tcs cast
```
-#!/usr/bin/python
+#!/usr/bin/env python
# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
#
return tflite.Model.ModelEnd(new_builder)
-def Finish(new_builder, new_model):
- # Cusrom implementation: identifier
- # Python API don't support identifier input yet
- # Reference: Finish(self, rootTable)) in builder.py, Finish(uoffset_t root, const char *file_identifier, bool size_prefix) in flatbuffers.h
- new_builder.Prep(new_builder.minalign,
- flatbuffers.number_types.UOffsetTFlags.bytewidth)
-
- new_builder.PrependByte(0x33)
- new_builder.PrependByte(0x4c)
- new_builder.PrependByte(0x46)
- new_builder.PrependByte(0x54)
-
- new_builder.PrependUOffsetTRelative(new_model)
- new_builder.finished = True
- return new_builder.Head()
-
-
def main(args):
input_model_file = args.input_model
oplist_file = args.opcode_list
new_input_tensors, new_output_tensors, used_tensors_dic,
used_buffers_dic, used_opcodes_dic, used_subgraphs_dic)
- Finish(new_builder, new_model)
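+ # file_identifier writes the 4-byte "TFL3" magic after the root offset,
+ # matching what the removed custom Finish() helper prepended by hand.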
+ new_builder.Finish(new_model, file_identifier=b'TFL3')
new_buf = new_builder.Output()
output_model_file.write(new_buf)
-# tflkit
+# tflkit
## Purpose
### TensorFlow
-TensorFlow provides some kinds of converting guideline. In Python, the [TFLiteConverter](https://www.tensorflow.org/api_docs/python/tf/contrib/lite/TFLiteConverter) class will help you to convert a TensorFlow GraphDef or SavedModel into `output_format` using TOCO. The `output_format` can be `TFLITE` or `GRAPHVIZ_DOT` format. The default `output_format` is `TFLITE`. And there is a Python command line interface for running TOCO, and its name is [tflite_convert](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/python/tflite_convert.py). This converts a TensorFlow GraphDef or SavedModel into `TFLITE` or `GRAPHVIZ_DOT` format like [TFLiteConverter](https://www.tensorflow.org/api_docs/python/tf/contrib/lite/TFLiteConverter). These two way also supports to convert a TensorFlow Keras model into `output_format`. Both functions are implemented using a tool called TOCO.
+TensorFlow provides several ways to convert a model. In Python, the [TFLiteConverter](https://www.tensorflow.org/api_docs/python/tf/lite/TFLiteConverter) class helps you convert a TensorFlow GraphDef or SavedModel into `output_format` using TOCO. The `output_format` can be `TFLITE` or `GRAPHVIZ_DOT`; the default is `TFLITE`. There is also a Python command line interface for running TOCO, [tflite_convert](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/python/tflite_convert.py), which converts a TensorFlow GraphDef or SavedModel into the `TFLITE` or `GRAPHVIZ_DOT` format like [TFLiteConverter](https://www.tensorflow.org/api_docs/python/tf/lite/TFLiteConverter). Both ways also support converting a TensorFlow Keras model into `output_format`, and both are implemented using a tool called TOCO.
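For orientation only (this is not part of tflkit), a direct `tflite_convert` invocation with placeholder file and array names might look like this:

```sh
# Hypothetical example; replace the paths and array names with your model's values.
tflite_convert \
  --graph_def_file=frozen_graph.pb \
  --output_file=model.tflite \
  --input_arrays=input \
  --output_arrays=output
```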
### with tflkit
-The tflkit uses the [tflite_convert](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/python/tflite_convert.py) python command line interface to convert a TensorFlow model into TfLite model. It only supports to convert a TensorFlow GraphDef file into `TFLITE` format file. This tool supports the creation of individual `TFLITE` files for different input shapes. When converting to multiple `TFLITE` files, it needs to put a string called `NAME` in `TFLITE_PATH`. The string `NAME` will be replaced by what is listed in teh `NAME` environment. This tool requires an information file as a parameter. There is an [example file](info/convert.template) for a convert information. The `--tensorflow_path` and `--tensorflow_version` can change the TensorFlow location. By default, it uses `externals/tensorflow` directory.
+The tflkit uses the [tflite_convert](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/python/tflite_convert.py) Python command line interface to convert a TensorFlow model into a TfLite model. It only supports converting a TensorFlow GraphDef file into a `TFLITE` format file. The tool can also create individual `TFLITE` files for different input shapes; when converting to multiple `TFLITE` files, put the string `NAME` in `TFLITE_PATH`, and it will be replaced by what is listed in the `NAME` environment. This tool requires an information file as a parameter. There is an [example file](convert.template) for the convert information. The `--tensorflow_path` and `--tensorflow_version` options can change the TensorFlow location. By default, it uses the `externals/tensorflow` directory.
Convert information:
* GRAPHDEF_PATH : Full filepath of file containing frozen TensorFlow GraphDef.
### with tflkit
-The [optimize_for_inference.sh](optimize_for_inference.sh) file invokes the TensorFlow [optimize tool](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/optimize_for_inference.py). This tool requires a optimize information file as a parameter. Here is an [example file](info/optimize.template) for this tool. The information file needs `INPUT` and `OUTPUT` array names. The [summarize_pb.sh](summarize_pb.sh) file will help you to define the `INPUT` and `OUTPUT` array names. The `--tensorflow_path` can change the TensorFlow location. By default, it uses `externals/tensorflow` directory.
+The [optimize_for_inference.sh](optimize_for_inference.sh) file invokes the TensorFlow [optimize tool](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/optimize_for_inference.py). This tool requires an optimize information file as a parameter. Here is an [example file](optimize.template) for this tool. The information file needs the `INPUT` and `OUTPUT` array names. The [summarize_pb.sh](summarize_pb.sh) file will help you identify the `INPUT` and `OUTPUT` array names. The `--tensorflow_path` option can change the TensorFlow location. By default, it uses the `externals/tensorflow` directory.
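For reference, the underlying TensorFlow tool can also be run directly; the following is only a sketch with placeholder paths and array names:

```sh
# Hypothetical example; in practice the INPUT/OUTPUT names come from summarize_pb.sh.
python optimize_for_inference.py \
  --input=frozen_graph.pb \
  --output=optimized_graph.pb \
  --input_names=input \
  --output_names=output
```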
Optimize information:
* GRAPHDEF_PATH : Full filepath of file containing frozen TensorFlow GraphDef.
### with tflkit
-The [transform_graph.sh](transform_graph.sh) file supports to transform a TensorFlow GraphDef using various transform options. This tool requires a transform information file as a parameter and the transform options are described in the information file. There is an [example file](info/transform.template) for this tool. The information file needs `INPUT` and `OUTPUT` array names. The [summarize_pb.sh](summarize_pb.sh) file will help you to define the `INPUT` and `OUTPUT` array names. The `--tensorflow_path` can change the TensorFlow location. By default, it uses `externals/tensorflow` directory.
+The [transform_graph.sh](transform_graph.sh) file transforms a TensorFlow GraphDef using various transform options. This tool requires a transform information file as a parameter; the transform options are described in the information file. There is an [example file](transform.template) for this tool. The information file needs the `INPUT` and `OUTPUT` array names. The [summarize_pb.sh](summarize_pb.sh) file will help you identify the `INPUT` and `OUTPUT` array names. The `--tensorflow_path` option can change the TensorFlow location. By default, it uses the `externals/tensorflow` directory.
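As an illustration of what the wrapper drives underneath (the graph paths, array names, and transform list below are placeholders, not tflkit defaults):

```sh
# Hypothetical example; the actual transforms are taken from the information file.
bazel-bin/tensorflow/tools/graph_transforms/transform_graph \
  --in_graph=frozen_graph.pb \
  --out_graph=transformed_graph.pb \
  --inputs=input \
  --outputs=output \
  --transforms='fold_constants sort_by_execution_order'
```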
Transform information:
* GRAPHDEF_PATH : Full filepath of file containing frozen TensorFlow GraphDef.
### with tflkit
-The tflkit provides the simple way to create a frozen graph using [freeze_graph](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py) tool. This tool requires an information file as a parameter. There is an [example file](info/freeze.info) for a freeze tool. Either `SAVED_MODEL` or `META_GRAPH` must be declared. And `META_GRAPH` is always used with `CKPT_PATH`. The `--tensorflow_path` can change the TensorFlow location. By default, it uses `externals/tensorflow` directory.
+The tflkit provides a simple way to create a frozen graph using the [freeze_graph](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py) tool. This tool requires an information file as a parameter. There is an [example file](freeze.template) for the freeze tool. Either `SAVED_MODEL` or `META_GRAPH` must be declared, and `META_GRAPH` is always used together with `CKPT_PATH`. The `--tensorflow_path` option can change the TensorFlow location. By default, it uses the `externals/tensorflow` directory.
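For illustration, the underlying `freeze_graph` tool takes roughly this form when used with a meta graph and checkpoint (file and node names are placeholders):

```sh
# Hypothetical example; with SAVED_MODEL you would pass --input_saved_model_dir instead.
python freeze_graph.py \
  --input_meta_graph=model.meta \
  --input_checkpoint=model.ckpt \
  --input_binary=true \
  --output_node_names=output \
  --output_graph=frozen_graph.pb
```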
Freeze information:
* SAVED_MODEL : Full directory path with TensorFlow `SavedModel` file and variables.
version=$1
-sed -i "s/^release = .*/release = \'$version\'/" ${nnfw_root}/docs/conf.py
-sed -i "s/^Version: .*/Version: $version/" ${nnfw_root}/packaging/nnfw.spec
+perl -pi -e "s/^release = .*/release = \'$version\'/" ${nnfw_root}/docs/conf.py
-IFS=. read M m p <<< $version
+perl -pi -e "s/^Version: .*/Version: $version/" ${nnfw_root}/packaging/nnfw.spec
+
+IFS=. read M m p <<< "$version"
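+# e.g. version 1.10.0 -> (1 << 24) | (10 << 8) | 0 = 0x01000a00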
hex=$(printf '0x%08x' $(( (($M << 24)) | (($m << 8)) | $p )))
-sed -i "s/^#define NNFW_VERSION.*/#define NNFW_VERSION $hex/" ${nnfw_root}/runtime/onert/api/include/nnfw_version.h
+perl -pi -e "s/^#define NNFW_VERSION.*/#define NNFW_VERSION $hex/" ${nnfw_root}/runtime/onert/api/include/nnfw_version.h
-sed -i "s/versionName .*$/versionName \"$version\"/" ${nnfw_root}/runtime/contrib/android/api/build.gradle
+perl -pi -e "s/versionName .*$/versionName \"$version\"/" ${nnfw_root}/runtime/contrib/android/api/build.gradle