Imported Upstream version 1.9.0
tags: accepted/tizen/unified/20200906.032650, submit/tizen/20200905.125700, upstream/1.9.0
author    Chunseok Lee <chunseok.lee@samsung.com>
          Sat, 5 Sep 2020 12:49:46 +0000 (21:49 +0900)
committer Chunseok Lee <chunseok.lee@samsung.com>
          Sat, 5 Sep 2020 12:49:46 +0000 (21:49 +0900)
679 files changed:
Makefile.template
compiler/circle-quantizer/CMakeLists.txt
compiler/circle-quantizer/include/CircleExpContract.h [deleted file]
compiler/circle-quantizer/src/CircleQuantizer.cpp
compiler/circle2circle-dredd-recipe-test/test.lst
compiler/circle2circle/include/CircleExpContract.h [deleted file]
compiler/circle2circle/src/Circle2Circle.cpp
compiler/circlechef/circle/CMakeLists.txt
compiler/circlechef/circle/src/CircleImport.h
compiler/circlechef/core/src/ModelChef.cpp
compiler/circledump/src/OpPrinter.cpp
compiler/common-artifacts/CMakeLists.txt
compiler/common-artifacts/exclude.lst
compiler/locomotiv/src/Node/BiasAdd.cpp
compiler/locomotiv/src/Node/BiasEncode.cpp
compiler/locomotiv/src/Node/ConstGen.cpp
compiler/locomotiv/src/Node/Conv2D.cpp
compiler/locomotiv/src/Node/DepthwiseConv2D.cpp
compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp
compiler/locomotiv/src/Node/FeatureDecode.cpp
compiler/locomotiv/src/Node/FilterEncode.cpp
compiler/locomotiv/src/Node/Forward.cpp
compiler/locomotiv/src/Node/MatMul.cpp
compiler/locomotiv/src/Node/MatrixDecode.cpp
compiler/locomotiv/src/Node/MaxPool2D.cpp
compiler/locomotiv/src/Node/Pull.cpp
compiler/locomotiv/src/Node/Push.cpp
compiler/locomotiv/src/Node/Reshape.cpp
compiler/locomotiv/src/Node/Softmax.cpp
compiler/locomotiv/src/Node/TensorBroadcast.cpp
compiler/locomotiv/src/Node/TensorConcat.cpp
compiler/locomotiv/src/Node/TensorConstantPad.cpp
compiler/locomotiv/src/Node/TensorReduce.cpp
compiler/locomotiv/src/Node/TransposedConv2D.cpp
compiler/luci-interpreter/src/CMakeLists.txt
compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
compiler/luci-interpreter/src/kernels/CMakeLists.txt
compiler/luci-interpreter/src/kernels/Conv2D.cpp
compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
compiler/luci-interpreter/src/kernels/LeakyRelu.cpp
compiler/luci-interpreter/src/kernels/LeakyRelu.h
compiler/luci-interpreter/src/kernels/Mul.cpp
compiler/luci-interpreter/src/kernels/Rsqrt.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Rsqrt.h [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Sqrt.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Sqrt.h [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Sqrt.test.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Tanh.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Tanh.h [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Tanh.test.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/TransposeConv.cpp
compiler/luci-interpreter/src/kernels/TransposeConv.h
compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp
compiler/luci-interpreter/src/kernels/Utils.h
compiler/luci-interpreter/src/loader/KernelBuilder.cpp
compiler/luci-interpreter/src/loader/KernelBuilder.h
compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
compiler/luci-value-test/tester/CMakeLists.txt
compiler/luci-value-test/tester/src/CircleExpContract.h [deleted file]
compiler/luci-value-test/tester/src/EvalTester.cpp
compiler/luci/export/include/luci/CircleFileExpContract.h [moved from compiler/record-minmax/src/CircleExpContract.h with 56% similarity]
compiler/luci/export/src/CircleOperationExporter.cpp
compiler/luci/import/include/luci/Import/Nodes.h
compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV5.h [new file with mode: 0644]
compiler/luci/import/include/luci/Import/Nodes/CirclePadV2.h [new file with mode: 0644]
compiler/luci/import/src/GraphBuilderRegistry.cpp
compiler/luci/import/src/Nodes/CircleBatchToSpaceND.cpp
compiler/luci/import/src/Nodes/CircleConst.cpp
compiler/luci/import/src/Nodes/CircleMaximum.cpp
compiler/luci/import/src/Nodes/CircleMinimum.cpp
compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp [new file with mode: 0644]
compiler/luci/import/src/Nodes/CirclePadV2.cpp [new file with mode: 0644]
compiler/luci/import/src/Nodes/CircleReduceMax.cpp
compiler/luci/import/src/Nodes/CircleReduceMin.cpp
compiler/luci/import/src/Nodes/CircleSpaceToBatchND.cpp
compiler/luci/import/src/Nodes/CircleSparseToDense.cpp
compiler/luci/import/src/Nodes/CircleTransposeConv.cpp
compiler/luci/import/src/ValidateHelpers.cpp [new file with mode: 0644]
compiler/luci/import/src/ValidateHelpers.h [new file with mode: 0644]
compiler/luci/lang/include/luci/IR/CircleNodes.h
compiler/luci/lang/include/luci/IR/CircleNodes.lst
compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5.h [new file with mode: 0644]
compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h [new file with mode: 0644]
compiler/luci/lang/include/luci/IR/Nodes/CircleSparseToDense.h
compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h
compiler/luci/lang/src/Nodes/CircleConst.cpp
compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5.test.cpp [new file with mode: 0644]
compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp [new file with mode: 0644]
compiler/luci/lang/src/Nodes/CircleSparseToDense.test.cpp
compiler/luci/lang/src/Nodes/CircleTransposeConv.test.cpp
compiler/luci/logex/src/FormattedGraph.cpp
compiler/luci/pass/include/luci/CircleOptimizer.h
compiler/luci/pass/include/luci/Pass/FuseBatchNormWithTConv.h [new file with mode: 0644]
compiler/luci/pass/include/luci/Pass/RequantizePass.h [moved from runtime/onert/backend/cpu/ops/ReLULayer.h with 51% similarity]
compiler/luci/pass/src/CircleOptimizer.cpp
compiler/luci/pass/src/FuseBCQPass.cpp
compiler/luci/pass/src/FuseBatchNormWithTConv.cpp [new file with mode: 0644]
compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
compiler/luci/pass/src/RequantizePass.cpp [new file with mode: 0644]
compiler/luci/service/src/CircleShapeInferenceRule.cpp
compiler/luci/service/src/CircleTypeInferenceRule.cpp
compiler/luci/tests/test.lst
compiler/one-cmds/one-import-tf
compiler/one-cmds/one-prepare-venv
compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/fake_quantization/ker.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/bias.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ifm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ker.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ifm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/fake_quantization/ker.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/bias.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ifm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ker.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ifm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/fake_quantization/weight.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/bias.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/in.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/out.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/weight.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/in.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/out.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/fake_quantization/ker.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ifm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ker.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ifm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/test.lst
compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/4.txt [new file with mode: 0644]
compiler/record-minmax/src/RecordMinMax.cpp
compiler/souschef/CMakeLists.txt
compiler/souschef/include/souschef/Dataset.h
compiler/souschef/include/souschef/Dims.h [new file with mode: 0644]
compiler/souschef/include/souschef/TensorFiller.h [new file with mode: 0644]
compiler/souschef/src/Dims.cpp [moved from compiler/luci-value-test/tester/src/CircleExpContract.cpp with 63% similarity]
compiler/tflchef/core/src/ModelChef.cpp
compiler/tflchef/core/src/Op/NonMaxSuppressionV5.cpp [moved from compiler/record-minmax/src/CircleExpContract.cpp with 60% similarity]
compiler/tflchef/core/src/Op/NonMaxSuppressionV5.h [new file with mode: 0644]
compiler/tflchef/core/src/OpChef.def
compiler/tflchef/core/src/OpChefs.h
compiler/tflchef/proto/tflchef.proto
compiler/tflchef/tflite/CMakeLists.txt
compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp [new file with mode: 0644]
compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h [new file with mode: 0644]
compiler/tflchef/tflite/src/TFliteImport.h
compiler/tflchef/tflite/src/TFliteOpChefs.h
compiler/tflchef/tflite/src/TFliteOpRegistry.h
compiler/tfldump/src/OpPrinter.cpp
compiler/tflite2circle/src/BuildBuiltinOptions.h
compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.cpp [moved from compiler/circle2circle/src/CircleExpContract.cpp with 61% similarity]
compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.h [new file with mode: 0644]
compiler/tflite2circle/src/CircleModel.cpp
compiler/tflite2circle/src/DataLookup.cpp
compiler/tflite2circle/src/DataLookup.h
compiler/tflite2circle/src/TFLBuiltinOptions.lst
compiler/vconone/CMakeLists.txt
compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
compute/cker/include/cker/Types.h
compute/cker/include/cker/eigen/eigen_convolution_helpers.h
compute/cker/include/cker/eigen/eigen_spatial_convolutions.h
compute/cker/include/cker/neon/neon_check.h
compute/cker/include/cker/operation/AveragePool.h
compute/cker/include/cker/operation/Conv.h
compute/cker/include/cker/operation/Erf.h [moved from runtime/onert/core/src/ir/operation/Sin.cc with 57% similarity]
compute/cker/include/cker/operation/LogSoftMax.h
compute/cker/include/cker/operation/MaxPool.h
compute/cker/include/cker/operation/SoftMax.h
compute/cker/include/cker/operation/optimized/BinaryArithmeticOps.h
compute/cker/include/cker/operation/optimized/OptimizedUtils.h
compute/cker/include/cker/operation/reference/BinaryArithmeticOps.h
compute/test/CMakeLists.txt
docs/conf.py
docs/howto/how-to-introduce-a-new-operation-into-runtime.md
docs/release/1.9/release-note-1.9.0.md [new file with mode: 0644]
docs/runtime/compute.md
infra/3rdparty/Eigen/fd6845384b86/URL.default
infra/cmake/packages/BoostSourceConfig.cmake
infra/cmake/packages/EigenSourceConfig.cmake
infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake [new file with mode: 0644]
infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfigVersion.cmake [new file with mode: 0644]
infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake [new file with mode: 0644]
infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfigVersion.cmake [new file with mode: 0644]
infra/nncc/CMakeLists.txt
infra/nnfw/cmake/CfgOptionFlags.cmake
infra/nnfw/cmake/options/options_aarch64-android.cmake
infra/nnfw/cmake/options/options_aarch64-tizen.cmake
infra/nnfw/cmake/options/options_armv7l-tizen.cmake
infra/nnfw/cmake/packages/BoostConfig.cmake
infra/nnfw/cmake/packages/HDF5Config.cmake
infra/nnfw/command/copyright-check
infra/scripts/build-tcm.sh
infra/scripts/docker_build_nncc.sh
infra/scripts/tizen_xu4_test.sh
packaging/nnfw.spec
res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.rule [new file with mode: 0644]
res/TensorFlowLiteRecipes/NonMaxSuppressionV4_000/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/NonMaxSuppressionV4_000/test.reverse [new file with mode: 0644]
res/TensorFlowLiteRecipes/NonMaxSuppressionV4_001/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/NonMaxSuppressionV4_001/test.reverse [new file with mode: 0644]
res/TensorFlowLiteRecipes/NonMaxSuppressionV5_000/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/NonMaxSuppressionV5_000/test.reverse [new file with mode: 0644]
res/TensorFlowLiteRecipes/NonMaxSuppressionV5_001/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/NonMaxSuppressionV5_001/test.reverse [new file with mode: 0644]
res/TensorFlowLiteRecipes/PadV2_000/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/PadV2_000/test.reverse [new file with mode: 0644]
res/TensorFlowLiteRecipes/Tanh_U8_000/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Tanh_U8_000/test.reverse [new file with mode: 0644]
res/TensorFlowLiteRecipes/Unique_000/test.recipe
res/TensorFlowLiteRecipes/Unique_001/test.recipe
res/TensorFlowLiteRecipes/Unique_002/test.recipe
res/TensorFlowLiteRecipes/Unique_003/test.recipe
res/TensorFlowLiteRecipes/Unique_U8_000/test.recipe
res/TensorFlowLiteRecipes/Unique_U8_001/test.recipe
res/TensorFlowPythonExamples/examples/PadV2/__init__.py [new file with mode: 0644]
res/TensorFlowPythonExamples/examples/gelu/__init__.py [new file with mode: 0644]
res/TensorFlowPythonExamples/examples/gelu_2/__init__.py [new file with mode: 0644]
res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py [new file with mode: 0644]
res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py [new file with mode: 0644]
res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py [new file with mode: 0644]
res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py [new file with mode: 0644]
runtime/contrib/android/api/build.gradle
runtime/contrib/android/api/src/main/native/onert-native-api.h
runtime/libs/benchmark/include/benchmark/Phases.h
runtime/libs/benchmark/src/MemoryPoller.cpp
runtime/libs/benchmark/src/Phases.cpp
runtime/onert/api/include/nnfw.h
runtime/onert/api/include/nnfw_experimental.h
runtime/onert/api/include/nnfw_version.h
runtime/onert/api/src/nnfw_api.cc
runtime/onert/api/src/nnfw_api_internal.cc
runtime/onert/api/src/nnfw_api_internal.h
runtime/onert/backend/acl_cl/Backend.h
runtime/onert/backend/acl_cl/ConstantInitializer.cc
runtime/onert/backend/acl_cl/ConstantInitializer.h
runtime/onert/backend/acl_cl/KernelGenerator.cc
runtime/onert/backend/acl_cl/KernelGenerator.h
runtime/onert/backend/acl_cl/Optimizer.cc
runtime/onert/backend/acl_cl/TensorManager.h
runtime/onert/backend/acl_common/AclConstantInitializer.cc [new file with mode: 0644]
runtime/onert/backend/acl_common/AclConstantInitializer.h [new file with mode: 0644]
runtime/onert/backend/acl_common/AclFunction.h
runtime/onert/backend/acl_common/AclKernelGen.h
runtime/onert/backend/acl_common/AclTensorBuilder.h
runtime/onert/backend/acl_common/AclTensorRegistry.h [new file with mode: 0644]
runtime/onert/backend/acl_common/Convert.cc
runtime/onert/backend/acl_common/Convert.h
runtime/onert/backend/acl_neon/Backend.h
runtime/onert/backend/acl_neon/ConstantInitializer.cc
runtime/onert/backend/acl_neon/ConstantInitializer.h
runtime/onert/backend/acl_neon/KernelGenerator.cc
runtime/onert/backend/acl_neon/KernelGenerator.h
runtime/onert/backend/acl_neon/Optimizer.cc
runtime/onert/backend/acl_neon/TensorManager.h
runtime/onert/backend/cpu/Backend.h
runtime/onert/backend/cpu/BackendContext.h
runtime/onert/backend/cpu/ConstantInitializer.cc
runtime/onert/backend/cpu/ConstantInitializer.h
runtime/onert/backend/cpu/KernelGenerator.cc
runtime/onert/backend/cpu/KernelGenerator.h
runtime/onert/backend/cpu/TensorBuilder.cc
runtime/onert/backend/cpu/TensorBuilder.h
runtime/onert/backend/cpu/ops/AbsLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/AbsLayer.h [deleted file]
runtime/onert/backend/cpu/ops/AddLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/AddLayer.h [deleted file]
runtime/onert/backend/cpu/ops/AvgPoolLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/AvgPoolLayer.h [deleted file]
runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc [new file with mode: 0644]
runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.h [moved from runtime/onert/backend/cpu/ops/DivLayer.h with 65% similarity]
runtime/onert/backend/cpu/ops/CastLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/CastLayer.h [deleted file]
runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
runtime/onert/backend/cpu/ops/ConvolutionLayer.h
runtime/onert/backend/cpu/ops/CosLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/CosLayer.h [deleted file]
runtime/onert/backend/cpu/ops/DivLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc [new file with mode: 0644]
runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h [moved from runtime/onert/backend/cpu/ops/TanhLayer.h with 60% similarity]
runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc [new file with mode: 0644]
runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h [moved from runtime/onert/backend/cpu/ops/MaxLayer.h with 65% similarity]
runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc [new file with mode: 0644]
runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h [moved from runtime/onert/backend/cpu/ops/ReLU6Layer.h with 62% similarity]
runtime/onert/backend/cpu/ops/ExpLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/LogLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/LogLayer.h [deleted file]
runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc
runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h
runtime/onert/backend/cpu/ops/LogicalNotLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/LogicalNotLayer.h [deleted file]
runtime/onert/backend/cpu/ops/LogicalOrLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/LogicalOrLayer.h [deleted file]
runtime/onert/backend/cpu/ops/LogisticLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/LogisticLayer.h [deleted file]
runtime/onert/backend/cpu/ops/MaxLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/MaxPoolLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/MinLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/MinLayer.h [deleted file]
runtime/onert/backend/cpu/ops/MulLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/MulLayer.h [deleted file]
runtime/onert/backend/cpu/ops/NegLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/NegLayer.h [deleted file]
runtime/onert/backend/cpu/ops/PoolLayer.cc [new file with mode: 0644]
runtime/onert/backend/cpu/ops/PoolLayer.h [moved from runtime/onert/backend/cpu/ops/MaxPoolLayer.h with 68% similarity]
runtime/onert/backend/cpu/ops/QuantizeLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/QuantizeLayer.h [deleted file]
runtime/onert/backend/cpu/ops/RankLayer.cc [moved from runtime/onert/backend/cpu/ops/RoundLayer.cc with 61% similarity]
runtime/onert/backend/cpu/ops/RankLayer.h [moved from runtime/onert/backend/cpu/ops/ZerosLikeLayer.h with 81% similarity]
runtime/onert/backend/cpu/ops/ReLU6Layer.cc [deleted file]
runtime/onert/backend/cpu/ops/ReLULayer.cc [deleted file]
runtime/onert/backend/cpu/ops/ReduceLayer.cc
runtime/onert/backend/cpu/ops/ReduceLayer.h
runtime/onert/backend/cpu/ops/RoundLayer.h [deleted file]
runtime/onert/backend/cpu/ops/RsqrtLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/RsqrtLayer.h [deleted file]
runtime/onert/backend/cpu/ops/SinLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/SinLayer.h [deleted file]
runtime/onert/backend/cpu/ops/SoftMaxLayer.cc
runtime/onert/backend/cpu/ops/SubLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/SubLayer.h [deleted file]
runtime/onert/backend/cpu/ops/TanhLayer.cc [deleted file]
runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc [deleted file]
runtime/onert/core/include/backend/BackendContext.h
runtime/onert/core/include/backend/IConstantInitializer.h
runtime/onert/core/include/backend/ITensorBuilder.h
runtime/onert/core/include/backend/ITensorRegistry.h
runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h
runtime/onert/core/include/compiler/LoweredGraph.h [new file with mode: 0644]
runtime/onert/core/include/compiler/StaticShapeInference.h
runtime/onert/core/include/exec/DynamicShapeInference.h
runtime/onert/core/include/exec/IExecutor.h
runtime/onert/core/include/exec/IODescription.h
runtime/onert/core/include/ir/Graph.h
runtime/onert/core/include/ir/InternalType.h
runtime/onert/core/include/ir/LoweredGraph.h [deleted file]
runtime/onert/core/include/ir/OpSequences.h
runtime/onert/core/include/ir/Operations.Include.h
runtime/onert/core/include/ir/Operations.lst
runtime/onert/core/include/ir/Padding.h
runtime/onert/core/include/ir/operation/Abs.h [deleted file]
runtime/onert/core/include/ir/operation/BinaryArithmetic.h [moved from runtime/onert/core/include/ir/operation/Add.h with 62% similarity]
runtime/onert/core/include/ir/operation/BroadcastTo.h
runtime/onert/core/include/ir/operation/Cast.h [deleted file]
runtime/onert/core/include/ir/operation/Conv2D.h
runtime/onert/core/include/ir/operation/Dequantize.h [deleted file]
runtime/onert/core/include/ir/operation/Einsum.h
runtime/onert/core/include/ir/operation/ElementwiseActivation.h [moved from runtime/onert/core/include/ir/operation/Div.h with 54% similarity]
runtime/onert/core/include/ir/operation/ElementwiseBinary.h [moved from runtime/onert/core/include/ir/operation/Mul.h with 60% similarity]
runtime/onert/core/include/ir/operation/ElementwiseUnary.h [moved from runtime/onert/core/include/ir/operation/MaxPool2D.h with 56% similarity]
runtime/onert/core/include/ir/operation/Exp.h [deleted file]
runtime/onert/core/include/ir/operation/Floor.h [deleted file]
runtime/onert/core/include/ir/operation/Log.h [deleted file]
runtime/onert/core/include/ir/operation/LogicalAnd.h [deleted file]
runtime/onert/core/include/ir/operation/LogicalNot.h [deleted file]
runtime/onert/core/include/ir/operation/LogicalOr.h [deleted file]
runtime/onert/core/include/ir/operation/Logistic.h [deleted file]
runtime/onert/core/include/ir/operation/Max.h [deleted file]
runtime/onert/core/include/ir/operation/Mean.h [deleted file]
runtime/onert/core/include/ir/operation/Min.h [deleted file]
runtime/onert/core/include/ir/operation/Neg.h [deleted file]
runtime/onert/core/include/ir/operation/Pool2D.h [moved from runtime/onert/core/include/ir/operation/AvgPool2D.h with 69% similarity]
runtime/onert/core/include/ir/operation/Quantize.h [deleted file]
runtime/onert/core/include/ir/operation/RSQRT.h [deleted file]
runtime/onert/core/include/ir/operation/Rank.h [moved from runtime/onert/core/include/ir/operation/Cos.h with 75% similarity]
runtime/onert/core/include/ir/operation/ReLU.h [deleted file]
runtime/onert/core/include/ir/operation/ReLU1.h [deleted file]
runtime/onert/core/include/ir/operation/ReLU6.h [deleted file]
runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h [moved from runtime/onert/core/include/ir/operation/L2Pool2D.h with 63% similarity]
runtime/onert/core/include/ir/operation/Round.h [deleted file]
runtime/onert/core/include/ir/operation/SQRT.h [deleted file]
runtime/onert/core/include/ir/operation/Select.h
runtime/onert/core/include/ir/operation/Sin.h [deleted file]
runtime/onert/core/include/ir/operation/Sub.h [deleted file]
runtime/onert/core/include/ir/operation/Tanh.h [deleted file]
runtime/onert/core/include/ir/operation/ZerosLike.h [deleted file]
runtime/onert/core/include/util/Config.lst
runtime/onert/core/include/util/Exceptions.h [moved from runtime/onert/backend/cpu/ops/ExpLayer.h with 54% similarity]
runtime/onert/core/include/util/ShapeInference.h
runtime/onert/core/src/backend/controlflow/Backend.h
runtime/onert/core/src/backend/controlflow/ConstantInitializer.h
runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc
runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h
runtime/onert/core/src/backend/controlflow/KernelGenerator.cc
runtime/onert/core/src/backend/controlflow/KernelGenerator.h
runtime/onert/core/src/backend/controlflow/Tensor.h [moved from runtime/onert/core/src/ir/operation/Log.cc with 64% similarity]
runtime/onert/core/src/backend/controlflow/TensorBuilder.cc
runtime/onert/core/src/backend/controlflow/TensorBuilder.h
runtime/onert/core/src/backend/controlflow/TensorRegistry.h [new file with mode: 0644]
runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc
runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc
runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc
runtime/onert/core/src/compiler/Compiler.cc
runtime/onert/core/src/compiler/ExecutorFactory.cc
runtime/onert/core/src/compiler/ExecutorFactory.h
runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
runtime/onert/core/src/compiler/Fp32ToFp16Converter.h
runtime/onert/core/src/compiler/HEScheduler.cc
runtime/onert/core/src/compiler/HEScheduler.h
runtime/onert/core/src/compiler/Linear.cc
runtime/onert/core/src/compiler/Linear.h
runtime/onert/core/src/compiler/LoweredGraph.cc [moved from runtime/onert/core/src/ir/LoweredGraph.cc with 73% similarity]
runtime/onert/core/src/compiler/ManualScheduler.cc
runtime/onert/core/src/compiler/OperationValidator.cc
runtime/onert/core/src/compiler/OperationValidator.h
runtime/onert/core/src/compiler/StaticShapeInference.cc
runtime/onert/core/src/compiler/TensorBuilders.h
runtime/onert/core/src/compiler/TensorRegistries.h [new file with mode: 0644]
runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc [moved from runtime/onert/core/src/ir/pass/ConstantInsertionPass.cc with 87% similarity]
runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h [moved from runtime/onert/core/src/ir/pass/ConstantInsertionPass.h with 71% similarity]
runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc [moved from runtime/onert/core/src/ir/pass/ConstantLoweringPass.cc with 80% similarity]
runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h [moved from runtime/onert/core/src/ir/pass/ConstantLoweringPass.h with 76% similarity]
runtime/onert/core/src/compiler/pass/LoweredOperandPass.h [moved from runtime/onert/core/src/ir/pass/LoweredOperandPass.h with 81% similarity]
runtime/onert/core/src/compiler/pass/LoweredOperationPass.h [moved from runtime/onert/core/src/ir/pass/LoweredOperationPass.h with 82% similarity]
runtime/onert/core/src/compiler/pass/OperandPass.cc [moved from runtime/onert/core/src/ir/pass/OperandPass.cc with 85% similarity]
runtime/onert/core/src/compiler/pass/OperandPass.h [moved from runtime/onert/core/src/ir/pass/OperandPass.h with 79% similarity]
runtime/onert/core/src/compiler/pass/OperationPass.cc [moved from runtime/onert/core/src/ir/pass/OperationPass.cc with 86% similarity]
runtime/onert/core/src/compiler/pass/OperationPass.h [moved from runtime/onert/core/src/ir/pass/OperationPass.h with 84% similarity]
runtime/onert/core/src/compiler/pass/Pass.h [moved from runtime/onert/core/src/ir/pass/Pass.h with 78% similarity]
runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc [moved from runtime/onert/core/src/ir/pass/PermutationEliminationPass.cc with 78% similarity]
runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h [moved from runtime/onert/core/src/ir/pass/PermutationEliminationPass.h with 81% similarity]
runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc [moved from runtime/onert/core/src/ir/pass/PermutationInsertionPass.cc with 87% similarity]
runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h [moved from runtime/onert/core/src/ir/pass/PermutationInsertionPass.h with 71% similarity]
runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc [moved from runtime/onert/core/src/ir/pass/PermutationOperationPass.cc with 81% similarity]
runtime/onert/core/src/compiler/pass/PermutationOperationPass.h [new file with mode: 0644]
runtime/onert/core/src/dumper/dot/DotDumper.h
runtime/onert/core/src/exec/DataflowExecutor.cc
runtime/onert/core/src/exec/DataflowExecutor.h
runtime/onert/core/src/exec/DynamicShapeInference.cc
runtime/onert/core/src/exec/Execution.cc
runtime/onert/core/src/exec/ExecutorBase.cc
runtime/onert/core/src/exec/ExecutorBase.h
runtime/onert/core/src/exec/FunctionSequence.cc
runtime/onert/core/src/exec/LinearExecutor.h
runtime/onert/core/src/exec/ParallelExecutor.cc
runtime/onert/core/src/exec/ParallelExecutor.h
runtime/onert/core/src/exec/feature/nchw/Reader.h
runtime/onert/core/src/exec/feature/nchw/View.h
runtime/onert/core/src/exec/feature/nhwc/Reader.h
runtime/onert/core/src/exec/feature/nhwc/View.h
runtime/onert/core/src/interp/InterpOps.lst
runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
runtime/onert/core/src/interp/operations/ElementwiseActivations.cc [moved from runtime/onert/core/src/interp/operations/UnaryActivations.cc with 60% similarity]
runtime/onert/core/src/interp/operations/Logistic.cc [deleted file]
runtime/onert/core/src/interp/operations/MaxPool2D.cc [deleted file]
runtime/onert/core/src/interp/operations/Pool2D.cc [moved from runtime/onert/core/src/interp/operations/AvgPool2D.cc with 58% similarity]
runtime/onert/core/src/interp/operations/Softmax.cc
runtime/onert/core/src/ir/Graph.cc
runtime/onert/core/src/ir/GraphIterator.cc
runtime/onert/core/src/ir/GraphIterator.h
runtime/onert/core/src/ir/OpSequences.cc
runtime/onert/core/src/ir/OperationDumper.cc
runtime/onert/core/src/ir/OperationDumper.h
runtime/onert/core/src/ir/Padding.cc
runtime/onert/core/src/ir/operation/Abs.cc [deleted file]
runtime/onert/core/src/ir/operation/AvgPool2D.cc [deleted file]
runtime/onert/core/src/ir/operation/BinaryArithmetic.cc [moved from runtime/onert/core/src/ir/operation/Add.cc with 50% similarity]
runtime/onert/core/src/ir/operation/Cast.cc [deleted file]
runtime/onert/core/src/ir/operation/Dequantize.cc [deleted file]
runtime/onert/core/src/ir/operation/Div.cc [deleted file]
runtime/onert/core/src/ir/operation/ElementwiseActivation.cc [new file with mode: 0644]
runtime/onert/core/src/ir/operation/ElementwiseBinary.cc [new file with mode: 0644]
runtime/onert/core/src/ir/operation/ElementwiseUnary.cc [new file with mode: 0644]
runtime/onert/core/src/ir/operation/Exp.cc [deleted file]
runtime/onert/core/src/ir/operation/Floor.cc [deleted file]
runtime/onert/core/src/ir/operation/L2Pool2D.cc [deleted file]
runtime/onert/core/src/ir/operation/LogicalAnd.cc [deleted file]
runtime/onert/core/src/ir/operation/LogicalNot.cc [deleted file]
runtime/onert/core/src/ir/operation/LogicalOr.cc [deleted file]
runtime/onert/core/src/ir/operation/Logistic.cc [deleted file]
runtime/onert/core/src/ir/operation/Max.cc [deleted file]
runtime/onert/core/src/ir/operation/MaxPool2D.cc [deleted file]
runtime/onert/core/src/ir/operation/Min.cc [deleted file]
runtime/onert/core/src/ir/operation/Mul.cc [deleted file]
runtime/onert/core/src/ir/operation/Neg.cc [deleted file]
runtime/onert/core/src/ir/operation/Pad.cc
runtime/onert/core/src/ir/operation/Pool2D.cc [new file with mode: 0644]
runtime/onert/core/src/ir/operation/RSQRT.cc [deleted file]
runtime/onert/core/src/ir/operation/Rank.cc [moved from runtime/onert/core/src/ir/operation/Cos.cc with 83% similarity]
runtime/onert/core/src/ir/operation/ReLU.cc [deleted file]
runtime/onert/core/src/ir/operation/ReLU1.cc [deleted file]
runtime/onert/core/src/ir/operation/ReLU6.cc [deleted file]
runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc [moved from runtime/onert/core/src/ir/operation/Round.cc with 70% similarity]
runtime/onert/core/src/ir/operation/SQRT.cc [deleted file]
runtime/onert/core/src/ir/operation/Sub.cc [deleted file]
runtime/onert/core/src/ir/operation/Tanh.cc [deleted file]
runtime/onert/core/src/ir/operation/ZerosLike.cc [deleted file]
runtime/onert/core/src/ir/pass/PermutationOperationPass.h [deleted file]
runtime/onert/core/src/util/EventRecorder.cc
runtime/onert/core/src/util/EventRecorder.h
runtime/onert/core/src/util/ShapeInference.cc
runtime/onert/frontend/base_loader/include/base_loader.h
runtime/onert/frontend/circle/src/circle_loader.cc
runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
runtime/onert/frontend/tflite/src/tflite_loader.cc
runtime/onert/test/core/compiler/Scheduler.cc
runtime/onert/test/core/exec/ExecInstance.cc
runtime/onert/test/core/interp/ExecManager.cc
runtime/onert/test/util/ShapeInference.cc
tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_cl
tests/nnapi/nnapi_gtest.skip.aarch64-linux.acl_neon
tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_cl
tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
tests/nnapi/nnapi_gtest.skip.noarch.interp
tests/nnapi/specs/V1_2/conv2d_dilation_nnfw.mod.py [new file with mode: 0644]
tests/nnapi/specs/V1_2/log_softmax_nnfw.mod.py
tests/nnfw_api/CMakeLists.txt
tests/nnfw_api/src/CircleGen.cc [new file with mode: 0644]
tests/nnfw_api/src/CircleGen.h
tests/nnfw_api/src/GenModelTest.h [new file with mode: 0644]
tests/nnfw_api/src/GenModelTests.cc [deleted file]
tests/nnfw_api/src/ModelTestDynamicTensor.cc
tests/nnfw_api/src/NNPackages.cc
tests/nnfw_api/src/NNPackages.h
tests/nnfw_api/src/RegressionTests.cc
tests/nnfw_api/src/ValidationTestAddModelLoaded.cc
tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.cc
tests/nnfw_api/src/ValidationTestSessionCreated.cc
tests/nnfw_api/src/ValidationTestSingleSession.cc
tests/nnfw_api/src/fixtures.h
tests/nnfw_api/src/one_op_tests/Add.cc [new file with mode: 0644]
tests/nnfw_api/src/one_op_tests/AveragePool2D.cc [new file with mode: 0644]
tests/nnfw_api/src/one_op_tests/Cos.cc [new file with mode: 0644]
tests/nnfw_api/src/one_op_tests/L2Normalization.cc [new file with mode: 0644]
tests/nnfw_api/src/one_op_tests/LeakyRelu.cc [moved from runtime/onert/core/src/ir/operation/Quantize.cc with 53% similarity]
tests/nnfw_api/src/one_op_tests/Pad.cc [new file with mode: 0644]
tests/nnfw_api/src/one_op_tests/PadV2.cc [new file with mode: 0644]
tests/nnfw_api/src/one_op_tests/Rank.cc [new file with mode: 0644]
tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc [new file with mode: 0644]
tests/nnfw_api/src/one_op_tests/While.cc [new file with mode: 0644]
tests/scripts/CMakeLists.txt
tests/scripts/command/prepare-model
tests/scripts/common.sh
tests/scripts/list/nnpkg_test_list.armv7l-linux.acl_cl
tests/scripts/list/nnpkg_test_list.armv7l-linux.acl_neon
tests/scripts/list/nnpkg_test_list.armv7l-linux.cpu
tests/scripts/list/nnpkg_test_list.armv7l-linux.srcn [deleted file]
tests/scripts/list/nnpkg_test_list.noarch.interp
tests/scripts/models/nnfw_api_gtest/add/config.sh [moved from tests/scripts/nnfw_api_gtest/models/add/config.sh with 100% similarity]
tests/scripts/models/nnfw_api_gtest/add_invalid_manifest/config.sh [moved from tests/scripts/nnfw_api_gtest/models/add_invalid_manifest/config.sh with 100% similarity]
tests/scripts/models/nnfw_api_gtest/add_no_manifest/config.sh [moved from tests/scripts/nnfw_api_gtest/models/add_no_manifest/config.sh with 100% similarity]
tests/scripts/models/nnfw_api_gtest/add_unspecified_rank_inputs/config.sh [moved from tests/scripts/nnfw_api_gtest/models/add_unspecified_rank_inputs/config.sh with 100% similarity]
tests/scripts/models/nnfw_api_gtest/dynamic_tensor_reshape/config.sh [moved from tests/scripts/nnfw_api_gtest/models/dynamic_tensor_reshape/config.sh with 100% similarity]
tests/scripts/models/nnfw_api_gtest/if_dynamic/config.sh [moved from tests/scripts/nnfw_api_gtest/models/if_dynamic/config.sh with 100% similarity]
tests/scripts/models/nnfw_api_gtest/input_reshaping_add/config.sh [moved from tests/scripts/nnfw_api_gtest/models/input_reshaping_add/config.sh with 100% similarity]
tests/scripts/models/nnfw_api_gtest/neg/config.sh [moved from tests/scripts/nnfw_api_gtest/models/neg/config.sh with 100% similarity]
tests/scripts/models/nnfw_api_gtest/unknown_dim_input_concat/config.sh [moved from tests/scripts/nnfw_api_gtest/models/unknown_dim_input_concat/config.sh with 100% similarity]
tests/scripts/models/nnfw_api_gtest/while_dynamic/config.sh [moved from tests/scripts/nnfw_api_gtest/models/while_dynamic/config.sh with 100% similarity]
tests/scripts/models/run_test.sh
tests/scripts/models/tflite/MODELS/inception_module/config.sh [moved from tests/scripts/models/config/MODELS/inception_module/config.sh with 100% similarity]
tests/scripts/models/tflite/MODELS/inception_nonslim/config.sh [moved from tests/scripts/models/config/MODELS/inception_nonslim/config.sh with 100% similarity]
tests/scripts/models/tflite/MODELS/inception_slim/config.sh [moved from tests/scripts/models/config/MODELS/inception_slim/config.sh with 100% similarity]
tests/scripts/models/tflite/MODELS/mobilenet/config.sh [moved from tests/scripts/models/config/MODELS/mobilenet/config.sh with 100% similarity]
tests/scripts/models/tflite/MODELS/mobilenet_quant8/config.sh [moved from tests/scripts/models/config/MODELS/mobilenet_quant8/config.sh with 100% similarity]
tests/scripts/models/tflite/abs/config.sh [moved from tests/scripts/models/config/abs/config.sh with 100% similarity]
tests/scripts/models/tflite/add/1D/config.sh [moved from tests/scripts/models/config/add/1D/config.sh with 100% similarity]
tests/scripts/models/tflite/add/4D/config.sh [moved from tests/scripts/models/config/add/4D/config.sh with 100% similarity]
tests/scripts/models/tflite/average_pool_2d/aligned/config.sh [moved from tests/scripts/models/config/average_pool_2d/aligned/config.sh with 100% similarity]
tests/scripts/models/tflite/average_pool_2d/avgpool1/config.sh [moved from tests/scripts/models/config/average_pool_2d/avgpool1/config.sh with 100% similarity]
tests/scripts/models/tflite/average_pool_2d/avgpool2/config.sh [moved from tests/scripts/models/config/average_pool_2d/avgpool2/config.sh with 100% similarity]
tests/scripts/models/tflite/batch_to_space_nd2/config.sh [moved from tests/scripts/models/config/batch_to_space_nd2/config.sh with 100% similarity]
tests/scripts/models/tflite/cast/config.sh [moved from tests/scripts/models/config/cast/config.sh with 100% similarity]
tests/scripts/models/tflite/concat/2D/config.sh [moved from tests/scripts/models/config/concat/2D/config.sh with 100% similarity]
tests/scripts/models/tflite/concat/concat1/config.sh [moved from tests/scripts/models/config/concat/concat1/config.sh with 100% similarity]
tests/scripts/models/tflite/concat/concat2/config.sh [moved from tests/scripts/models/config/concat/concat2/config.sh with 100% similarity]
tests/scripts/models/tflite/conv_2d/convolution1/config.sh [moved from tests/scripts/models/config/conv_2d/convolution1/config.sh with 100% similarity]
tests/scripts/models/tflite/conv_2d/convolution2/config.sh [moved from tests/scripts/models/config/conv_2d/convolution2/config.sh with 100% similarity]
tests/scripts/models/tflite/custom/squared_difference/config.sh [moved from tests/scripts/models/config/custom/squared_difference/config.sh with 100% similarity]
tests/scripts/models/tflite/depthwise_conv_2d/depthconv1/config.sh [moved from tests/scripts/models/config/depthwise_conv_2d/depthconv1/config.sh with 100% similarity]
tests/scripts/models/tflite/depthwise_conv_2d/depthconv2/config.sh [moved from tests/scripts/models/config/depthwise_conv_2d/depthconv2/config.sh with 100% similarity]
tests/scripts/models/tflite/depthwise_conv_2d_no_fuse/config.sh [moved from tests/scripts/models/config/depthwise_conv_2d_no_fuse/config.sh with 100% similarity]
tests/scripts/models/tflite/div/broadcast/config.sh [moved from tests/scripts/models/config/div/broadcast/config.sh with 100% similarity]
tests/scripts/models/tflite/embedding_lookup/config.sh [moved from tests/scripts/models/config/embedding_lookup/config.sh with 100% similarity]
tests/scripts/models/tflite/equal/config.sh [moved from tests/scripts/models/config/equal/config.sh with 100% similarity]
tests/scripts/models/tflite/exp/config.sh [moved from tests/scripts/models/config/exp/config.sh with 100% similarity]
tests/scripts/models/tflite/floor/floor1/config.sh [moved from tests/scripts/models/config/floor/floor1/config.sh with 100% similarity]
tests/scripts/models/tflite/floor/floor2/config.sh [moved from tests/scripts/models/config/floor/floor2/config.sh with 100% similarity]
tests/scripts/models/tflite/fullyconnected/fc1/config.sh [moved from tests/scripts/models/config/fullyconnected/fc1/config.sh with 100% similarity]
tests/scripts/models/tflite/fullyconnected/hybrid/config.sh [moved from tests/scripts/models/config/fullyconnected/hybrid/config.sh with 100% similarity]
tests/scripts/models/tflite/fullyconnected/matmul2x2/config.sh [moved from tests/scripts/models/config/fullyconnected/matmul2x2/config.sh with 100% similarity]
tests/scripts/models/tflite/fullyconnected/weights_as_input/config.sh [moved from tests/scripts/models/config/fullyconnected/weights_as_input/config.sh with 100% similarity]
tests/scripts/models/tflite/gather/config.sh [moved from tests/scripts/models/config/gather/config.sh with 100% similarity]
tests/scripts/models/tflite/greater/config.sh [moved from tests/scripts/models/config/greater/config.sh with 100% similarity]
tests/scripts/models/tflite/greater_equal/config.sh [moved from tests/scripts/models/config/greater_equal/config.sh with 100% similarity]
tests/scripts/models/tflite/hashtable_lookup/config.sh [moved from tests/scripts/models/config/hashtable_lookup/config.sh with 100% similarity]
tests/scripts/models/tflite/l2_normalization/config.sh [moved from tests/scripts/models/config/l2_normalization/config.sh with 100% similarity]
tests/scripts/models/tflite/l2_pool_2d/config.sh [moved from tests/scripts/models/config/l2_pool_2d/config.sh with 100% similarity]
tests/scripts/models/tflite/less/config.sh [moved from tests/scripts/models/config/less/config.sh with 100% similarity]
tests/scripts/models/tflite/less_equal/config.sh [moved from tests/scripts/models/config/less_equal/config.sh with 100% similarity]
tests/scripts/models/tflite/logistic/config.sh [moved from tests/scripts/models/config/logistic/config.sh with 100% similarity]
tests/scripts/models/tflite/max/config.sh [moved from tests/scripts/models/config/max/config.sh with 100% similarity]
tests/scripts/models/tflite/max_pool_2d/maxpool1/config.sh [moved from tests/scripts/models/config/max_pool_2d/maxpool1/config.sh with 100% similarity]
tests/scripts/models/tflite/max_pool_2d/maxpool2/config.sh [moved from tests/scripts/models/config/max_pool_2d/maxpool2/config.sh with 100% similarity]
tests/scripts/models/tflite/mean/config.sh [moved from tests/scripts/models/config/mean/config.sh with 100% similarity]
tests/scripts/models/tflite/min/config.sh [moved from tests/scripts/models/config/min/config.sh with 100% similarity]
tests/scripts/models/tflite/mul/broadcast/config.sh [moved from tests/scripts/models/config/mul/broadcast/config.sh with 100% similarity]
tests/scripts/models/tflite/neg/config.sh [moved from tests/scripts/models/config/neg/config.sh with 100% similarity]
tests/scripts/models/tflite/not_equal/config.sh [moved from tests/scripts/models/config/not_equal/config.sh with 100% similarity]
tests/scripts/models/tflite/one_hot/config.sh [moved from tests/scripts/models/config/one_hot/config.sh with 100% similarity]
tests/scripts/models/tflite/pack/config.sh [moved from tests/scripts/models/config/pack/config.sh with 100% similarity]
tests/scripts/models/tflite/pad/4D_2D/config.sh [moved from tests/scripts/models/config/pad/4D_2D/config.sh with 100% similarity]
tests/scripts/models/tflite/pad/pad1/config.sh [moved from tests/scripts/models/config/pad/pad1/config.sh with 100% similarity]
tests/scripts/models/tflite/pad/pad2/config.sh [moved from tests/scripts/models/config/pad/pad2/config.sh with 100% similarity]
tests/scripts/models/tflite/reduce_max/config.sh [moved from tests/scripts/models/config/reduce_max/config.sh with 100% similarity]
tests/scripts/models/tflite/reduce_mean/test1/config.sh [moved from tests/scripts/models/config/reduce_mean/test1/config.sh with 100% similarity]
tests/scripts/models/tflite/reduce_mean/test2/config.sh [moved from tests/scripts/models/config/reduce_mean/test2/config.sh with 100% similarity]
tests/scripts/models/tflite/reduce_sum/float/config.sh [moved from tests/scripts/models/config/reduce_sum/float/config.sh with 100% similarity]
tests/scripts/models/tflite/reduce_sum/uint8/config.sh [moved from tests/scripts/models/config/reduce_sum/uint8/config.sh with 100% similarity]
tests/scripts/models/tflite/relu/config.sh [moved from tests/scripts/models/config/relu/config.sh with 100% similarity]
tests/scripts/models/tflite/relu6/config.sh [moved from tests/scripts/models/config/relu6/config.sh with 100% similarity]
tests/scripts/models/tflite/reshape/3D/config.sh [moved from tests/scripts/models/config/reshape/3D/config.sh with 100% similarity]
tests/scripts/models/tflite/reshape/reshape1/config.sh [moved from tests/scripts/models/config/reshape/reshape1/config.sh with 100% similarity]
tests/scripts/models/tflite/reshape/reshape2/config.sh [moved from tests/scripts/models/config/reshape/reshape2/config.sh with 100% similarity]
tests/scripts/models/tflite/resize_bilinear/config.sh [moved from tests/scripts/models/config/resize_bilinear/config.sh with 100% similarity]
tests/scripts/models/tflite/rnn/config.sh [moved from tests/scripts/models/config/rnn/config.sh with 100% similarity]
tests/scripts/models/tflite/rsqrt/config.sh [moved from tests/scripts/models/config/rsqrt/config.sh with 100% similarity]
tests/scripts/models/tflite/select/config.sh [moved from tests/scripts/models/config/select/config.sh with 100% similarity]
tests/scripts/models/tflite/shape/config.sh [moved from tests/scripts/models/config/shape/config.sh with 100% similarity]
tests/scripts/models/tflite/sin/config.sh [moved from tests/scripts/models/config/sin/config.sh with 100% similarity]
tests/scripts/models/tflite/slice/config.sh [moved from tests/scripts/models/config/slice/config.sh with 100% similarity]
tests/scripts/models/tflite/softmax/config.sh [moved from tests/scripts/models/config/softmax/config.sh with 100% similarity]
tests/scripts/models/tflite/space_to_batch_nd2/config.sh [moved from tests/scripts/models/config/space_to_batch_nd2/config.sh with 100% similarity]
tests/scripts/models/tflite/space_to_depth/config.sh [moved from tests/scripts/models/config/space_to_depth/config.sh with 100% similarity]
tests/scripts/models/tflite/sqrt/config.sh [moved from tests/scripts/models/config/sqrt/config.sh with 100% similarity]
tests/scripts/models/tflite/squeeze/config.sh [moved from tests/scripts/models/config/squeeze/config.sh with 100% similarity]
tests/scripts/models/tflite/strided_slice/config.sh [moved from tests/scripts/models/config/strided_slice/config.sh with 100% similarity]
tests/scripts/models/tflite/sub/broadcast/config.sh [moved from tests/scripts/models/config/sub/broadcast/config.sh with 100% similarity]
tests/scripts/models/tflite/tanh/config.sh [moved from tests/scripts/models/config/tanh/config.sh with 100% similarity]
tests/scripts/models/tflite/tile/config.sh [moved from tests/scripts/models/config/tile/config.sh with 100% similarity]
tests/scripts/models/tflite/topk_v2/config.sh [moved from tests/scripts/models/config/topk_v2/config.sh with 100% similarity]
tests/scripts/models/tflite/transpose/config.sh [moved from tests/scripts/models/config/transpose/config.sh with 100% similarity]
tests/scripts/models/tflite/transpose_conv/same/config.sh [moved from tests/scripts/models/config/transpose_conv/same/config.sh with 100% similarity]
tests/scripts/models/tflite/transpose_conv/valid/config.sh [moved from tests/scripts/models/config/transpose_conv/valid/config.sh with 100% similarity]
tests/scripts/models/tflite/zeros_like/config.sh [moved from tests/scripts/models/config/zeros_like/config.sh with 100% similarity]
tests/scripts/nnfw_api_gtest/install_nnfw_api_gtest_nnpackages.sh [deleted file]
tests/scripts/test_scheduler_with_profiling.sh
tests/tools/nnpackage_run/src/args.cc
tests/tools/nnpackage_run/src/args.h
tests/tools/nnpackage_run/src/h5formatter.cc
tests/tools/nnpackage_run/src/h5formatter.h
tests/tools/nnpackage_run/src/nnpackage_run.cc
tests/tools/nnpackage_run/src/types.h [moved from compiler/circle-quantizer/src/CircleExpContract.cpp with 63% similarity]
tests/tools/tflite_run/src/tflite_run.cc
tools/nnpackage_tool/model2nnpkg/README.md
tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
tools/nnpackage_tool/tflite2circle/tflite2circle.sh
tools/nnpackage_tool/tflite2circle/tflitejson2circlejson.py
tools/release_tool/README.md [new file with mode: 0644]
tools/release_tool/git_release.sh [new file with mode: 0755]
tools/release_tool/onert_version.sh [new file with mode: 0755]
tools/tflitefile_tool/model_parser.py
tools/tflitefile_tool/requirements.txt [new file with mode: 0644]
tools/tflitefile_tool/select_operator.py
tools/update_version/update-version [deleted file]

diff --git a/Makefile.template b/Makefile.template
index a21937d..1b2f564 100644
@@ -154,14 +154,14 @@ runtime_tar_internal: $(TIMESTAMP_BUILD) install_internal
        tar -zcf $(WORKSPACE)/nnfw-package.tar.gz -C $(INSTALL_PATH) lib
        tar -zcf $(WORKSPACE)/nnfw-devel-package.tar.gz -C $(INSTALL_PATH) include/nnfw
        tar -zcf $(WORKSPACE)/nnfw-plugin-devel-package.tar.gz -C $(INSTALL_PATH) include/onert
-       tar -zcf $(WORKSPACE)/nnfw-test-package.tar.gz -C ${INSTALL_PATH} bin test unittest unittest_standalone
+       tar -zcf $(WORKSPACE)/nnfw-test-package.tar.gz -C $(INSTALL_PATH) $(shell ls $(INSTALL_PATH) -I lib -I include)
 
 acl_tar_internal: $(BUILD_FOLDER)
-       tar -zcf $(WORKSPACE)/nnfw-acl.tar.gz -C ${OVERLAY_FOLDER} lib
+       tar -zcf $(WORKSPACE)/nnfw-acl.tar.gz -C ${OVERLAY_FOLDER} lib/libarm_compute.so lib/libarm_compute_core.so lib/libarm_compute_graph.so
 
 install_internal_acl:
 # Workaround to install acl for test (ignore error when there is no file to copy)
-       cp $(OVERLAY_FOLDER)/lib/* $(INSTALL_ALIAS)/lib || true
+       cp $(OVERLAY_FOLDER)/lib/libarm_compute* $(INSTALL_ALIAS)/lib || true
 
 build_test_suite: install_internal install_internal_acl
        @echo "packaging test suite"
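For context on the new test-package rule: GNU ls's -I (--ignore=PATTERN) flag skips matching entries, so $(shell ls $(INSTALL_PATH) -I lib -I include) expands to everything under the install prefix except lib and include (the same set the old hard-coded list spelled out, without needing updates when new directories are installed). A minimal illustration, with directory contents taken from the removed rule above:

    $ ls $INSTALL_PATH -I lib -I include
    bin  test  unittest  unittest_standalone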
diff --git a/compiler/circle-quantizer/CMakeLists.txt b/compiler/circle-quantizer/CMakeLists.txt
index 009bfab..5075b13 100644
@@ -1,8 +1,6 @@
-file(GLOB_RECURSE SOURCES "src/*.cpp")
+set (SOURCES src/CircleQuantizer.cpp)
 
 add_executable(circle-quantizer "${SOURCES}")
-target_include_directories(circle-quantizer PRIVATE include)
-target_include_directories(circle-quantizer PRIVATE src)
 target_link_libraries(circle-quantizer foder)
 target_link_libraries(circle-quantizer safemain)
 target_link_libraries(circle-quantizer oops)
diff --git a/compiler/circle-quantizer/include/CircleExpContract.h b/compiler/circle-quantizer/include/CircleExpContract.h
deleted file mode 100644
index e888e4a..0000000
--- a/compiler/circle-quantizer/include/CircleExpContract.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLEQUANTIZER_CIRCLEXPCONTRACT_H__
-#define __CIRCLEQUANTIZER_CIRCLEXPCONTRACT_H__
-
-#include <loco.h>
-#include <luci/CircleExporter.h>
-#include <luci/IR/Module.h>
-
-#include <memory>
-#include <string>
-
-struct CircleExpContract : public luci::CircleExporter::Contract
-{
-public:
-  CircleExpContract(luci::Module *module, const std::string &filename)
-      : _module(module), _filepath(filename)
-  {
-    // NOTHING TO DO
-  }
-  virtual ~CircleExpContract() = default;
-
-public:
-  loco::Graph *graph(void) const final { return nullptr; }
-  luci::Module *module(void) const final { return _module; };
-
-public:
-  bool store(const char *ptr, const size_t size) const final;
-
-private:
-  luci::Module *_module;
-  const std::string _filepath;
-};
-
-#endif // __CIRCLEQUANTIZER_CIRCLEXPCONTRACT_H__
diff --git a/compiler/circle-quantizer/src/CircleQuantizer.cpp b/compiler/circle-quantizer/src/CircleQuantizer.cpp
index 8d3a80c..54b38a1 100644
  * limitations under the License.
  */
 
-#include "CircleExpContract.h"
-
 #include <foder/FileLoader.h>
 
 #include <luci/Importer.h>
 #include <luci/CircleOptimizer.h>
 #include <luci/Service/Validate.h>
 #include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
 
 #include <oops/InternalExn.h>
 #include <arser/arser.h>
@@ -37,6 +36,14 @@ using OptionHook = std::function<int(const char **)>;
 using Algorithms = luci::CircleOptimizer::Options::Algorithm;
 using AlgorithmParameters = luci::CircleOptimizer::Options::AlgorithmParameters;
 
+void print_exclusive_options(void)
+{
+  std::cout << "Use only one of the 3 options below." << std::endl;
+  std::cout << "    --quantize_dequantize_weights" << std::endl;
+  std::cout << "    --quantize_with_minmax" << std::endl;
+  std::cout << "    --requantize" << std::endl;
+}
+
 void print_version(void)
 {
   std::cout << "circle-quantizer version " << vconone::get_string() << std::endl;
@@ -53,6 +60,7 @@ int entry(int argc, char **argv)
 
   const std::string qdqw = "--quantize_dequantize_weights";
   const std::string qwmm = "--quantize_with_minmax";
+  const std::string rq = "--requantize";
 
   arser::Arser arser("circle-quantizer provides circle model quantization");
 
@@ -79,6 +87,14 @@ int entry(int argc, char **argv)
             "Three arguments required: input_dtype(float32) "
             "output_dtype(uint8) granularity(layer, channel)");
 
+  arser.add_argument(rq)
+      .nargs(2)
+      .type(arser::DataType::STR_VEC)
+      .required(false)
+      .help("Requantize a quantized model. "
+            "Two arguments required: input_dtype(int8) "
+            "output_dtype(uint8)");
+
   arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
   arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
 
@@ -95,6 +111,11 @@ int entry(int argc, char **argv)
 
   if (arser[qdqw])
   {
+    if (arser[qwmm] || arser[rq])
+    {
+      print_exclusive_options();
+      return 255;
+    }
     auto values = arser.get<std::vector<std::string>>(qdqw);
     if (values.size() != 3)
     {
@@ -110,6 +131,11 @@ int entry(int argc, char **argv)
 
   if (arser[qwmm])
   {
+    if (arser[qdqw] || arser[rq])
+    {
+      print_exclusive_options();
+      return 255;
+    }
     auto values = arser.get<std::vector<std::string>>(qwmm);
     if (values.size() != 3)
     {
@@ -123,12 +149,40 @@ int entry(int argc, char **argv)
     options->param(AlgorithmParameters::Quantize_granularity, values.at(2));
   }
 
+  if (arser[rq])
+  {
+    if (arser[qwmm] || arser[qdqw])
+    {
+      print_exclusive_options();
+      return 255;
+    }
+    auto values = arser.get<std::vector<std::string>>(rq);
+    if (values.size() != 2)
+    {
+      std::cerr << arser;
+      return 255;
+    }
+    options->enable(Algorithms::Requantize);
+
+    options->param(AlgorithmParameters::Quantize_input_dtype, values.at(0));
+    options->param(AlgorithmParameters::Quantize_output_dtype, values.at(1));
+  }
+
   std::string input_path = arser.get<std::string>("input");
   std::string output_path = arser.get<std::string>("output");
 
   // Load model from the file
   foder::FileLoader file_loader{input_path};
   std::vector<char> model_data = file_loader.load();
+
+  // Verify flatbuffers
+  flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
+  if (!circle::VerifyModelBuffer(verifier))
+  {
+    std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
+    return EXIT_FAILURE;
+  }
+
   const circle::Model *circle_model = circle::GetModel(model_data.data());
   if (circle_model == nullptr)
   {
@@ -157,7 +211,7 @@ int entry(int argc, char **argv)
   // Export to output Circle file
   luci::CircleExporter exporter;
 
-  CircleExpContract contract(module.get(), output_path);
+  luci::CircleFileExpContract contract(module.get(), output_path);
 
   if (!exporter.invoke(&contract))
   {
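
Two behavioral changes land in CircleQuantizer.cpp besides the new --requantize algorithm: the three quantization modes are now mutually exclusive (any pairing prints print_exclusive_options() and returns 255), and the input flatbuffer is verified before circle::GetModel() is called, so a truncated or corrupt .circle file fails fast with "Invalid input file" instead of crashing later in the importer. A minimal sketch of that verify-before-parse pattern, assuming the flatc-generated circle schema header:

    #include <mio/circle/schema_generated.h>

    #include <cstdint>
    #include <vector>

    // Returns nullptr for a buffer that fails schema verification; the caller
    // reports the error as in the hunk above.
    const circle::Model *load_verified(const std::vector<char> &model_data)
    {
      flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model_data.data()),
                                     model_data.size()};
      if (!circle::VerifyModelBuffer(verifier))
        return nullptr;
      return circle::GetModel(model_data.data());
    }
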
index 6328a64..302c3a7 100644 (file)
@@ -10,6 +10,7 @@
 
 ## TFLITE RECIPE
 
+Add(Net_TConv_BN_000 PASS fuse_batchnorm_with_tconv)
 Add(Net_InstanceNorm_001 PASS fuse_instnorm)
 Add(Net_InstanceNorm_002 PASS fuse_instnorm)
 Add(BatchMatMulV2_000 PASS resolve_customop_batchmatmul)
diff --git a/compiler/circle2circle/include/CircleExpContract.h b/compiler/circle2circle/include/CircleExpContract.h
deleted file mode 100644 (file)
index 313b16d..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLE2CIRCLE_CIRCLEXPCONTRACT_H__
-#define __CIRCLE2CIRCLE_CIRCLEXPCONTRACT_H__
-
-#include <loco.h>
-#include <luci/CircleExporter.h>
-#include <luci/IR/Module.h>
-#include <mio/circle/schema_generated.h>
-
-#include <memory>
-#include <string>
-
-struct CircleExpContract : public luci::CircleExporter::Contract
-{
-public:
-  CircleExpContract(luci::Module *module, const std::string &filename)
-      : _module(module), _filepath(filename)
-  {
-    // NOTHING TO DO
-  }
-  virtual ~CircleExpContract() = default;
-
-public:
-  loco::Graph *graph(void) const final { return nullptr; }
-  luci::Module *module(void) const final { return _module; };
-
-public:
-  bool store(const char *ptr, const size_t size) const final;
-
-private:
-  luci::Module *_module;
-  const std::string _filepath;
-};
-
-#endif // __CIRCLE2CIRCLE_CIRCLEXPCONTRACT_H__
index 849597b..39ceade 100644 (file)
  * limitations under the License.
  */
 
-#include "CircleExpContract.h"
-
 #include <foder/FileLoader.h>
 
 #include <luci/Importer.h>
 #include <luci/CircleOptimizer.h>
 #include <luci/Service/Validate.h>
 #include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
 #include <luci/UserSettings.h>
 
 #include <oops/InternalExn.h>
@@ -61,6 +60,12 @@ int entry(int argc, char **argv)
   arser.add_argument("--all").nargs(0).required(false).default_value(false).help(
       "Enable all optimize options");
 
+  arser.add_argument("--fuse_batchnorm_with_tconv")
+      .nargs(0)
+      .required(false)
+      .default_value(false)
+      .help("This will fuse BatchNorm operators to Transposed Convolution operator");
+
   arser.add_argument("--fuse_bcq")
       .nargs(0)
       .required(false)
@@ -101,7 +106,7 @@ int entry(int argc, char **argv)
       .nargs(0)
       .required(false)
       .default_value(false)
-      .help("This will turn off operator vaidations. May help input model investigation.");
+      .help("This will turn off operator validations. May help input model investigation.");
 
   arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
   arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
@@ -125,6 +130,8 @@ int entry(int argc, char **argv)
     options->enable(Algorithms::ResolveCustomOpBatchMatMul);
     options->enable(Algorithms::ResolveCustomOpMatMul);
   }
+  if (arser.get<bool>("--fuse_batchnorm_with_tconv"))
+    options->enable(Algorithms::FuseBatchNormWithTConv);
   if (arser.get<bool>("--fuse_bcq"))
     options->enable(Algorithms::FuseBCQ);
   if (arser.get<bool>("--fuse_instnorm"))
@@ -157,6 +164,14 @@ int entry(int argc, char **argv)
     std::cerr << err.what() << std::endl;
     return EXIT_FAILURE;
   }
+
+  flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
+  if (!circle::VerifyModelBuffer(verifier))
+  {
+    std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
+    return EXIT_FAILURE;
+  }
+
   const circle::Model *circle_model = circle::GetModel(model_data.data());
   if (circle_model == nullptr)
   {
@@ -177,15 +192,20 @@ int entry(int argc, char **argv)
 
     if (!luci::validate(graph))
     {
-      std::cerr << "ERROR: Optimized graph is invalid" << std::endl;
-      return 255;
+      if (settings->get(luci::UserSettings::Key::DisableValidation))
+        std::cerr << "WARNING: Optimized graph is invalid" << std::endl;
+      else
+      {
+        std::cerr << "ERROR: Optimized graph is invalid" << std::endl;
+        return 255;
+      }
     }
   }
 
   // Export to output Circle file
   luci::CircleExporter exporter;
 
-  CircleExpContract contract(module.get(), output_path);
+  luci::CircleFileExpContract contract(module.get(), output_path);
 
   if (!exporter.invoke(&contract))
   {
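
Circle2Circle.cpp picks up the same shared-contract and flatbuffer-verification changes as circle-quantizer, adds the --fuse_batchnorm_with_tconv pass flag, and softens graph validation: when the user disables validation (luci::UserSettings::Key::DisableValidation), an invalid optimized graph now only logs a warning instead of aborting with 255. Each pass follows one piece of plumbing — declare a boolean flag, then map it to an optimizer algorithm. A hedged sketch (the helper name is an assumption; the arser calls mirror the hunk above):

    #include <arser/arser.h>

    #include <string>

    // Registers a no-argument boolean pass flag on the parser.
    void add_pass_flag(arser::Arser &arser, const std::string &flag, const char *help)
    {
      arser.add_argument(flag).nargs(0).required(false).default_value(false).help(help);
    }

    // Later, after parsing:
    //   if (arser.get<bool>("--fuse_batchnorm_with_tconv"))
    //     options->enable(Algorithms::FuseBatchNormWithTConv);
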
index 75165ad..2ca016b 100644 (file)
@@ -7,3 +7,4 @@ target_link_libraries(circlechef_circle circlechef_proto)
 target_link_libraries(circlechef_circle mio_circle)
 target_link_libraries(circlechef_circle stdex)
 target_link_libraries(circlechef_circle cwrap)
+target_link_libraries(circlechef_circle souschef)
index a8ef3ee..23ca29b 100644 (file)
@@ -19,6 +19,8 @@
 
 #include <mio/circle/schema_generated.h>
 
+#include <souschef/TensorFiller.h>
+
 #include <circlechef.pb.h>
 
 #include <map>
@@ -40,7 +42,7 @@ bool is_custom(const circle::OperatorCode *opcode);
 /**
  * @brief Loads TF lite file and provides helpers to access attributes
  */
-class CircleImport
+class CircleImport : public souschef::TensorFiller
 {
 public:
   CircleImport(const circle::Model *model);
@@ -63,63 +65,6 @@ public:
   std::string opcode_name(const circle::Operator *op) const;
   size_t buffer_info(const circle::Tensor *tensor, const uint8_t **buff_data);
 
-  /**
-   * @brief This will record the tensor by index, if it needs filler option,
-   *        such as kernel, bias.
-   */
-  void set_tensor_filler(uint32_t tensor_index) { _tensor_filler[tensor_index] = true; }
-
-  /**
-   * @brief This will store int32 filler values such as reshape information for the tensor
-   */
-  void set_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
-  {
-    _tensor_filler_vint32[tensor_index] = expvalues;
-  }
-
-  void set_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
-  {
-    _tensor_filler_vfloat[tensor_index] = expvalues;
-  }
-
-  /**
-   * @brief This will return true if the tensor by index, needs a filler option.
-   */
-  bool get_tensor_filler(uint32_t tensor_index)
-  {
-    auto it = _tensor_filler.find(tensor_index);
-    if (it != _tensor_filler.end())
-    {
-      return it->second;
-    }
-    return false;
-  }
-
-  /**
-   * @brief This will return true if the tensor by index, needs a int array filler option.
-   */
-  bool get_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
-  {
-    auto it = _tensor_filler_vint32.find(tensor_index);
-    if (it != _tensor_filler_vint32.end())
-    {
-      expvalues = it->second;
-      return true;
-    }
-    return false;
-  }
-
-  bool get_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
-  {
-    auto it = _tensor_filler_vfloat.find(tensor_index);
-    if (it != _tensor_filler_vfloat.end())
-    {
-      expvalues = it->second;
-      return true;
-    }
-    return false;
-  }
-
 private:
   const CircleSubGraphs_t *_subgraphs{nullptr};
   const CircleBuffers_t *_buffers{nullptr};
@@ -129,10 +74,6 @@ private:
   std::vector<const circle::OperatorCode *> _op_codes{};
   std::vector<int32_t> _inputs{};
   std::vector<int32_t> _outputs{};
-
-  std::map<uint32_t, bool> _tensor_filler{};
-  std::map<uint32_t, std::vector<int32_t>> _tensor_filler_vint32{};
-  std::map<uint32_t, std::vector<float>> _tensor_filler_vfloat{};
 };
 
 } // namespace circlechef
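
The filler bookkeeping deleted here is generic recipe machinery, not circlechef-specific, so CircleImport now inherits it from souschef::TensorFiller (hence the new souschef link dependency in the CMake hunk just above). A condensed sketch of the mixin's shape, assuming the upstream header keeps the deleted signatures:

    #include <cstdint>
    #include <map>
    #include <vector>

    namespace souschef
    {

    class TensorFiller
    {
    public:
      // Record that the tensor at 'index' needs filler data (kernel, bias, ...).
      void set_tensor_filler(uint32_t index) { _filler[index] = true; }

      void set_tensor_filler(uint32_t index, std::vector<int32_t> &values)
      {
        _filler_vint32[index] = values;
      }

      bool get_tensor_filler(uint32_t index)
      {
        auto it = _filler.find(index);
        return it != _filler.end() && it->second;
      }

      bool get_tensor_filler(uint32_t index, std::vector<int32_t> &values)
      {
        auto it = _filler_vint32.find(index);
        if (it == _filler_vint32.end())
          return false;
        values = it->second;
        return true;
      }

    private:
      std::map<uint32_t, bool> _filler;
      std::map<uint32_t, std::vector<int32_t>> _filler_vint32;
    };

    } // namespace souschef
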
index d81467d..aa54678 100644 (file)
@@ -26,6 +26,7 @@
 #include "OpChefs.h"
 
 #include <souschef/Dataset.h>
+#include <souschef/Dims.h>
 
 #include "Log.h"
 
 #include <sstream>
 #include <stdexcept>
 
-namespace
-{
-
 using namespace souschef;
 
-template <typename T> std::vector<T> as_vector(const ::google::protobuf::RepeatedPtrField<T> &field)
-{
-  std::vector<T> res;
-  for (const auto &elem : field)
-  {
-    res.emplace_back(elem);
-  }
-  return res;
-}
-
-template <typename T> Dataset<T> as_dataset(const ::google::protobuf::RepeatedPtrField<T> &field)
-{
-  return Dataset<T>(as_vector<T>(field));
-}
-
-} // namespace
-
-namespace
-{
-
-template <typename T> using Dims = std::vector<T>;
-
-Dims<int32_t> as_dims(const circlechef::TensorShape &shape)
-{
-  std::vector<int32_t> res;
-
-  for (auto &dim : shape.dim())
-  {
-    res.emplace_back(static_cast<int32_t>(dim));
-  }
-
-  return res;
-}
-
-int32_t element_count(const Dims<int32_t> &dims)
-{
-  return std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<int32_t>());
-}
-
-} // namespace
-
 namespace
 {
 
index 3294bb2..a0a063e 100644 (file)
@@ -725,6 +725,7 @@ OpPrinterRegistry::OpPrinterRegistry()
   _op_map[circle::BuiltinOperator_MIRROR_PAD] = make_unique<MirrorPadPrinter>();
   _op_map[circle::BuiltinOperator_MUL] = make_unique<MulPrinter>();
   // There is no Option for NON_MAX_SUPPRESSION_V4
+  // There is no Option for NON_MAX_SUPPRESSION_V5
   _op_map[circle::BuiltinOperator_ONE_HOT] = make_unique<OneHotPrinter>();
   _op_map[circle::BuiltinOperator_PACK] = make_unique<PackPrinter>();
   // There is no Option for PAD
index ef50e8d..ec9e3cf 100644 (file)
@@ -33,10 +33,12 @@ set(REQUIREMENTS_FILE "requirements.txt")
 set(REQUIREMENTS_OVERLAY_PATH_TF_1_13_2 "${VIRTUALENV_OVERLAY_TF_1_13_2}/${REQUIREMENTS_FILE}")
 set(REQUIREMENTS_OVERLAY_PATH_TF_2_3_0 "${VIRTUALENV_OVERLAY_TF_2_3_0}/${REQUIREMENTS_FILE}")
 
+# TODO remove version number of '--upgrade pip==20.2.1 setuptools==49.3.0'
+# NOTE adding version is for temporary hotfix of setuptools 50.x.y version
 add_custom_command(
   OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
   COMMAND ${CMAKE_COMMAND} -E echo "tensorflow==1.13.2" > ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
-  COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install --upgrade pip setuptools
+  COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0
   COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} --upgrade
   DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2}
 )
@@ -46,7 +48,7 @@ add_custom_command(
   COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
   COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.3.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
   COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
-  COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip setuptools
+  COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0
   COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} --upgrade
   DEPENDS ${VIRTUALENV_OVERLAY_TF_2_3_0}
 )
@@ -233,10 +235,10 @@ foreach(RECIPE IN ITEMS ${RECIPES})
 
   set(INPUT_HDF5_FILE "${RECIPE}${OPT_FORMAT}.input.h5")
   set(INPUT_BIN_PATH "${CMAKE_CURRENT_BINARY_DIR}/${INPUT_HDF5_FILE}")
-  
+
   set(EXPECTED_HDF5_FILE "${RECIPE}${OPT_FORMAT}.expected.h5")
   set(EXPECTED_BIN_PATH "${CMAKE_CURRENT_BINARY_DIR}/${EXPECTED_HDF5_FILE}")
-  
+
   if(NOT DEFINED NO_TCGEN_${RECIPE})
     # Generate input.h5, expected.h5
     add_custom_command(OUTPUT ${INPUT_BIN_PATH} ${EXPECTED_BIN_PATH}
@@ -244,7 +246,7 @@ foreach(RECIPE IN ITEMS ${RECIPES})
       DEPENDS $<TARGET_FILE:testDataGenerator> ${MODEL_FILE}
       COMMENT "Generate ${INPUT_BIN_PATH} and ${EXPECTED_BIN_PATH}"
     )
-    
+
     # Generate test directory
     set(TC_DIRECTORY "${NNPKG_PATH}/metadata/tc")
     add_custom_command(OUTPUT ${TC_DIRECTORY}
@@ -252,7 +254,7 @@ foreach(RECIPE IN ITEMS ${RECIPES})
       DEPENDS ${NNPKG_PATH}
       COMMENT "Generate ${RECIPE} nnpackage test directory"
     )
-    
+
     # Move input hdf5 file to test directory
     set(INPUT_NNPKG_PATH "${TC_DIRECTORY}/input.h5")
     add_custom_command(OUTPUT ${INPUT_NNPKG_PATH}
@@ -260,7 +262,7 @@ foreach(RECIPE IN ITEMS ${RECIPES})
       DEPENDS ${INPUT_BIN_PATH} ${TC_DIRECTORY}
       COMMENT "Move ${INPUT_HDF5_FILE} to nnpackage"
     )
-    
+
     # Move expected hdf5 file to test directory
     set(EXPECTED_NNPKG_PATH "${TC_DIRECTORY}/expected.h5")
     add_custom_command(OUTPUT ${EXPECTED_NNPKG_PATH}
index fe9933a..886f607 100644 (file)
@@ -96,6 +96,8 @@ tcgenerate(Mean_U8_000)
 tcgenerate(Minimum_000)
 tcgenerate(NonMaxSuppressionV4_000)
 tcgenerate(NonMaxSuppressionV4_001)
+tcgenerate(NonMaxSuppressionV5_000)
+tcgenerate(NonMaxSuppressionV5_001)
 tcgenerate(MirrorPad_000)
 tcgenerate(Mul_U8_000)
 tcgenerate(Neg_000)
index dfe32ca..b84fa7e 100644 (file)
@@ -41,10 +41,12 @@ std::unique_ptr<NodeData> calc(const NodeData *input_data, const NodeData *bias_
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::BiasAdd<loco::Domain::Tensor> *bias_add)
+using namespace locomotiv;
+
+void execute_node(loco::BiasAdd<loco::Domain::Tensor> *bias_add)
 {
   validate(bias_add, "BiasAdd is nullptr");
 
@@ -63,7 +65,7 @@ void NodeExecution::execute(loco::BiasAdd<loco::Domain::Tensor> *bias_add)
   annot_domain(bias_add, annot_domain(bias_add->value()));
 }
 
-void NodeExecution::execute(loco::BiasAdd<loco::Domain::Feature> *bias_add)
+void execute_node(loco::BiasAdd<loco::Domain::Feature> *bias_add)
 {
   validate(bias_add, "BiasAdd is nullptr");
 
@@ -82,7 +84,7 @@ void NodeExecution::execute(loco::BiasAdd<loco::Domain::Feature> *bias_add)
   annot_domain(bias_add, loco::Domain::Feature);
 }
 
-} // namespace locomotiv
+} // namespace
 
 namespace
 {
@@ -123,3 +125,18 @@ std::unique_ptr<NodeData> calc(const NodeData *input_data, const NodeData *bias_
 }
 
 } // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::BiasAdd<loco::Domain::Tensor> *bias_add)
+{
+  execute_node(bias_add);
+}
+
+void NodeExecution::execute(loco::BiasAdd<loco::Domain::Feature> *bias_add)
+{
+  execute_node(bias_add);
+}
+
+} // namespace locomotiv
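
This BiasAdd.cpp hunk sets the template for every locomotiv Node/*.cpp change that follows: the execution body moves out of namespace locomotiv into a file-local anonymous namespace (giving it internal linkage), and the public NodeExecution::execute overload shrinks to a one-line trampoline. Behavior is unchanged. A standalone illustration of the pattern with toy types, not locomotiv's real classes:

    #include <iostream>

    struct Node
    {
      int value = 0;
    };

    namespace
    {

    // Internal linkage: invisible outside this translation unit.
    void execute_node(Node *node) { std::cout << node->value << '\n'; }

    } // namespace

    struct NodeExecution
    {
      // Public entry point stays a thin dispatcher.
      void execute(Node *node) { execute_node(node); }
    };

    int main()
    {
      Node n{42};
      NodeExecution{}.execute(&n); // prints 42
      return 0;
    }
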
index c2f2b44..21f00a4 100644 (file)
 #include <stdexcept>
 #include <cassert>
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::BiasEncode *bias_enc)
+using namespace locomotiv;
+
+void execute_node(loco::BiasEncode *bias_enc)
 {
   auto input_data = annot_data(bias_enc->input());
 
@@ -60,4 +62,11 @@ void NodeExecution::execute(loco::BiasEncode *bias_enc)
   annot_domain(bias_enc, loco::Domain::Bias);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::BiasEncode *bias_enc) { execute_node(bias_enc); }
+
 } // namespace locomotiv
index 0360b9f..96ffbc2 100644 (file)
@@ -53,10 +53,12 @@ inline uint32_t offset_by_index(const Shape &shape, const Index &index)
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::ConstGen *constgen)
+using namespace locomotiv;
+
+void execute_node(loco::ConstGen *constgen)
 {
   uint32_t volume = 1;
 
@@ -113,4 +115,11 @@ void NodeExecution::execute(loco::ConstGen *constgen)
   annot_domain(constgen, loco::Domain::Tensor);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::ConstGen *constgen) { execute_node(constgen); }
+
 } // namespace locomotiv
index 2e41855..cdf0dfd 100644 (file)
@@ -139,10 +139,12 @@ Buffer<RET_T> calc_conv2D(const loco::Conv2D *conv2d, const Buffer<IFM_T> *input
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::Conv2D *conv2d)
+using namespace locomotiv;
+
+void execute_node(loco::Conv2D *conv2d)
 {
   auto ifm_data = annot_data(conv2d->ifm());
   auto ker_data = annot_data(conv2d->ker());
@@ -176,4 +178,11 @@ void NodeExecution::execute(loco::Conv2D *conv2d)
   annot_domain(conv2d, loco::Domain::Feature);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Conv2D *conv2d) { execute_node(conv2d); }
+
 } // namespace locomotiv
index 92d5aa1..f39cd17 100644 (file)
@@ -143,10 +143,12 @@ Buffer<RET_T> calc_dw_conv2d(const loco::DepthwiseConv2D *dw_conv2d, const Buffe
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::DepthwiseConv2D *dw_conv2d)
+using namespace locomotiv;
+
+void execute_node(loco::DepthwiseConv2D *dw_conv2d)
 {
   auto ifm_data = annot_data(dw_conv2d->ifm());
   auto ker_data = annot_data(dw_conv2d->ker());
@@ -182,4 +184,11 @@ void NodeExecution::execute(loco::DepthwiseConv2D *dw_conv2d)
   annot_domain(dw_conv2d, loco::Domain::Feature);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::DepthwiseConv2D *dw_conv2d) { execute_node(dw_conv2d); }
+
 } // namespace locomotiv
index 1700490..03f5bf8 100644 (file)
@@ -79,10 +79,12 @@ std::unique_ptr<locomotiv::NodeData> dw_filter_encode(const loco::DepthwiseFilte
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::DepthwiseFilterEncode *enc)
+using namespace locomotiv;
+
+void execute_node(loco::DepthwiseFilterEncode *enc)
 {
   auto input_data = annot_data(enc->input());
 
@@ -110,4 +112,11 @@ void NodeExecution::execute(loco::DepthwiseFilterEncode *enc)
   annot_domain(enc, loco::Domain::DepthwiseFilter);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::DepthwiseFilterEncode *enc) { execute_node(enc); }
+
 } // namespace locomotiv
index 8a56a56..8776e1b 100644 (file)
@@ -72,10 +72,12 @@ std::unique_ptr<locomotiv::NodeData> feature_decode(const loco::FeatureDecode *n
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::FeatureDecode *dec)
+using namespace locomotiv;
+
+void execute_node(loco::FeatureDecode *dec)
 {
   auto input_data = annot_data(dec->input());
 
@@ -109,4 +111,11 @@ void NodeExecution::execute(loco::FeatureDecode *dec)
   annot_domain(dec, loco::Domain::Tensor);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::FeatureDecode *dec) { execute_node(dec); }
+
 } // namespace locomotiv
index cd9d708..0e2ac91 100644 (file)
@@ -74,10 +74,12 @@ std::unique_ptr<locomotiv::NodeData> filter_encode(const loco::FilterEncode *nod
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::FilterEncode *enc)
+using namespace locomotiv;
+
+void execute_node(loco::FilterEncode *enc)
 {
   auto input_data = annot_data(enc->input());
 
@@ -111,4 +113,11 @@ void NodeExecution::execute(loco::FilterEncode *enc)
   annot_domain(enc, loco::Domain::Filter);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::FilterEncode *enc) { execute_node(enc); }
+
 } // namespace locomotiv
index eb7d44a..9095ecf 100644 (file)
 #include <stdexcept>
 #include <cassert>
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::Forward *forward)
+using namespace locomotiv;
+
+void execute_node(loco::Forward *forward)
 {
   auto input_data = annot_data(forward->input());
 
@@ -59,4 +61,11 @@ void NodeExecution::execute(loco::Forward *forward)
   annot_domain(forward, annot_domain(forward->input()));
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Forward *forward) { execute_node(forward); }
+
 } // namespace locomotiv
index 77b7315..e5d149a 100644 (file)
@@ -82,10 +82,12 @@ template <typename T> Buffer<T> calc_mat_mul(const Buffer<T> *lhs_buf, const Buf
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::MatMul *mat_mul)
+using namespace locomotiv;
+
+void execute_node(loco::MatMul *mat_mul)
 {
   auto lhs_data = annot_data(mat_mul->lhs());
   auto rhs_data = annot_data(mat_mul->rhs());
@@ -130,4 +132,11 @@ void NodeExecution::execute(loco::MatMul *mat_mul)
   annot_domain(mat_mul, loco::Domain::Matrix);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::MatMul *mat_mul) { execute_node(mat_mul); }
+
 } // namespace locomotiv
index c591676..0310015 100644 (file)
@@ -68,10 +68,12 @@ std::unique_ptr<locomotiv::NodeData> matrix_decode(const loco::MatrixDecode *nod
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::MatrixDecode *matrix_dec)
+using namespace locomotiv;
+
+void execute_node(loco::MatrixDecode *matrix_dec)
 {
   auto input_data = annot_data(matrix_dec->input());
 
@@ -106,4 +108,11 @@ void NodeExecution::execute(loco::MatrixDecode *matrix_dec)
   annot_domain(matrix_dec, loco::Domain::Tensor);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::MatrixDecode *matrix_dec) { execute_node(matrix_dec); }
+
 } // namespace locomotiv
index 5d92f89..8dce1cb 100644 (file)
@@ -129,10 +129,12 @@ nncc::core::ADT::tensor::Buffer<T> maxPool2D(const loco::MaxPool2D *maxpool2d,
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::MaxPool2D *maxpool2d)
+using namespace locomotiv;
+
+void execute_node(loco::MaxPool2D *maxpool2d)
 {
   auto ifm_data = annot_data(maxpool2d->ifm());
 
@@ -164,4 +166,11 @@ void NodeExecution::execute(loco::MaxPool2D *maxpool2d)
   annot_domain(maxpool2d, loco::Domain::Feature);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::MaxPool2D *maxpool2d) { execute_node(maxpool2d); }
+
 } // namespace locomotiv
index c482d8b..fe5d7c2 100644 (file)
 #include <cassert>
 #include <stdexcept>
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::Pull *pull)
+using namespace locomotiv;
+
+void execute_node(loco::Pull *pull)
 {
 // TODO Remove deprecated code
 #if 0
@@ -69,4 +71,11 @@ void NodeExecution::execute(loco::Pull *pull)
   annot_domain(pull, loco::Domain::Tensor);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Pull *pull) { execute_node(pull); }
+
 } // namespace locomotiv
index fc5808b..4e1c6c3 100644 (file)
 #include <stdexcept>
 #include <cassert>
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::Push *push)
+using namespace locomotiv;
+
+void execute_node(loco::Push *push)
 {
   auto from_data = annot_data(push->from());
 
@@ -58,4 +60,11 @@ void NodeExecution::execute(loco::Push *push)
   annot_domain(push, loco::Domain::Tensor);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Push *push) { execute_node(push); }
+
 } // namespace locomotiv
index ac16720..a9c07be 100644 (file)
@@ -36,10 +36,12 @@ using nncc::core::ADT::tensor::num_elements;
 #include <cstring>
 #include <vector>
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::Reshape<loco::ReshapeType::Fixed> *reshape)
+using namespace locomotiv;
+
+void execute_node(loco::Reshape<loco::ReshapeType::Fixed> *reshape)
 {
   auto input_data = annot_data(reshape->input());
 
@@ -87,4 +89,14 @@ void NodeExecution::execute(loco::Reshape<loco::ReshapeType::Fixed> *reshape)
   annot_domain(reshape, annot_domain(reshape->input()));
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Reshape<loco::ReshapeType::Fixed> *reshape)
+{
+  execute_node(reshape);
+}
+
 } // namespace locomotiv
index 352598b..0018eb6 100644 (file)
@@ -65,10 +65,12 @@ Shape reduce_shape(const Shape &shape, uint32_t axis)
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::TensorSoftmax *softmax)
+using namespace locomotiv;
+
+void execute_node(loco::TensorSoftmax *softmax)
 {
   auto input_data = annot_data(softmax->input());
 
@@ -119,4 +121,11 @@ void NodeExecution::execute(loco::TensorSoftmax *softmax)
   annot_domain(softmax, annot_domain(softmax->input()));
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorSoftmax *softmax) { execute_node(softmax); }
+
 } // namespace locomotiv
index 010ca68..38e5a7a 100644 (file)
@@ -34,10 +34,12 @@ using nncc::core::ADT::tensor::Shape;
 #include <cassert>
 #include <stdexcept>
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::TensorBroadcast *tensor_broadcast)
+using namespace locomotiv;
+
+void execute_node(loco::TensorBroadcast *tensor_broadcast)
 {
   auto input_data = annot_data(tensor_broadcast->input());
 
@@ -103,4 +105,14 @@ void NodeExecution::execute(loco::TensorBroadcast *tensor_broadcast)
   annot_domain(tensor_broadcast, loco::Domain::Tensor);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorBroadcast *tensor_broadcast)
+{
+  execute_node(tensor_broadcast);
+}
+
 } // namespace locomotiv
index 3187a7f..188bb63 100644 (file)
@@ -35,10 +35,12 @@ using nncc::core::ADT::tensor::Shape;
 #include <cassert>
 #include <stdexcept>
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::TensorConcat *tensor_concat)
+using namespace locomotiv;
+
+void execute_node(loco::TensorConcat *tensor_concat)
 {
   validate(tensor_concat, "TensorConcat is nullptr");
 
@@ -112,4 +114,11 @@ void NodeExecution::execute(loco::TensorConcat *tensor_concat)
   annot_domain(tensor_concat, loco::Domain::Tensor);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorConcat *tensor_concat) { execute_node(tensor_concat); }
+
 } // namespace locomotiv
index cd81a3a..5d4ad5d 100644 (file)
@@ -31,10 +31,12 @@ using nncc::core::ADT::tensor::IndexEnumerator;
 using nncc::core::ADT::tensor::LexicalLayout;
 using nncc::core::ADT::tensor::make_buffer;
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::TensorConstantPad *pad)
+using namespace locomotiv;
+
+void execute_node(loco::TensorConstantPad *pad)
 {
   validate(pad, "TensorConstantPad is nullptr");
 
@@ -112,4 +114,11 @@ void NodeExecution::execute(loco::TensorConstantPad *pad)
   annot_domain(pad, annot_domain(pad->input()));
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorConstantPad *pad) { execute_node(pad); }
+
 } // namespace locomotiv
index a60ebd8..1f619a3 100644 (file)
@@ -115,10 +115,12 @@ void apply(Buffer<T> &lhs, const Buffer<T> &rhs, const loco::TensorReduce &node)
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::TensorReduce *node)
+using namespace locomotiv;
+
+void execute_node(loco::TensorReduce *node)
 {
   auto input_data = annot_data(node->input());
   validate(input_data, "Input not ready");
@@ -149,4 +151,11 @@ void NodeExecution::execute(loco::TensorReduce *node)
   annot_domain(node, annot_domain(node->input()));
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorReduce *node) { execute_node(node); }
+
 } // namespace locomotiv
index 3ea4f07..bec15a5 100644 (file)
@@ -147,10 +147,12 @@ Buffer<RET_T> calc_tr_conv2D(const loco::TransposedConv2D *tr_conv2d,
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::TransposedConv2D *tr_conv2d)
+using namespace locomotiv;
+
+void execute_node(loco::TransposedConv2D *tr_conv2d)
 {
   auto ifm_data = annot_data(tr_conv2d->ifm());
   auto ker_data = annot_data(tr_conv2d->ker());
@@ -186,4 +188,11 @@ void NodeExecution::execute(loco::TransposedConv2D *tr_conv2d)
   annot_domain(tr_conv2d, loco::Domain::Feature);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TransposedConv2D *tr_conv2d) { execute_node(tr_conv2d); }
+
 } // namespace locomotiv
index 6a66f14..47b68fa 100644 (file)
@@ -1,6 +1,7 @@
-nnas_find_package(TensorFlowSource EXACT 2.1.0 QUIET)
-nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.1.0 QUIET)
-nnas_find_package(TensorFlowEigenSource EXACT 2.1.0 QUIET)
+nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET)
+nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET)
+nnas_find_package(TensorFlowEigenSource EXACT 2.3.0 QUIET)
+nnas_find_package(TensorFlowRuySource EXACT 2.3.0 QUIET)
 
 if (NOT TensorFlowSource_FOUND)
   message(STATUS "Skipping luci-interpreter: TensorFlow not found")
@@ -17,6 +18,11 @@ if (NOT TensorFlowEigenSource_FOUND)
   return()
 endif ()
 
+if (NOT TensorFlowRuySource_FOUND)
+  message(STATUS "Skipping luci-interpreter: Ruy not found")
+  return()
+endif ()
+
 add_subdirectory(core)
 add_subdirectory(kernels)
 add_subdirectory(loader)
index 5ac3b2f..2ab7ff0 100644 (file)
@@ -93,6 +93,21 @@ TYPED_TEST(ArgMaxTest, MultiDimensions)
                             /*dimension_data=*/{3}, /*output_data=*/{3, 1});
 }
 
+TEST(ArgMaxTest, UnsupportedType_NEG)
+{
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4}, {
+                                                                             1, 2, 7, 8, 1, 9, 7, 3,
+                                                                         });
+  Tensor dimension_tensor = makeInputTensor<DataType::S32>({}, {3});
+  Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+  ArgMaxParams params{};
+  params.output_type = DataType::U8;
+  ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params);
+  kernel.configure();
+  EXPECT_ANY_THROW(kernel.execute());
+}
+
 } // namespace
 } // namespace kernels
 } // namespace luci_interpreter
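
The new ArgMax case introduces the *_NEG naming convention used by the test hunks throughout this import: a negative test builds an intentionally invalid kernel setup and expects configure() or execute() to throw. Here configure() succeeds and the unsupported U8 output type is rejected only at execute() time, which is why the throw is asserted on execute(). The convention in miniature, with a toy function rather than a kernel:

    #include <gtest/gtest.h>

    #include <stdexcept>

    int checked_sqrt_floor(int v)
    {
      if (v < 0)
        throw std::runtime_error("negative input");
      int r = 0;
      while ((r + 1) * (r + 1) <= v)
        ++r;
      return r;
    }

    TEST(CheckedSqrtTest, Simple) { EXPECT_EQ(checked_sqrt_floor(10), 3); }

    // Suffix _NEG marks the failure-path test.
    TEST(CheckedSqrtTest, NegativeInput_NEG) { EXPECT_ANY_THROW(checked_sqrt_floor(-1)); }
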
index 6d1b8ea..cdd81d7 100644 (file)
@@ -35,6 +35,14 @@ AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, const Pool2DPa
 
 void AveragePool2D::configure()
 {
+  if (input()->element_type() != output()->element_type())
+  {
+    throw std::runtime_error("Input Tensor and Output Tensor Type must be same");
+  }
+  if (input()->shape().num_dims() != 4)
+  {
+    throw std::runtime_error("Input Tensor Shape must be 4-D");
+  }
   const Shape &input_shape = input()->shape();
 
   const int32_t batches = input_shape.dim(0);
@@ -51,7 +59,14 @@ void AveragePool2D::configure()
       computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
   _padding_width =
       computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
-
+  if (input()->element_type() == DataType::U8)
+  {
+    if (input()->scale() != output()->scale() || input()->zero_point() != output()->zero_point())
+    {
+      throw std::runtime_error(
+          "Quantization param for Input and output must be same(scale or zero-point)");
+    }
+  }
   output()->resize({batches, output_height, output_width, depth});
 }
 
index 7160e49..cc80e5e 100644 (file)
@@ -122,6 +122,80 @@ TEST(AveragePool2DTest, Uint8_1)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1}));
 }
 
+TEST(AveragePool2DTest, Invalid_Input_Shape_NEG)
+{
+  Shape input_shape{1, 3, 5};
+  std::vector<float> input_data{
+      -4, -3, -2, -1, 0,  //
+      1,  2,  3,  4,  5,  //
+      6,  7,  8,  9,  10, //
+  };
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Pool2DParams params{};
+  params.padding = Padding::VALID;
+  params.filter_height = 2;
+  params.filter_width = 3;
+  params.stride_height = 1;
+  params.stride_width = 2;
+  params.activation = Activation::RELU6;
+
+  AveragePool2D kernel(&input_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(AveragePool2DTest, In_Out_Type_NEG)
+{
+  Shape input_shape{1, 3, 5, 1};
+  std::vector<float> input_data{
+      -4, -3, -2, -1, 0,  //
+      1,  2,  3,  4,  5,  //
+      6,  7,  8,  9,  10, //
+  };
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+  Pool2DParams params{};
+  params.padding = Padding::VALID;
+  params.filter_height = 2;
+  params.filter_width = 3;
+  params.stride_height = 1;
+  params.stride_width = 2;
+  params.activation = Activation::RELU6;
+
+  AveragePool2D kernel(&input_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(AveragePool2DTest, Quant_Param_NEG)
+{
+  std::pair<float, int32_t> quant_param1 = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
+  std::pair<float, int32_t> quant_param2 = quantizationParams<uint8_t>(-7.875f, 7.875f);
+  Tensor input_tensor{
+      DataType::U8, {1, 2, 4, 1}, {{quant_param1.first}, {quant_param1.second}}, ""};
+  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param2.first, quant_param2.second);
+
+  std::vector<uint8_t> quant_input = quantize<uint8_t>(
+      {
+          0, -6, 12, 4,  //
+          -3, -2, 10, 7, //
+      },
+      quant_param1.first, quant_param1.second);
+  input_tensor.writeData(quant_input.data(), quant_input.size() * sizeof(uint8_t));
+
+  Pool2DParams params{};
+  params.padding = Padding::VALID;
+  params.filter_height = 2;
+  params.filter_width = 2;
+  params.stride_height = 2;
+  params.stride_width = 2;
+  params.activation = Activation::RELU6;
+
+  AveragePool2D kernel(&input_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
 } // namespace
 } // namespace kernels
 } // namespace luci_interpreter
index a1fd1de..040ac59 100644 (file)
@@ -44,6 +44,8 @@ set(SOURCES
     Reshape.cpp
     Reverse.h
     Reverse.cpp
+    Rsqrt.h
+    Rsqrt.cpp
     Slice.h
     Slice.cpp
     Softmax.h
@@ -54,8 +56,12 @@ set(SOURCES
     Split.cpp
     StridedSlice.h
     StridedSlice.cpp
+    Sqrt.h
+    Sqrt.cpp
     Squeeze.h
     Squeeze.cpp
+    Tanh.h
+    Tanh.cpp
     Transpose.h
     Transpose.cpp
     TransposeConv.h
@@ -63,12 +69,13 @@ set(SOURCES
     Unpack.h
     Unpack.cpp)
 
-list(APPEND SOURCES Utils.h Utils.cpp)
+list(APPEND SOURCES Utils.h Utils.cpp ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
 
 add_library(luci_interpreter_kernels STATIC ${SOURCES})
 set_target_properties(luci_interpreter_kernels PROPERTIES POSITION_INDEPENDENT_CODE ON)
 target_include_directories(luci_interpreter_kernels PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
 target_include_directories(luci_interpreter_kernels SYSTEM PRIVATE
+    "${TensorFlowRuySource_DIR}"
     "${TensorFlowGEMMLowpSource_DIR}"
     "${TensorFlowEigenSource_DIR}"
     "${TensorFlowSource_DIR}")
@@ -99,12 +106,15 @@ set(TEST_SOURCES
     Pad.test.cpp
     Reshape.test.cpp
     Reverse.test.cpp
+    Rsqrt.test.cpp
     Slice.test.cpp
     Softmax.test.cpp
     SpaceToDepth.test.cpp
     Split.test.cpp
     StridedSlice.test.cpp
+    Sqrt.test.cpp
     Squeeze.test.cpp
+    Tanh.test.cpp
     Transpose.test.cpp
     TransposeConv.test.cpp
     Unpack.test.cpp)
index 60e6134..a51fb4a 100644 (file)
@@ -47,21 +47,21 @@ void Conv2D::configure()
   // We only support (1) and (3) for now.
   if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
   {
-    assert(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
   }
   else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
   {
-    assert(bias() == nullptr || bias()->element_type() == DataType::S32);
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
   }
   else
   {
     throw std::runtime_error("Unsupported type.");
   }
-  assert(output()->element_type() == input()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());
 
   const Shape &input_shape = input()->shape();
   const Shape &filter_shape = filter()->shape();
-  assert(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
+  LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
 
   const int32_t batches = input_shape.dim(0);
   const int32_t input_height = input_shape.dim(1);
@@ -69,10 +69,10 @@ void Conv2D::configure()
   const int32_t output_depth = filter_shape.dim(0);
   const int32_t filter_height = filter_shape.dim(1);
   const int32_t filter_width = filter_shape.dim(2);
-  assert(filter_shape.dim(3) == input_shape.dim(3));
+  LUCI_INTERPRETER_CHECK(filter_shape.dim(3) == input_shape.dim(3));
 
-  assert(bias() == nullptr ||
-         (bias()->shape().num_dims() == 1 && bias()->shape().dim(0) == output_depth));
+  LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
+                                               bias()->shape().dim(0) == output_depth));
 
   const int32_t output_height =
       computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
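
Conv2D::configure() swaps bare assert() for LUCI_INTERPRETER_CHECK. The difference matters in release builds: assert() compiles away under NDEBUG, while the macro keeps validating and reports failure by throwing, which is exactly what the new Conv2D *_NEG tests below rely on. The macro itself is defined elsewhere in luci-interpreter; the usual shape of such a check, offered as an assumption rather than the verbatim definition:

    #include <stdexcept>
    #include <string>

    // Active in all build types; throws instead of aborting so callers (and
    // tests) can observe an invalid kernel configuration.
    #define LUCI_INTERPRETER_CHECK(cond)                                     \
      do                                                                     \
      {                                                                      \
        if (!(cond))                                                         \
          throw std::runtime_error(std::string("check failed: ") + #cond);   \
      } while (false)
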
index ef9ace9..0446d97 100644 (file)
@@ -180,6 +180,146 @@ TEST(Conv2DTest, Uint8)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
 }
 
+TEST(Conv2DTest, Unsupported_Type_Configure_NEG)
+{
+  Shape input_shape{1, 4, 3, 2};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{2};
+  std::vector<int32_t> input_data{
+      1,  2,  3,  4,  5,  6,  // row = 0
+      7,  8,  9,  10, 11, 12, // row = 1
+      13, 14, 15, 16, 17, 18, // row = 2
+      19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+      1,  2,  -3, -4, // out = 0, row = 0
+      -5, 6,  -7, 8,  // out = 1, row = 0
+      4,  -2, 3,  -1, // out = 0, row = 1
+      -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_data{1, 2};
+  Tensor input_tensor = makeInputTensor<DataType::S32>(input_shape, input_data);
+  Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
+  Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(Conv2DTest, Invalid_Bias_Type_NEG)
+{
+  Shape input_shape{1, 4, 3, 2};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{2};
+  std::vector<float> input_data{
+      1,  2,  3,  4,  5,  6,  // row = 0
+      7,  8,  9,  10, 11, 12, // row = 1
+      13, 14, 15, 16, 17, 18, // row = 2
+      19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+      1,  2,  -3, -4, // out = 0, row = 0
+      -5, 6,  -7, 8,  // out = 1, row = 0
+      4,  -2, 3,  -1, // out = 0, row = 1
+      -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<uint8_t> bias_data{1, 2};
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
+  Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(Conv2DTest, Invalid_Bias_Data_NEG)
+{
+  Shape input_shape{1, 4, 3, 2};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{3};
+  std::vector<float> input_data{
+      1,  2,  3,  4,  5,  6,  // row = 0
+      7,  8,  9,  10, 11, 12, // row = 1
+      13, 14, 15, 16, 17, 18, // row = 2
+      19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+      1,  2,  -3, -4, // out = 0, row = 0
+      -5, 6,  -7, 8,  // out = 1, row = 0
+      4,  -2, 3,  -1, // out = 0, row = 1
+      -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_data{1, 2, 3};
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
+  Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(Conv2DTest, Invalid_Input_Shape_NEG)
+{
+  Shape input_shape{1, 4, 6, 1};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{2};
+  std::vector<float> input_data{
+      1,  2,  3,  4,  5,  6,  // row = 0
+      7,  8,  9,  10, 11, 12, // row = 1
+      13, 14, 15, 16, 17, 18, // row = 2
+      19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+      1,  2,  -3, -4, // out = 0, row = 0
+      -5, 6,  -7, 8,  // out = 1, row = 0
+      4,  -2, 3,  -1, // out = 0, row = 1
+      -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_data{1, 2};
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
+  Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
 } // namespace
 } // namespace kernels
 } // namespace luci_interpreter
index fce01a6..1a26deb 100644 (file)
@@ -39,12 +39,10 @@ void LeakyRelu::configure()
   assert(input()->element_type() == output()->element_type());
   if (input()->element_type() == DataType::U8)
   {
-    _q_alpha = static_cast<uint8_t>(std::max<float>(
-        std::numeric_limits<uint8_t>::min(),
-        std::min<float>(std::numeric_limits<uint8_t>::max(),
-                        std::round(input()->zero_point() + (params().alpha / input()->scale())))));
-    double real_multiplier = input()->scale() * input()->scale() / output()->scale();
-    quantizeMultiplierSmallerThanOneExp(real_multiplier, &_output_multiplier, &_output_shift);
+    double alpha_multiplier = input()->scale() * params().alpha / output()->scale();
+    quantizeMultiplier(alpha_multiplier, &_output_multiplier_alpha, &_output_shift_alpha);
+    double identity_multiplier = input()->scale() / output()->scale();
+    quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
   }
   output()->resize(input()->shape());
 }
@@ -77,15 +75,15 @@ void LeakyRelu::evalQuantized() const
 {
   tflite::LeakyReluParams op_params{};
   op_params.input_offset = input()->zero_point();
-  op_params.alpha_offset = input()->zero_point();
   op_params.output_offset = output()->zero_point();
-
-  op_params.output_multiplier = _output_multiplier;
-  op_params.output_shift = _output_shift;
+  op_params.output_multiplier_alpha = _output_multiplier_alpha;
+  op_params.output_shift_alpha = _output_shift_alpha;
+  op_params.output_multiplier_identity = _output_multiplier_identity;
+  op_params.output_shift_identity = _output_shift_identity;
 
   tflite::reference_ops::QuantizeLeakyRelu(
-      op_params, _q_alpha, getTensorShape(input()), getTensorData<uint8_t>(input()),
-      getTensorShape(output()), getTensorData<uint8_t>(output()));
+      op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(output()),
+      getTensorData<uint8_t>(output()));
 }
 
 } // namespace kernels
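
The quantized LeakyRelu path is reworked to match the newer TFLite kernels (note the 2.1.0 to 2.3.0 source bump in the luci-interpreter CMake hunk above): instead of quantizing alpha itself into a uint8 (_q_alpha) with a single output multiplier, configure() precomputes two fixed-point rescale factors — input_scale * alpha / output_scale for the negative branch and input_scale / output_scale for the identity branch — and hands both (multiplier, shift) pairs to QuantizeLeakyRelu. Each pair encodes a double as value ≈ multiplier * 2^(shift - 31). A sketch of that decomposition in the style of TFLite's QuantizeMultiplier (illustrative re-derivation; the real helper is quantizeMultiplier from kernels/Utils, and edge cases such as very small magnitudes are elided):

    #include <cmath>
    #include <cstdint>

    void quantize_multiplier(double value, int32_t *multiplier, int *shift)
    {
      if (value == 0.0)
      {
        *multiplier = 0;
        *shift = 0;
        return;
      }
      // value = significand * 2^shift with significand in [0.5, 1).
      const double significand = std::frexp(value, shift);
      auto fixed = static_cast<int64_t>(std::round(significand * (1ll << 31)));
      if (fixed == (1ll << 31)) // rounding pushed the significand to 1.0; renormalize
      {
        fixed /= 2;
        ++*shift;
      }
      *multiplier = static_cast<int32_t>(fixed);
    }
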
index dcc2be9..e66f404 100644 (file)
@@ -41,9 +41,10 @@ private:
   void evalQuantized() const;
 
 private:
-  uint8_t _q_alpha = 0;
-  int32_t _output_multiplier = 0;
-  int _output_shift = 0;
+  int32_t _output_multiplier_alpha = 0;
+  int _output_shift_alpha = 0;
+  int32_t _output_multiplier_identity = 0;
+  int _output_shift_identity = 0;
 };
 
 } // namespace kernels
index a6e721a..dd31aa0 100644 (file)
@@ -19,7 +19,8 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
 
 #include <stdexcept>
 
@@ -66,13 +67,13 @@ void Mul::evalFloat() const
 
   if (need_broadcast)
   {
-    tflite::reference_ops::BroadcastMul4DSlow(
+    tflite::optimized_ops::BroadcastMul4DSlow(
         params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
         getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
   }
   else
   {
-    tflite::reference_ops::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
+    tflite::optimized_ops::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
                                getTensorShape(input2()), getTensorData<float>(input2()),
                                getTensorShape(output()), getTensorData<float>(output()));
   }
diff --git a/compiler/luci-interpreter/src/kernels/Rsqrt.cpp b/compiler/luci-interpreter/src/kernels/Rsqrt.cpp
new file mode 100644 (file)
index 0000000..6dd92dc
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Rsqrt.h"
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Rsqrt::Rsqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Rsqrt::configure()
+{
+  if (input()->element_type() != output()->element_type())
+  {
+    throw std::runtime_error("Input/output tensor data type mismatch.");
+  }
+  output()->resize(input()->shape());
+}
+
+void Rsqrt::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+void Rsqrt::evalFloat() const
+{
+  auto in = getTensorData<float>(input());
+  auto out = getTensorData<float>(output());
+  auto size = getTensorShape(input()).FlatSize();
+  for (auto i = in; i != in + size; ++i)
+  {
+    *out = 1.f / std::sqrt(*i);
+    ++out;
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
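
The three new elementwise kernels (Rsqrt here, Sqrt below, and Tanh as listed in the kernels CMake hunk) share one skeleton: configure() only checks that input and output element types match and mirrors the input shape onto the output, execute() dispatches on element type, and the float path walks the flat buffer with raw pointers. Typical lifecycle, as exercised by the accompanying tests (fragment; the Tensor helpers come from kernels/TestUtils.h):

    Tensor input = makeInputTensor<DataType::FLOAT32>({4}, {1.f, 4.f, 9.f, 16.f});
    Tensor output = makeOutputTensor(DataType::FLOAT32);

    Rsqrt kernel(&input, &output);
    kernel.configure(); // validates dtypes, resizes output to {4}
    kernel.execute();   // output holds {1.0, 0.5, 1/3, 0.25}
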
diff --git a/compiler/luci-interpreter/src/kernels/Rsqrt.h b/compiler/luci-interpreter/src/kernels/Rsqrt.h
new file mode 100644 (file)
index 0000000..adc5bcf
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RSQRT_H
+#define LUCI_INTERPRETER_KERNELS_RSQRT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Rsqrt : public Kernel
+{
+public:
+  Rsqrt(const Tensor *input, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RSQRT_H
diff --git a/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp b/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp
new file mode 100644 (file)
index 0000000..69b55d2
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Rsqrt.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+  Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""};
+  input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(float));
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Rsqrt kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ArrayFloatNear(output_data)));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(RsqrtTest, SimpleRsqrt)
+{
+  Check(
+      /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+      /*input_data=*/
+      {
+          5, 4, 8, 2,     //
+          6, 7.5, 9, 0.3, //
+      },
+      /*output_data=*/
+      {
+          0.44721360, 0.5, 0.35355339, 0.70710678,       //
+          0.40824829, 0.36514837, 0.33333333, 1.8257419, //
+      });
+}
+
+TEST(RsqrtTest, Input_Output_Type_NEG)
+{
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  Rsqrt kernel(&input_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(RsqrtTest, Invalid_Input_Type_NEG)
+{
+  Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1});
+  Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+  Rsqrt kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Sqrt.cpp b/compiler/luci-interpreter/src/kernels/Sqrt.cpp
new file mode 100644 (file)
index 0000000..46e9fc9
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sqrt.h"
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Sqrt::Sqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Sqrt::configure()
+{
+  if (input()->element_type() != output()->element_type())
+  {
+    throw std::runtime_error("Input/output tensor data type mismatch.");
+  }
+  output()->resize(input()->shape());
+}
+
+void Sqrt::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+void Sqrt::evalFloat() const
+{
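+  // Element-wise sqrt over the flattened input buffer.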
+  auto in = getTensorData<float>(input());
+  auto out = getTensorData<float>(output());
+  auto size = getTensorShape(input()).FlatSize();
+  for (auto i = in; i != in + size; ++i)
+  {
+    *out = std::sqrt(*i);
+    ++out;
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Sqrt.h b/compiler/luci-interpreter/src/kernels/Sqrt.h
new file mode 100644 (file)
index 0000000..4034655
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQRT_H
+#define LUCI_INTERPRETER_KERNELS_SQRT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Sqrt : public Kernel
+{
+public:
+  Sqrt(const Tensor *input, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQRT_H
diff --git a/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp b/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp
new file mode 100644 (file)
index 0000000..cdd2082
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sqrt.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
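+// Runs the Sqrt kernel on 'input_data' and checks the element-wise result
+// against 'output_data' within float tolerance.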
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+  Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""};
+  input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(float));
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Sqrt kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ArrayFloatNear(output_data)));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(SqrtTest, SimpleSqrt)
+{
+  Check(
+      /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+      /*input_data=*/
+      {
+          0, 8, 2, 4,    //
+          3, 7, 10, 0.3, //
+      },
+      /*output_data=*/
+      {
+          0.0, 2.8284271, 1.4142136, 2,                //
+          1.7320508, 2.6457513, 3.1622777, 0.54772256, //
+      });
+}
+
+TEST(SqrtTest, Input_Output_Type_NEG)
+{
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  Sqrt kernel(&input_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(SqrtTest, Invalid_Input_Type_NEG)
+{
+  Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1});
+  Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+  Sqrt kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Tanh.cpp b/compiler/luci-interpreter/src/kernels/Tanh.cpp
new file mode 100644 (file)
index 0000000..b649d5d
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Tanh.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Tanh::Tanh(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Tanh::configure()
+{
+  assert(input()->element_type() == output()->element_type());
+  if (input()->element_type() == DataType::U8)
+  {
+    populateLookupTable();
+  }
+  output()->resize(input()->shape());
+}
+
+void Tanh::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+void Tanh::evalFloat() const
+{
+  tflite::reference_ops::Tanh(getTensorShape(input()), getTensorData<float>(input()),
+                              getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void Tanh::evalQuantized() const
+{
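+  // Map each quantized input byte through the precomputed tanh lookup table.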
+  const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+  uint8_t *output_data = getTensorData<uint8_t>(output());
+  const uint8_t *input_data = getTensorData<uint8_t>(input());
+  for (int i = 0; i < size; ++i)
+  {
+    output_data[i] = getTableValue(input_data[i]);
+  }
+}
+
+void Tanh::populateLookupTable()
+{
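+  // Precompute tanh for all 256 possible uint8 input values: dequantize each
+  // value with the input scale/zero-point, apply std::tanh, then requantize
+  // with the output parameters, clamping to the uint8 range.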
+  const auto input_scale = static_cast<double>(input()->scale());
+  const auto input_zero_point = static_cast<int32_t>(input()->zero_point());
+  const auto output_scale = static_cast<double>(output()->scale());
+  const auto output_zero_point = static_cast<int32_t>(output()->zero_point());
+  const float inverse_scale = 1 / output_scale;
+  int32_t maxval = std::numeric_limits<uint8_t>::max();
+  int32_t minval = std::numeric_limits<uint8_t>::min();
+  for (int32_t val = minval; val <= maxval; ++val)
+  {
+    const float dequantized = input_scale * (val - input_zero_point);
+    const float transformed = std::tanh(dequantized);
+    const float rescaled = std::round(transformed * inverse_scale);
+    const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
+    setTableValue(static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)),
+                  static_cast<uint8_t>(val));
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Tanh.h b/compiler/luci-interpreter/src/kernels/Tanh.h
new file mode 100644 (file)
index 0000000..8017c96
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TANH_H
+#define LUCI_INTERPRETER_KERNELS_TANH_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Tanh : public Kernel
+{
+public:
+  Tanh(const Tensor *input, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+  void evalQuantized() const;
+  void populateLookupTable();
+  void setTableValue(uint8_t value, uint8_t idx) { _table[idx] = value; }
+  uint8_t getTableValue(uint8_t idx) const { return _table[idx]; }
+
+private:
+  uint8_t _table[256]{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TANH_H
diff --git a/compiler/luci-interpreter/src/kernels/Tanh.test.cpp b/compiler/luci-interpreter/src/kernels/Tanh.test.cpp
new file mode 100644 (file)
index 0000000..392b867
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Tanh.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(TanhTest, Float)
+{
+  Shape input_shape{1, 2, 4, 1};
+  std::vector<float> input_data{
+      0, -6, 2,  4, //
+      3, -2, 10, 1, //
+  };
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tanh kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  kernel.execute();
+
+  std::vector<float> ref_output_data{
+      0,          -0.9999877, 0.9640275, 0.999329,  //
+      0.99505475, -0.9640275, 1,         0.7615941, //
+  };
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ElementsAreArray(ArrayFloatNear(ref_output_data)));
+}
+
+TEST(TanhTest, Uint8)
+{
+  float kMin = -1;
+  float kMax = 127.f / 128.f;
+  float kTanhTolerance = 2 * (1. / 256);
+  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(8 * kMin, 8 * kMax);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(kMin, kMax);
+  std::vector<float> input_data{
+      0,  -6, 2, 4, //
+      -4, -2, 8, 1, //
+      0,  -6, 2, 4, //
+      -4, -2, 8, 1, //
+      0,  -6, 2, 4, //
+      -4, -2, 8, 1, //
+      0,  -6, 2, 4, //
+      -4, -2, 8, 1, //
+      0,  -6, 2, 4, //
+      -4, -2, 8, 1, //
+      0,  -6, 2, 4, //
+      -4, -2, 8, 1, //
+  };
+  Tensor input_tensor{
+      DataType::U8, {2, 6, 4, 1}, {{input_quant_param.first}, {input_quant_param.second}}, ""};
+  Tensor output_tensor =
+      makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+  std::vector<uint8_t> quantize_input =
+      quantize<uint8_t>(input_data, input_quant_param.first, input_quant_param.second);
+  input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t));
+
+  Tanh kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  kernel.execute();
+
+  std::vector<float> ref_output_data{
+      0.0,       -0.999987, 0.964027, 0.999329, //
+      -0.999329, -0.96402,  0.99999,  0.76159,  //
+      0.0,       -0.999987, 0.964027, 0.999329, //
+      -0.999329, -0.96402,  0.99999,  0.76159,  //
+      0.0,       -0.999987, 0.964027, 0.999329, //
+      -0.999329, -0.96402,  0.99999,  0.76159,  //
+      0.0,       -0.999987, 0.964027, 0.999329, //
+      -0.999329, -0.96402,  0.99999,  0.76159,  //
+      0.0,       -0.999987, 0.964027, 0.999329, //
+      -0.999329, -0.96402,  0.99999,  0.76159,  //
+      0.0,       -0.999987, 0.964027, 0.999329, //
+      -0.999329, -0.96402,  0.99999,  0.76159,  //
+  };
+  std::vector<int32_t> ref_output_shape{2, 6, 4, 1};
+  EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
+                                  output_tensor.zero_point()),
+              ElementsAreArray(ArrayFloatNear(ref_output_data, kTanhTolerance)));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
index 46380e2..898bae3 100644 (file)
@@ -30,8 +30,8 @@ namespace kernels
 {
 
 TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
-                             Tensor *output, const TransposeConvParams &params)
-    : KernelWithParams<TransposeConvParams>({output_shape, filter, input}, {output}, params)
+                             const Tensor *bias, Tensor *output, const TransposeConvParams &params)
+    : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias}, {output}, params)
 {
 }
 
@@ -106,8 +106,9 @@ void TransposeConv::evalFloat() const
   op_params.output_multiplier = _output_multiplier;
   tflite::reference_ops::TransposeConv(
       op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
-      getTensorData<float>(filter()), getTensorShape(output()), getTensorData<float>(output()),
-      tflite::RuntimeShape(), (float *)nullptr);
+      getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
+      getTensorShape(output()), getTensorData<float>(output()), tflite::RuntimeShape(),
+      (float *)nullptr);
 }
 
 void TransposeConv::evalQuantized() const
@@ -145,8 +146,9 @@ void TransposeConv::evalQuantized() const
 
   tflite::reference_ops::TransposeConv(
       op_params, getTensorShape(input()), getTensorData<uint8>(input()), getTensorShape(filter()),
-      getTensorData<uint8>(filter()), getTensorShape(output()), getTensorData<uint8>(output()),
-      tflite::RuntimeShape(), (uint8 *)nullptr, getTensorData<int32_t>(_scratch_tensor.get()));
+      getTensorData<uint8>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+      getTensorShape(output()), getTensorData<uint8>(output()), tflite::RuntimeShape(),
+      (uint8 *)nullptr, getTensorData<int32_t>(_scratch_tensor.get()));
 }
 
 } // namespace kernels
index d73e939..3a0eae7 100644 (file)
@@ -29,11 +29,12 @@ class TransposeConv : public KernelWithParams<TransposeConvParams>
 {
 public:
   TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
-                Tensor *output, const TransposeConvParams &params);
+                const Tensor *bias, Tensor *output, const TransposeConvParams &params);
 
   const Tensor *output_shape() const { return _inputs[0]; }
   const Tensor *filter() const { return _inputs[1]; }
   const Tensor *input() const { return _inputs[2]; }
+  const Tensor *bias() const { return _inputs[3]; }
   Tensor *output() const { return _outputs[0]; }
 
   void configure() override;
index b8c0ac4..0fbe932 100644 (file)
@@ -26,15 +26,15 @@ namespace
 
 using namespace testing;
 
-template <typename T>
+template <typename T, typename B>
 void Check(std::initializer_list<int32_t> output_shape_shape,
            std::initializer_list<int32_t> weight_shape,
            std::initializer_list<int32_t> input_data_shape,
-           std::initializer_list<int32_t> output_shape,
+           std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
            std::initializer_list<int32_t> output_shape_data, std::initializer_list<T> weight_data,
-           std::initializer_list<T> input_data_data, std::initializer_list<T> output_data,
-           luci::Padding padding, int32_t stride_height, int32_t stride_width,
-           DataType element_type)
+           std::initializer_list<T> input_data_data, std::initializer_list<B> bias_data,
+           std::initializer_list<T> output_data, luci::Padding padding, int32_t stride_height,
+           int32_t stride_width, DataType element_type)
 {
   Tensor output_shape_tensor{element_type, output_shape_shape, {}, ""};
   output_shape_tensor.writeData(output_shape_data.begin(), output_shape_data.size() * sizeof(T));
@@ -50,21 +50,32 @@ void Check(std::initializer_list<int32_t> output_shape_shape,
   params.stride_height = stride_height;
   params.stride_width = stride_width;
 
-  TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &output_tensor,
-                       params);
-  kernel.configure();
-  kernel.execute();
-
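+  // Bias is optional: construct the kernel with a real bias tensor when bias
+  // data is provided, and with nullptr otherwise.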
+  if (bias_data.size() != 0)
+  {
+    Tensor bias_tensor = makeInputTensor<getElementType<B>()>(bias_shape, bias_data);
+    TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &bias_tensor,
+                         &output_tensor, params);
+    kernel.configure();
+    kernel.execute();
+  }
+  else
+  {
+    TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, nullptr,
+                         &output_tensor, params);
+    kernel.configure();
+    kernel.execute();
+  }
   EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
 }
 
 TEST(TransposeConvTest, FloatSimple)
 {
-  Check<float>(
+  Check<float, float>(
       /*outputShape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1},
-      /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1},
+      /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1},
       /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9},
       /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+      /*bias_data=*/{},
       /*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365},
       /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1,
       getElementType<float>());
@@ -74,12 +85,13 @@ TEST(TransposeConvTest, FloatSimple)
 
 TEST(TransposeConvTest, FloatTwoFiltersTest)
 {
-  Check<float>(
+  Check<float, float>(
       /*outputShape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2},
-      /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1},
+      /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1},
       /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18},
       /*input_data=*/{1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
+      /*bias_data=*/{},
       /*output_data=*/{184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968,
                        3352, 3652, 2760},
       /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1,
@@ -88,6 +100,24 @@ TEST(TransposeConvTest, FloatTwoFiltersTest)
   SUCCEED();
 }
 
+TEST(TransposeConvTest, SimpleBiasTest)
+{
+  Check<float, float>(
+      /*outputShape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1},
+      /*input_shape=*/{1, 2, 2, 1},
+      /*bias_shape=*/{2}, /*output_shape=*/{1, 5, 5, 2}, /*outputShape_data=*/{1, 5, 5, 2},
+      /*weight_data=*/{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18},
+      /*input_data=*/{1, 2, 3, 4},
+      /*bias_data=*/{3, 4},
+      /*output_data=*/{4,  6,  6,  8,  10, 14, 9,  12, 13, 16, 10,  12,  12, 14, 28, 32, 21,
+                       24, 25, 28, 19, 24, 27, 32, 65, 76, 45, 52,  57,  64, 24, 28, 30, 34,
+                       64, 72, 39, 44, 47, 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76},
+      /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2,
+      getElementType<float>());
+
+  SUCCEED();
+}
+
 // TODO Uint8Simple
 // Implement GetDequantizedOutput Function.
 // Create Test for Uint8 Case
index 3c2cc84..7927151 100644 (file)
@@ -31,6 +31,11 @@ namespace luci_interpreter
 namespace kernels
 {
 
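+// Throws std::runtime_error reporting the file, line, and failed condition
+// when 'cond' does not hold.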
+#define LUCI_INTERPRETER_CHECK(cond)                                                        \
+  if (!(cond))                                                                              \
+    throw std::runtime_error(std::string(__FILE__) + ":" + std::to_string(__LINE__) + "(" + \
+                             std::string(#cond) + ") was not true.");
+
 inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
                               int32_t filter_size, int32_t out_size)
 {
index 12c7f45..126a1cb 100644 (file)
 #include "kernels/Pad.h"
 #include "kernels/Reshape.h"
 #include "kernels/Reverse.h"
+#include "kernels/Rsqrt.h"
 #include "kernels/Slice.h"
 #include "kernels/Softmax.h"
 #include "kernels/SpaceToDepth.h"
 #include "kernels/Split.h"
 #include "kernels/StridedSlice.h"
+#include "kernels/Sqrt.h"
 #include "kernels/Squeeze.h"
+#include "kernels/Tanh.h"
 #include "kernels/Unpack.h"
 #include "kernels/Transpose.h"
 #include "kernels/TransposeConv.h"
@@ -430,6 +433,16 @@ std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleReverseV2 *node)
   return std::make_unique<kernels::Reverse>(input, axes, output);
 }
 
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleRsqrt *node)
+{
+  assert(node->arity() == 1);
+
+  const Tensor *input = getInputTensor(node->x());
+  Tensor *output = getOutputTensor(node);
+
+  return std::make_unique<kernels::Rsqrt>(input, output);
+}
+
 std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSlice *node)
 {
   assert(node->arity() == 3);
@@ -483,6 +496,16 @@ std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSplit *node)
   return std::make_unique<kernels::Split>(axis, input, std::move(outputs));
 }
 
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqrt *node)
+{
+  assert(node->arity() == 1);
+
+  const Tensor *input = getInputTensor(node->x());
+  Tensor *output = getOutputTensor(node);
+
+  return std::make_unique<kernels::Sqrt>(input, output);
+}
+
 std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqueeze *node)
 {
   assert(node->arity() == 1);
@@ -517,6 +540,16 @@ std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleStridedSlice *nod
   return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
 }
 
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTanh *node)
+{
+  assert(node->arity() == 1);
+
+  const Tensor *input = getInputTensor(node->x());
+  Tensor *output = getOutputTensor(node);
+
+  return std::make_unique<kernels::Tanh>(input, output);
+}
+
 std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTranspose *node)
 {
   assert(node->arity() == 2);
@@ -530,11 +563,12 @@ std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTranspose *node)
 
 std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTransposeConv *node)
 {
-  assert(node->arity() == 3);
+  assert(node->arity() == 4);
 
   const Tensor *input_sizes = getInputTensor(node->inputSizes());
   const Tensor *filter = getInputTensor(node->filter());
   const Tensor *out_backprop = getInputTensor(node->outBackprop());
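+  // The bias input is optional; getOptionalInputTensor returns nullptr when it is absent.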
+  const Tensor *bias = getOptionalInputTensor(node->bias());
 
   Tensor *output = getOutputTensor(node);
 
@@ -543,7 +577,7 @@ std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTransposeConv *no
   params.stride_height = node->stride()->h();
   params.stride_width = node->stride()->w();
 
-  return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, output,
+  return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output,
                                                   params);
 }
 
index d5c5a4b..31cb9d8 100644 (file)
@@ -63,12 +63,15 @@ public:
   std::unique_ptr<Kernel> visit(const luci::CirclePad *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleReshape *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleReverseV2 *node) override;
+  std::unique_ptr<Kernel> visit(const luci::CircleRsqrt *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleSlice *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleSoftmax *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleSpaceToDepth *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleSplit *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleStridedSlice *node) override;
+  std::unique_ptr<Kernel> visit(const luci::CircleSqrt *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleSqueeze *node) override;
+  std::unique_ptr<Kernel> visit(const luci::CircleTanh *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleTranspose *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleTransposeConv *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleUnpack *node) override;
index 33bc8ec..4e2bc3d 100644 (file)
 #include <kernels/Pad.h>
 #include <kernels/Reshape.h>
 #include <kernels/Reverse.h>
+#include <kernels/Rsqrt.h>
 #include <kernels/Slice.h>
 #include <kernels/Softmax.h>
 #include <kernels/SpaceToDepth.h>
 #include <kernels/Split.h>
+#include <kernels/Sqrt.h>
 #include <kernels/Squeeze.h>
 #include <kernels/StridedSlice.h>
+#include <kernels/Tanh.h>
 #include <kernels/Transpose.h>
 #include <kernels/TransposeConv.h>
 #include <kernels/Unpack.h>
@@ -529,6 +532,20 @@ TEST_F(KernelBuilderTest, ReverseV2)
   checkTensor(kernel->output(), op);
 }
 
+TEST_F(KernelBuilderTest, Rsqrt)
+{
+  auto *input = createInputNode();
+
+  auto *op = createNode<luci::CircleRsqrt>();
+  op->x(input);
+
+  auto kernel = buildKernel<kernels::Rsqrt>(op);
+  ASSERT_THAT(kernel, NotNull());
+
+  checkTensor(kernel->input(), input);
+  checkTensor(kernel->output(), op);
+}
+
 TEST_F(KernelBuilderTest, Slice)
 {
   auto *input = createInputNode();
@@ -605,6 +622,20 @@ TEST_F(KernelBuilderTest, Split)
   checkTensor(kernel->output(1), output2);
 }
 
+TEST_F(KernelBuilderTest, Sqrt)
+{
+  auto *input = createInputNode();
+
+  auto *op = createNode<luci::CircleSqrt>();
+  op->x(input);
+
+  auto kernel = buildKernel<kernels::Sqrt>(op);
+  ASSERT_THAT(kernel, NotNull());
+
+  checkTensor(kernel->input(), input);
+  checkTensor(kernel->output(), op);
+}
+
 TEST_F(KernelBuilderTest, Squeeze)
 {
   auto *input = createInputNode();
@@ -656,6 +687,20 @@ TEST_F(KernelBuilderTest, StridedSlice)
   EXPECT_THAT(kernel->params().shrink_axis_mask, Eq(op->shrink_axis_mask()));
 }
 
+TEST_F(KernelBuilderTest, Tanh)
+{
+  auto *input = createInputNode();
+
+  auto *op = createNode<luci::CircleTanh>();
+  op->x(input);
+
+  auto kernel = buildKernel<kernels::Tanh>(op);
+  ASSERT_THAT(kernel, NotNull());
+
+  checkTensor(kernel->input(), input);
+  checkTensor(kernel->output(), op);
+}
+
 TEST_F(KernelBuilderTest, Transpose)
 {
   auto *input = createInputNode();
@@ -678,11 +723,13 @@ TEST_F(KernelBuilderTest, TransposeConv)
   auto *output_shape = createInputNode();
   auto *filter = createInputNode();
   auto *input = createInputNode();
+  auto *bias = createInputNode();
 
   auto *op = createNode<luci::CircleTransposeConv>();
   op->inputSizes(output_shape);
   op->filter(filter);
   op->outBackprop(input);
+  op->bias(bias);
 
   op->padding(luci::Padding::SAME);
   op->stride()->h(11);
@@ -695,6 +742,7 @@ TEST_F(KernelBuilderTest, TransposeConv)
   checkTensor(kernel->filter(), filter);
   checkTensor(kernel->input(), input);
   checkTensor(kernel->output(), op);
+  checkTensor(kernel->bias(), bias);
   EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
   EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
   EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
index f3b6dfc..f2a4ff4 100644 (file)
@@ -1,8 +1,6 @@
 
 set(SRCS_EVAL_TESTER
       src/EvalTester.cpp
-      src/CircleExpContract.h
-      src/CircleExpContract.cpp
    )
 
 add_executable(luci_eval_tester ${SRCS_EVAL_TESTER})
diff --git a/compiler/luci-value-test/tester/src/CircleExpContract.h b/compiler/luci-value-test/tester/src/CircleExpContract.h
deleted file mode 100644 (file)
index 4d08fb8..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_VALUE_TEST_CIRCLEXPCONTRACT_H__
-#define __LUCI_VALUE_TEST_CIRCLEXPCONTRACT_H__
-
-#include <loco.h>
-#include <luci/CircleExporter.h>
-#include <luci/IR/Module.h>
-
-#include <memory>
-#include <string>
-
-struct CircleExpContract : public luci::CircleExporter::Contract
-{
-public:
-  CircleExpContract(luci::Module *module, const std::string &filename)
-      : _module(module), _filepath(filename)
-  {
-    // NOTHING TO DO
-  }
-  virtual ~CircleExpContract() = default;
-
-public:
-  loco::Graph *graph(void) const final { return nullptr; }
-  luci::Module *module(void) const final { return _module; };
-
-public:
-  bool store(const char *ptr, const size_t size) const final;
-
-private:
-  luci::Module *_module;
-  const std::string _filepath;
-};
-
-#endif // __LUCI_VALUE_TEST_CIRCLEXPCONTRACT_H__
index 09eef22..b49602e 100644 (file)
  * limitations under the License.
  */
 
-#include "CircleExpContract.h"
-
 #include <luci/Importer.h>
 #include <luci_interpreter/Interpreter.h>
+#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
 
 #include <cstdlib>
 #include <fstream>
@@ -104,7 +104,9 @@ int entry(int argc, char **argv)
 
   // Export to a Circle file
   luci::CircleExporter exporter;
-  CircleExpContract contract(initial_module.get(), intermediate_filename);
+
+  luci::CircleFileExpContract contract(initial_module.get(), intermediate_filename);
+
   if (!exporter.invoke(&contract))
   {
     std::cerr << "ERROR: Failed to export '" << intermediate_filename << "'" << std::endl;
  * limitations under the License.
  */
 
-#ifndef __RECORD_MINMAX_CIRCLEXPCONTRACT_H__
-#define __RECORD_MINMAX_CIRCLEXPCONTRACT_H__
+#ifndef __LUCI_CIRCLEFILEEXPCONTRACT_H__
+#define __LUCI_CIRCLEFILEEXPCONTRACT_H__
 
 #include <loco.h>
 #include <luci/CircleExporter.h>
 #include <luci/IR/Module.h>
+#include <oops/InternalExn.h>
 
 #include <string>
+#include <fstream>
+#include <iostream>
 
-namespace record_minmax
+namespace luci
 {
 
-struct CircleExpContract : public luci::CircleExporter::Contract
+struct CircleFileExpContract : public luci::CircleExporter::Contract
 {
 public:
-  CircleExpContract(luci::Module *module, const std::string &filename)
+  CircleFileExpContract(luci::Module *module, const std::string &filename)
       : _module(module), _filepath(filename)
   {
     // NOTHING TO DO
   }
-  virtual ~CircleExpContract() = default;
+  virtual ~CircleFileExpContract() = default;
 
 public:
   loco::Graph *graph(void) const final { return nullptr; }
-  luci::Module *module(void) const final { return _module; };
+  luci::Module *module(void) const final { return _module; }
 
 public:
-  bool store(const char *ptr, const size_t size) const final;
+  bool store(const char *ptr, const size_t size) const final
+  {
+    if (!ptr)
+      INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
+
+    std::ofstream fs(_filepath, std::ofstream::binary);
+    fs.write(ptr, size);
+
+    return fs.good();
+  }
 
 private:
   luci::Module *_module;
   const std::string _filepath;
 };
 
-} // namespace record_minmax
+} // namespace luci
 
-#endif // __RECORD_MINMAX_CIRCLEXPCONTRACT_H__
+#endif // __LUCI_CIRCLEFILEEXPCONTRACT_H__
index bca1220..36d61f6 100644 (file)
@@ -38,12 +38,578 @@ namespace
 
 using namespace luci;
 
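+// Bundles the FlatBufferBuilder with the serialized model/graph data so the
+// free-function exporters below can share one state object.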
+struct ExportContext
+{
+  FlatBufferBuilder &builder;
+  SerializedModelData &md;
+  SerializedGraphData &gd;
+};
+
+/**
+ * @brief Exports CircleMaxPool2D or CircleAveragePool2D
+ *
+ * @note  CirclePool2D should be one of CircleMaxPool2D or CircleAveragePool2D
+ */
+template <class CirclePool2D>
+void export_pool_2d(ExportContext &ctx, CirclePool2D *node, circle::BuiltinOperator builtin_op)
+{
+  LUCI_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D ||
+                  builtin_op == circle::BuiltinOperator_L2_POOL_2D ||
+                  builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
+              "Should be L2Pool, MaxPool or AvgPool");
+  LUCI_ASSERT(node->padding() != luci::Padding::UNDEFINED, "Padding is not set");
+
+  uint32_t op_idx = ctx.md.registerBuiltinOpcode(builtin_op, node->op_version());
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+
+  circle::Padding padding = getOpPadding(node->padding());
+
+  auto options = CreatePool2DOptions(ctx.builder, padding, node->stride()->w(), node->stride()->h(),
+                                     node->filter()->w(), node->filter()->h(),
+                                     to_circle_actfunc(node->fusedActivationFunction()));
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_Pool2DOptions, options.Union());
+  ctx.gd._operators.push_back(op_offset);
+}
+
+/**
+ * @brief export simple nodes
+ */
+void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop,
+                 circle::BuiltinOptions bot, flatbuffers::Offset<void> options_offset)
+{
+  uint32_t op_idx =
+      ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
+  std::vector<int32_t> inputs_vec;
+  std::vector<int32_t> outputs_vec{get_tensor_index(node)};
+  for (uint32_t i = 0; i < node->arity(); ++i)
+    inputs_vec.push_back(get_tensor_index(node->arg(i)));
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, bot, options_offset);
+  ctx.gd._operators.push_back(op_offset);
+}
+
+/**
+ * @brief export simple nodes having void options
+ */
+void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop)
+{
+  uint32_t op_idx =
+      ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
+  std::vector<int32_t> inputs_vec;
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  for (uint32_t i = 0; i < node->arity(); ++i)
+    inputs_vec.push_back(get_tensor_index(node->arg(i)));
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs);
+  ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleAddN *node)
+{
+  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_ADD_N, node->op_version());
+  std::vector<int32_t> inputs_vec;
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+
+  for (uint32_t i = 0; i < node->arity(); ++i)
+    inputs_vec.push_back(get_tensor_index(node->inputs(i)));
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateAddNOptions(ctx.builder);
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_AddNOptions, options.Union());
+  ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleCast *node)
+{
+  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CAST, node->op_version());
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+
+  flatbuffers::Offset<Operator> op_offset;
+  if (node->out_data_type() != loco::DataType::Unknown)
+  {
+    auto options = CreateCastOptions(ctx.builder, to_circle_tensortype(node->in_data_type()),
+                                     to_circle_tensortype(node->out_data_type()));
+    op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                               circle::BuiltinOptions_CastOptions, options.Union());
+  }
+  else
+  {
+    op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs);
+  }
+  ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleConcatenation *node)
+{
+  uint32_t op_idx =
+      ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION, node->op_version());
+  std::vector<int32_t> inputs_vec;
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+
+  for (uint32_t i = 0; i < node->numValues(); ++i)
+    inputs_vec.push_back(get_tensor_index(node->values(i)));
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateConcatenationOptions(ctx.builder, node->axis(),
+                                            to_circle_actfunc(node->fusedActivationFunction()));
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_ConcatenationOptions, options.Union());
+  ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleCustom *node)
+{
+  auto custom_outputs = loco::succs(node);
+
+  uint32_t op_idx = ctx.md.registerCustomOpcode(node->custom_code());
+  std::vector<int32_t> inputs_vec;
+  std::vector<int32_t> outputs_vec;
+
+  for (uint32_t index = 0; index < node->numInputs(); index++)
+  {
+    inputs_vec.push_back(get_tensor_index(node->inputs(index)));
+  }
+  for (uint32_t index = 0; index < custom_outputs.size(); index++)
+  {
+    // store in order of index
+    bool found = false;
+    for (auto out : custom_outputs)
+    {
+      auto custom_out = loco::must_cast<luci::CircleCustomOut *>(out);
+      if (custom_out->index() == static_cast<int32_t>(index))
+      {
+        outputs_vec.push_back(get_tensor_index(custom_out));
+        found = true;
+        break;
+      }
+    }
+    if (!found)
+    {
+      INTERNAL_EXN("Invalid Custom output");
+    }
+  }
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  flatbuffers::Offset<flatbuffers::Vector<uint8_t>> circle_custom_options;
+  std::vector<uint8_t> custom_options_vec{node->custom_options().begin(),
+                                          node->custom_options().end()};
+  circle_custom_options = ctx.builder.CreateVector(custom_options_vec);
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, circle::BuiltinOptions_NONE,
+                                  flatbuffers::Offset<void>(), circle_custom_options);
+  ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleIf *node)
+{
+  auto if_outs = loco::succs(node);
+  assert(if_outs.size() == node->output_count());
+
+  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_IF, node->op_version());
+  std::vector<int32_t> inputs_vec;
+  std::vector<int32_t> outputs_vec;
+
+  inputs_vec.push_back(get_tensor_index(node->cond()));
+  for (uint32_t idx = 0; idx < node->input_count(); ++idx)
+    inputs_vec.push_back(get_tensor_index(node->input(idx)));
+
+  for (uint32_t idx = 0; idx < node->output_count(); ++idx)
+  {
+    // store in order of index
+    bool found = false;
+    for (auto out : if_outs)
+    {
+      auto if_out = loco::must_cast<luci::CircleIfOut *>(out);
+      if (if_out->index() == static_cast<int32_t>(idx))
+      {
+        outputs_vec.push_back(get_tensor_index(if_out));
+        found = true;
+        break;
+      }
+    }
+    if (!found)
+    {
+      INTERNAL_EXN("Invalid CircleIf output");
+    }
+  }
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateIfOptions(ctx.builder, node->then_branch(), node->else_branch());
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_IfOptions, options.Union());
+  ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleNonMaxSuppressionV4 *node)
+{
+  auto nms_outs = loco::succs(node);
+  assert(nms_outs.size() == 2);
+
+  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V4,
+                                                 node->op_version());
+  std::vector<int32_t> inputs_vec{
+      get_tensor_index(node->boxes()),           get_tensor_index(node->scores()),
+      get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
+      get_tensor_index(node->score_threshold()),
+  };
+  std::vector<int32_t> outputs_vec;
+
+  for (uint32_t idx = 0; idx < nms_outs.size(); ++idx)
+  {
+    // store in order of index
+    bool found = false;
+    for (auto out : nms_outs)
+    {
+      auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(out);
+      if (nms_out->index() == static_cast<int32_t>(idx))
+      {
+        outputs_vec.push_back(get_tensor_index(nms_out));
+        found = true;
+        break;
+      }
+    }
+    if (!found)
+    {
+      INTERNAL_EXN("Invalid NonMaxSuppressionV4 output");
+    }
+  }
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateNonMaxSuppressionV4Options(ctx.builder);
+  auto op_offset =
+      CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                     circle::BuiltinOptions_NonMaxSuppressionV4Options, options.Union());
+  ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleNonMaxSuppressionV5 *node)
+{
+  auto nms_outs = loco::succs(node);
+  assert(nms_outs.size() == 3);
+
+  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V5,
+                                                 node->op_version());
+  std::vector<int32_t> inputs_vec{
+      get_tensor_index(node->boxes()),           get_tensor_index(node->scores()),
+      get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
+      get_tensor_index(node->score_threshold()), get_tensor_index(node->soft_nms_sigma()),
+  };
+  std::vector<int32_t> outputs_vec;
+
+  for (uint32_t idx = 0; idx < nms_outs.size(); ++idx)
+  {
+    // store in order of index
+    bool found = false;
+    for (auto out : nms_outs)
+    {
+      auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV5Out *>(out);
+      if (nms_out->index() == static_cast<int32_t>(idx))
+      {
+        outputs_vec.push_back(get_tensor_index(nms_out));
+        found = true;
+        break;
+      }
+    }
+    if (!found)
+    {
+      INTERNAL_EXN("Invalid NonMaxSuppressionV5 output");
+    }
+  }
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateNonMaxSuppressionV5Options(ctx.builder);
+  auto op_offset =
+      CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                     circle::BuiltinOptions_NonMaxSuppressionV5Options, options.Union());
+  ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleReverseV2 *node)
+{
+  uint32_t op_idx =
+      ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_REVERSE_V2, node->op_version());
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->tensor()), get_tensor_index(node->axis())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateReverseV2Options(ctx.builder);
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_ReverseV2Options, options.Union());
+  ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleSplit *node)
+{
+  auto split_outs = loco::succs(node);
+  assert(int32_t(split_outs.size()) == node->num_split());
+
+  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT, node->op_version());
+  // NOTE BuiltinOperator_SPLIT input is placed at second position
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->split_dim()),
+                                  get_tensor_index(node->input())};
+  std::vector<int32_t> outputs_vec;
+
+  for (int32_t index = 0; index < node->num_split(); index++)
+  {
+    // store in order of index
+    bool found = false;
+    for (auto out : split_outs)
+    {
+      auto split_out = loco::must_cast<luci::CircleSplitOut *>(out);
+      if (split_out->index() == index)
+      {
+        outputs_vec.push_back(get_tensor_index(split_out));
+        found = true;
+        break;
+      }
+    }
+    if (!found)
+    {
+      INTERNAL_EXN("Invalid Split output");
+    }
+  }
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateSplitOptions(ctx.builder, node->num_split());
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_SplitOptions, options.Union());
+  ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleSplitV *node)
+{
+  auto split_outs = loco::succs(node);
+  assert(int32_t(split_outs.size()) == node->num_split());
+
+  uint32_t op_idx =
+      ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT_V, node->op_version());
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
+                                  get_tensor_index(node->size_splits()),
+                                  get_tensor_index(node->split_dim())};
+  std::vector<int32_t> outputs_vec;
+
+  for (int32_t index = 0; index < node->num_split(); index++)
+  {
+    // store in order of index
+    bool found = false;
+    for (auto out : split_outs)
+    {
+      auto split_out = loco::must_cast<luci::CircleSplitVOut *>(out);
+      if (split_out->index() == index)
+      {
+        outputs_vec.push_back(get_tensor_index(split_out));
+        found = true;
+        break;
+      }
+    }
+    if (!found)
+    {
+      INTERNAL_EXN("Invalid SplitV output");
+    }
+  }
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateSplitVOptions(ctx.builder, node->num_split());
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_SplitVOptions, options.Union());
+  ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleTopKV2 *node)
+{
+  auto topkv2_outs = loco::succs(node);
+  int outs_count = int32_t(topkv2_outs.size());
+  assert(outs_count == 2);
+
+  uint32_t op_idx =
+      ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_TOPK_V2, node->op_version());
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->k())};
+  std::vector<int32_t> outputs_vec;
+
+  for (int32_t index = 0; index < outs_count; index++)
+  {
+    // store in order of index
+    bool found = false;
+    for (auto out : topkv2_outs)
+    {
+      auto topkv2_out = loco::must_cast<luci::CircleTopKV2Out *>(out);
+      if (topkv2_out->index() == index)
+      {
+        outputs_vec.push_back(get_tensor_index(topkv2_out));
+        found = true;
+        break;
+      }
+    }
+    if (!found)
+    {
+      INTERNAL_EXN("Invalid TopKV2 output");
+    }
+  }
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateTopKV2Options(ctx.builder);
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_TopKV2Options, options.Union());
+  ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleUnique *node)
+{
+  auto unique_outs = loco::succs(node);
+  assert(int32_t(unique_outs.size()) == 2);
+  uint32_t op_idx =
+      ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNIQUE, node->op_version());
+
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
+  std::vector<int32_t> outputs_vec;
+
+  for (int32_t index = 0; index < 2; index++)
+  {
+    // store in order of index
+    bool found = false;
+    for (auto out : unique_outs)
+    {
+      auto unique_out = loco::must_cast<luci::CircleUniqueOut *>(out);
+      if (unique_out->index() == index)
+      {
+        outputs_vec.push_back(get_tensor_index(unique_out));
+        found = true;
+        break;
+      }
+    }
+    if (!found)
+    {
+      INTERNAL_EXN("Invalid Unique output");
+    }
+  }
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateUniqueOptions(ctx.builder, to_circle_tensortype(node->idx_out_type()));
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_UniqueOptions, options.Union());
+  ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleUnpack *node)
+{
+  LOGGER(l);
+  auto settings = luci::UserSettings::settings();
+
+  auto unpack_outs = loco::succs(node);
+  // NOTE real models may not use all of the outputs
+  if (static_cast<int32_t>(unpack_outs.size()) != node->num())
+  {
+    if (settings->get(luci::UserSettings::Key::DisableValidation))
+    {
+      WARN(l) << "Warning: export Unpack(" << node->name() << ") 'num' not same as outputs";
+    }
+    else
+      assert(false);
+  }
+
+  uint32_t op_idx =
+      ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNPACK, node->op_version());
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
+  std::vector<int32_t> outputs_vec;
+
+  for (int32_t index = 0; index < node->num(); index++)
+  {
+    // store in order of index
+    bool found = false;
+    for (auto out : unpack_outs)
+    {
+      auto unpack_out = loco::must_cast<luci::CircleUnpackOut *>(out);
+      if (unpack_out->index() == index)
+      {
+        outputs_vec.push_back(get_tensor_index(unpack_out));
+        found = true;
+        break;
+      }
+    }
+    // NOTE real models may not use all of the outputs
+    if (!found)
+    {
+      if (settings->get(luci::UserSettings::Key::DisableValidation))
+      {
+        WARN(l) << "Warning: export Unpack(" << node->name() << ") output " << index << " not used";
+      }
+      else
+        assert(false);
+    }
+  }
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateUnpackOptions(ctx.builder, node->num(), node->axis());
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_UnpackOptions, options.Union());
+  ctx.gd._operators.push_back(op_offset);
+}
+
+void export_node(ExportContext &ctx, luci::CircleWhile *node)
+{
+  auto while_outs = loco::succs(node);
+  assert(while_outs.size() == node->output_count());
+
+  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_WHILE, node->op_version());
+  std::vector<int32_t> inputs_vec;
+  std::vector<int32_t> outputs_vec;
+
+  for (uint32_t idx = 0; idx < node->input_count(); ++idx)
+    inputs_vec.push_back(get_tensor_index(node->input(idx)));
+
+  for (uint32_t idx = 0; idx < node->output_count(); ++idx)
+  {
+    // store in order of index
+    bool found = false;
+    for (auto out : while_outs)
+    {
+      auto while_out = loco::must_cast<luci::CircleWhileOut *>(out);
+      if (while_out->index() == static_cast<int32_t>(idx))
+      {
+        outputs_vec.push_back(get_tensor_index(while_out));
+        found = true;
+        break;
+      }
+    }
+    if (!found)
+    {
+      INTERNAL_EXN("Invalid CircleWhile output");
+    }
+  }
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateWhileOptions(ctx.builder, node->cond_branch(), node->body_branch());
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_WhileOptions, options.Union());
+  ctx.gd._operators.push_back(op_offset);
+}
+
 class OperationExporter final : public luci::CircleNodeMutableVisitor<void>,
                                 public loco::CanonicalNodeMutableVisitor<void>
 {
 public:
-  OperationExporter(FlatBufferBuilder &fbb, SerializedModelData &m, SerializedGraphData &g)
-      : builder{fbb}, md{m}, gd{g}
+  OperationExporter(ExportContext &ctx) : _ctx{ctx}
   {
     // DO NOTHING
   }
@@ -103,10 +669,12 @@ public:
   void visit(luci::CircleMul *) final;
   void visit(luci::CircleNeg *) final;
   void visit(luci::CircleNonMaxSuppressionV4 *) final;
+  void visit(luci::CircleNonMaxSuppressionV5 *) final;
   void visit(luci::CircleNotEqual *) final;
   void visit(luci::CircleOneHot *) final;
   void visit(luci::CirclePack *) final;
   void visit(luci::CirclePad *) final;
+  void visit(luci::CirclePadV2 *) final;
   void visit(luci::CirclePow *) final;
   void visit(luci::CirclePRelu *) final;
   void visit(luci::CircleRange *) final;
@@ -168,6 +736,7 @@ public:
   void visit(luci::CircleCustomOut *) final {}
   void visit(luci::CircleIfOut *) final {}
   void visit(luci::CircleNonMaxSuppressionV4Out *) final {}
+  void visit(luci::CircleNonMaxSuppressionV5Out *) final {}
   void visit(luci::CircleSplitOut *) final {}
   void visit(luci::CircleSplitVOut *) final {}
   void visit(luci::CircleTopKV2Out *) final {}
@@ -177,14 +746,6 @@ public:
 
 private:
   /**
-   * @brief Exports CircleMaxPool2D or CircleAveragePool2D
-   *
-   * @note  CirclePool2D should be one of CircleMaxPool2D or CircleAveragePool2D
-   */
-  template <class CirclePool2D>
-  void export_pool_2d(CirclePool2D *node, circle::BuiltinOperator builtin_op);
-
-  /**
    * @brief export simple nodes
    */
   void export_simple(loco::Node *node, circle::BuiltinOperator bop, circle::BuiltinOptions bot,
@@ -196,179 +757,83 @@ private:
   void export_simple(loco::Node *node, circle::BuiltinOperator bop);
 
 private:
-  FlatBufferBuilder &builder;
-  SerializedModelData &md;
-  SerializedGraphData &gd;
+  ExportContext &_ctx;
 };
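
The three separate reference members (builder, md, gd) are folded into a single ExportContext. Its definition is outside this hunk; judging from its uses here (ctx.builder, ctx.md, ctx.gd) and the aggregate initialization ExportContext ctx{builder, md, gd} near the end of this diff, it is presumably a plain bundle of references, roughly:

// Presumed shape of ExportContext (definition not shown in this diff);
// member order inferred from "ExportContext ctx{builder, md, gd}" below.
struct ExportContext
{
  flatbuffers::FlatBufferBuilder &builder;
  luci::SerializedModelData &md;
  luci::SerializedGraphData &gd;
};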
 
-template <class CirclePool2D>
-void OperationExporter::export_pool_2d(CirclePool2D *node, circle::BuiltinOperator builtin_op)
-{
-  LUCI_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D ||
-                  builtin_op == circle::BuiltinOperator_L2_POOL_2D ||
-                  builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
-              "Should be L2Pool, MaxPool or AvgPool");
-  LUCI_ASSERT(node->padding() != luci::Padding::UNDEFINED, "Padding is not set");
-
-  uint32_t op_idx = md.registerBuiltinOpcode(builtin_op, node->op_version());
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
-  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-
-  circle::Padding padding = getOpPadding(node->padding());
-
-  auto options = CreatePool2DOptions(builder, padding, node->stride()->w(), node->stride()->h(),
-                                     node->filter()->w(), node->filter()->h(),
-                                     to_circle_actfunc(node->fusedActivationFunction()));
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_Pool2DOptions, options.Union());
-  gd._operators.push_back(op_offset);
-}
-
 void OperationExporter::export_simple(loco::Node *node, circle::BuiltinOperator bop,
                                       circle::BuiltinOptions bot,
                                       flatbuffers::Offset<void> options_offset)
 {
-  uint32_t op_idx =
-      md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec{get_tensor_index(node)};
-  for (uint32_t i = 0; i < node->arity(); ++i)
-    inputs_vec.push_back(get_tensor_index(node->arg(i)));
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs, bot, options_offset);
-  gd._operators.push_back(op_offset);
+  export_node(_ctx, node, bop, bot, options_offset);
 }
 
 void OperationExporter::export_simple(loco::Node *node, circle::BuiltinOperator bop)
 {
-  uint32_t op_idx =
-      md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-  for (uint32_t i = 0; i < node->arity(); ++i)
-    inputs_vec.push_back(get_tensor_index(node->arg(i)));
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
-  gd._operators.push_back(op_offset);
+  export_node(_ctx, node, bop);
 }
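
Both export_simple overloads now forward to free export_node functions whose bodies sit above this hunk. Judging from the deleted member code, the options-taking overload presumably carries the old logic over unchanged, with ctx threaded through:

// Reconstruction of the generic export_node overload from the deleted
// export_simple body above; shown for orientation, not verbatim from the diff.
void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop,
                 circle::BuiltinOptions bot, flatbuffers::Offset<void> options_offset)
{
  uint32_t op_idx =
      ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
  std::vector<int32_t> inputs_vec;
  std::vector<int32_t> outputs_vec{get_tensor_index(node)};
  for (uint32_t i = 0; i < node->arity(); ++i)
    inputs_vec.push_back(get_tensor_index(node->arg(i)));
  auto inputs = ctx.builder.CreateVector(inputs_vec);
  auto outputs = ctx.builder.CreateVector(outputs_vec);
  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, bot, options_offset);
  ctx.gd._operators.push_back(op_offset);
}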
 
 void OperationExporter::visit(luci::CircleAbs *node)
 {
   export_simple(node, circle::BuiltinOperator_ABS, circle::BuiltinOptions_AbsOptions,
-                CreateAbsOptions(builder).Union());
+                CreateAbsOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleAdd *node)
 {
   export_simple(
       node, circle::BuiltinOperator_ADD, circle::BuiltinOptions_AddOptions,
-      CreateAddOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+      CreateAddOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
 }
 
-void OperationExporter::visit(luci::CircleAddN *node)
-{
-  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_ADD_N, node->op_version());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-
-  for (uint32_t i = 0; i < node->arity(); ++i)
-    inputs_vec.push_back(get_tensor_index(node->inputs(i)));
-
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateAddNOptions(builder);
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_AddNOptions, options.Union());
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleAddN *node) { export_node(_ctx, node); }
 
 void OperationExporter::visit(luci::CircleArgMax *node)
 {
-  export_simple(node, circle::BuiltinOperator_ARG_MAX, circle::BuiltinOptions_ArgMaxOptions,
-                CreateArgMaxOptions(builder, to_circle_tensortype(node->output_type())).Union());
+  export_simple(
+      node, circle::BuiltinOperator_ARG_MAX, circle::BuiltinOptions_ArgMaxOptions,
+      CreateArgMaxOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
 }
 
 void OperationExporter::visit(luci::CircleArgMin *node)
 {
-  export_simple(node, circle::BuiltinOperator_ARG_MIN, circle::BuiltinOptions_ArgMinOptions,
-                CreateArgMinOptions(builder, to_circle_tensortype(node->output_type())).Union());
+  export_simple(
+      node, circle::BuiltinOperator_ARG_MIN, circle::BuiltinOptions_ArgMinOptions,
+      CreateArgMinOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
 }
 
 void OperationExporter::visit(luci::CircleAveragePool2D *node)
 {
-  export_pool_2d<luci::CircleAveragePool2D>(node, circle::BuiltinOperator_AVERAGE_POOL_2D);
+  export_pool_2d<luci::CircleAveragePool2D>(_ctx, node, circle::BuiltinOperator_AVERAGE_POOL_2D);
 }
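
export_pool_2d is no longer a member (its template declaration was deleted above) but is still called here with _ctx as an extra first argument, so it has presumably been relocated to a free function template with the same body:

// Presumed relocated form of export_pool_2d, carried over from the deleted
// member template above with ctx threaded through.
template <class CirclePool2D>
void export_pool_2d(ExportContext &ctx, CirclePool2D *node, circle::BuiltinOperator builtin_op)
{
  LUCI_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D ||
                  builtin_op == circle::BuiltinOperator_L2_POOL_2D ||
                  builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
              "Should be L2Pool, MaxPool or AvgPool");
  LUCI_ASSERT(node->padding() != luci::Padding::UNDEFINED, "Padding is not set");

  uint32_t op_idx = ctx.md.registerBuiltinOpcode(builtin_op, node->op_version());
  std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
  auto inputs = ctx.builder.CreateVector(inputs_vec);
  auto outputs = ctx.builder.CreateVector(outputs_vec);

  circle::Padding padding = getOpPadding(node->padding());
  auto options = CreatePool2DOptions(ctx.builder, padding, node->stride()->w(),
                                     node->stride()->h(), node->filter()->w(), node->filter()->h(),
                                     to_circle_actfunc(node->fusedActivationFunction()));
  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
                                  circle::BuiltinOptions_Pool2DOptions, options.Union());
  ctx.gd._operators.push_back(op_offset);
}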
 
 void OperationExporter::visit(luci::CircleBatchMatMul *node)
 {
   export_simple(node, circle::BuiltinOperator_BATCH_MATMUL,
                 circle::BuiltinOptions_BatchMatMulOptions,
-                CreateBatchMatMulOptions(builder, node->adj_x(), node->adj_y()).Union());
+                CreateBatchMatMulOptions(_ctx.builder, node->adj_x(), node->adj_y()).Union());
 }
 
-void OperationExporter::visit(luci::CircleCast *node)
-{
-  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_CAST, node->op_version());
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->x())};
-  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-
-  flatbuffers::Offset<Operator> op_offset;
-  if (node->out_data_type() != loco::DataType::Unknown)
-  {
-    auto options = CreateCastOptions(builder, to_circle_tensortype(node->in_data_type()),
-                                     to_circle_tensortype(node->out_data_type()));
-    op_offset = CreateOperator(builder, op_idx, inputs, outputs, circle::BuiltinOptions_CastOptions,
-                               options.Union());
-  }
-  else
-  {
-    op_offset = CreateOperator(builder, op_idx, inputs, outputs);
-  }
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleCast *node) { export_node(_ctx, node); }
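
Note what the deleted Cast body did: CastOptions were attached only when out_data_type() was known; otherwise the operator was emitted with no options at all. The export_node(ctx, CircleCast *) overload that replaces it, not shown in this hunk, presumably preserves that branch:

// Presumed core of export_node(ExportContext &, luci::CircleCast *), keeping
// the options-only-when-output-type-known branch deleted above.
flatbuffers::Offset<circle::Operator> op_offset;
if (node->out_data_type() != loco::DataType::Unknown)
{
  auto options = CreateCastOptions(ctx.builder, to_circle_tensortype(node->in_data_type()),
                                   to_circle_tensortype(node->out_data_type()));
  op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
                             circle::BuiltinOptions_CastOptions, options.Union());
}
else
{
  op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs);
}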
 
 void OperationExporter::visit(luci::CircleCeil *node)
 {
   export_simple(node, circle::BuiltinOperator_CEIL);
 }
 
-void OperationExporter::visit(luci::CircleConcatenation *node)
-{
-  uint32_t op_idx =
-      md.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION, node->op_version());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-
-  for (uint32_t i = 0; i < node->numValues(); ++i)
-    inputs_vec.push_back(get_tensor_index(node->values(i)));
-
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateConcatenationOptions(builder, node->axis(),
-                                            to_circle_actfunc(node->fusedActivationFunction()));
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_ConcatenationOptions, options.Union());
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleConcatenation *node) { export_node(_ctx, node); }
 
 void OperationExporter::visit(luci::CircleBatchToSpaceND *node)
 {
   export_simple(node, circle::BuiltinOperator_BATCH_TO_SPACE_ND,
                 circle::BuiltinOptions_BatchToSpaceNDOptions,
-                CreateBatchToSpaceNDOptions(builder).Union());
+                CreateBatchToSpaceNDOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleConv2D *node)
 {
   export_simple(node, circle::BuiltinOperator_CONV_2D, circle::BuiltinOptions_Conv2DOptions,
-                CreateConv2DOptions(builder, getOpPadding(node->padding()), node->stride()->w(),
-                                    node->stride()->h(),
+                CreateConv2DOptions(_ctx.builder, getOpPadding(node->padding()),
+                                    node->stride()->w(), node->stride()->h(),
                                     to_circle_actfunc(node->fusedActivationFunction()),
                                     node->dilation()->w(), node->dilation()->h())
                     .Union());
@@ -377,64 +842,23 @@ void OperationExporter::visit(luci::CircleConv2D *node)
 void OperationExporter::visit(luci::CircleCos *node)
 {
   export_simple(node, circle::BuiltinOperator_COS, circle::BuiltinOptions_CosOptions,
-                CreateCosOptions(builder).Union());
+                CreateCosOptions(_ctx.builder).Union());
 }
 
-void OperationExporter::visit(luci::CircleCustom *node)
-{
-  auto custom_outputs = loco::succs(node);
-
-  uint32_t op_idx = md.registerCustomOpcode(node->custom_code());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec;
-
-  for (uint32_t index = 0; index < node->numInputs(); index++)
-  {
-    inputs_vec.push_back(get_tensor_index(node->inputs(index)));
-  }
-  for (uint32_t index = 0; index < custom_outputs.size(); index++)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : custom_outputs)
-    {
-      auto custom_out = loco::must_cast<luci::CircleCustomOut *>(out);
-      if (custom_out->index() == static_cast<int32_t>(index))
-      {
-        outputs_vec.push_back(get_tensor_index(custom_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid Custom output");
-    }
-  }
-
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  flatbuffers::Offset<flatbuffers::Vector<uint8_t>> circle_custom_options;
-  std::vector<uint8_t> custom_options_vec{node->custom_options().begin(),
-                                          node->custom_options().end()};
-  circle_custom_options = builder.CreateVector(custom_options_vec);
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs, circle::BuiltinOptions_NONE,
-                                  flatbuffers::Offset<void>(), circle_custom_options);
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleCustom *node) { export_node(_ctx, node); }
 
 void OperationExporter::visit(luci::CircleDepthToSpace *node)
 {
   export_simple(node, circle::BuiltinOperator_DEPTH_TO_SPACE,
                 circle::BuiltinOptions_DepthToSpaceOptions,
-                CreateDepthToSpaceOptions(builder, node->block_size()).Union());
+                CreateDepthToSpaceOptions(_ctx.builder, node->block_size()).Union());
 }
 
 void OperationExporter::visit(luci::CircleDepthwiseConv2D *node)
 {
   export_simple(node, circle::BuiltinOperator_DEPTHWISE_CONV_2D,
                 circle::BuiltinOptions_DepthwiseConv2DOptions,
-                CreateDepthwiseConv2DOptions(builder, getOpPadding(node->padding()),
+                CreateDepthwiseConv2DOptions(_ctx.builder, getOpPadding(node->padding()),
                                              node->stride()->w(), node->stride()->h(),
                                              node->depthMultiplier(),
                                              to_circle_actfunc(node->fusedActivationFunction()),
@@ -446,7 +870,7 @@ void OperationExporter::visit(luci::CircleDiv *node)
 {
   export_simple(
       node, circle::BuiltinOperator_DIV, circle::BuiltinOptions_DivOptions,
-      CreateDivOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+      CreateDivOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
 }
 
 void OperationExporter::visit(luci::CircleElu *node)
@@ -457,25 +881,25 @@ void OperationExporter::visit(luci::CircleElu *node)
 void OperationExporter::visit(luci::CircleEqual *node)
 {
   export_simple(node, circle::BuiltinOperator_EQUAL, circle::BuiltinOptions_EqualOptions,
-                CreateEqualOptions(builder).Union());
+                CreateEqualOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleExp *node)
 {
   export_simple(node, circle::BuiltinOperator_EXP, circle::BuiltinOptions_ExpOptions,
-                CreateExpOptions(builder).Union());
+                CreateExpOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleExpandDims *node)
 {
   export_simple(node, circle::BuiltinOperator_EXPAND_DIMS, circle::BuiltinOptions_ExpandDimsOptions,
-                CreateExpandDimsOptions(builder).Union());
+                CreateExpandDimsOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleFill *node)
 {
   export_simple(node, circle::BuiltinOperator_FILL, circle::BuiltinOptions_FillOptions,
-                CreateFillOptions(builder).Union());
+                CreateFillOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleFloor *node)
@@ -486,124 +910,86 @@ void OperationExporter::visit(luci::CircleFloor *node)
 void OperationExporter::visit(luci::CircleFloorDiv *node)
 {
   export_simple(node, circle::BuiltinOperator_FLOOR_DIV, circle::BuiltinOptions_FloorDivOptions,
-                CreateFloorDivOptions(builder).Union());
+                CreateFloorDivOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleFloorMod *node)
 {
   export_simple(node, circle::BuiltinOperator_FLOOR_MOD, circle::BuiltinOptions_FloorModOptions,
-                CreateFloorModOptions(builder).Union());
+                CreateFloorModOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleFullyConnected *node)
 {
   export_simple(
       node, circle::BuiltinOperator_FULLY_CONNECTED, circle::BuiltinOptions_FullyConnectedOptions,
-      CreateFullyConnectedOptions(builder, to_circle_actfunc(node->fusedActivationFunction()))
+      CreateFullyConnectedOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()))
           .Union());
 }
 
 void OperationExporter::visit(luci::CircleGather *node)
 {
   export_simple(node, circle::BuiltinOperator_GATHER, circle::BuiltinOptions_GatherOptions,
-                CreateGatherOptions(builder, node->axis()).Union());
+                CreateGatherOptions(_ctx.builder, node->axis()).Union());
 }
 
 void OperationExporter::visit(luci::CircleGatherNd *node)
 {
   export_simple(node, circle::BuiltinOperator_GATHER_ND, circle::BuiltinOptions_GatherNdOptions,
-                CreateGatherNdOptions(builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleGreater *node)
-{
-  export_simple(node, circle::BuiltinOperator_GREATER, circle::BuiltinOptions_GreaterOptions,
-                CreateGreaterOptions(builder).Union());
-}
-
-void OperationExporter::visit(luci::CircleGreaterEqual *node)
-{
-  export_simple(node, circle::BuiltinOperator_GREATER_EQUAL,
-                circle::BuiltinOptions_GreaterEqualOptions,
-                CreateGreaterEqualOptions(builder).Union());
+                CreateGatherNdOptions(_ctx.builder).Union());
 }
 
-void OperationExporter::visit(luci::CircleIf *node)
-{
-  auto if_outs = loco::succs(node);
-  assert(if_outs.size() == node->output_count());
-
-  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_IF, node->op_version());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec;
-
-  inputs_vec.push_back(get_tensor_index(node->cond()));
-  for (uint32_t idx = 0; idx < node->input_count(); ++idx)
-    inputs_vec.push_back(get_tensor_index(node->input(idx)));
-
-  for (uint32_t idx = 0; idx < node->output_count(); ++idx)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : if_outs)
-    {
-      auto if_out = loco::must_cast<luci::CircleIfOut *>(out);
-      if (if_out->index() == static_cast<int32_t>(idx))
-      {
-        outputs_vec.push_back(get_tensor_index(if_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid CircleIf output");
-    }
-  }
+void OperationExporter::visit(luci::CircleGreater *node)
+{
+  export_simple(node, circle::BuiltinOperator_GREATER, circle::BuiltinOptions_GreaterOptions,
+                CreateGreaterOptions(_ctx.builder).Union());
+}
 
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateIfOptions(builder, node->then_branch(), node->else_branch());
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_IfOptions, options.Union());
-  gd._operators.push_back(op_offset);
+void OperationExporter::visit(luci::CircleGreaterEqual *node)
+{
+  export_simple(node, circle::BuiltinOperator_GREATER_EQUAL,
+                circle::BuiltinOptions_GreaterEqualOptions,
+                CreateGreaterEqualOptions(_ctx.builder).Union());
 }
 
+void OperationExporter::visit(luci::CircleIf *node) { export_node(_ctx, node); }
+
 void OperationExporter::visit(luci::CircleL2Normalize *node)
 {
   export_simple(
       node, circle::BuiltinOperator_L2_NORMALIZATION, circle::BuiltinOptions_L2NormOptions,
-      CreateL2NormOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+      CreateL2NormOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()))
+          .Union());
 }
 
 void OperationExporter::visit(luci::CircleL2Pool2D *node)
 {
-  export_pool_2d<luci::CircleL2Pool2D>(node, circle::BuiltinOperator_L2_POOL_2D);
+  export_pool_2d<luci::CircleL2Pool2D>(_ctx, node, circle::BuiltinOperator_L2_POOL_2D);
 }
 
 void OperationExporter::visit(luci::CircleLeakyRelu *node)
 {
   export_simple(node, circle::BuiltinOperator_LEAKY_RELU, circle::BuiltinOptions_LeakyReluOptions,
-                CreateLeakyReluOptions(builder, node->alpha()).Union());
+                CreateLeakyReluOptions(_ctx.builder, node->alpha()).Union());
 }
 
 void OperationExporter::visit(luci::CircleLess *node)
 {
   export_simple(node, circle::BuiltinOperator_LESS, circle::BuiltinOptions_LessOptions,
-                CreateLessOptions(builder).Union());
+                CreateLessOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleLessEqual *node)
 {
   export_simple(node, circle::BuiltinOperator_LESS_EQUAL, circle::BuiltinOptions_LessEqualOptions,
-                CreateLessEqualOptions(builder).Union());
+                CreateLessEqualOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleLocalResponseNormalization *node)
 {
   export_simple(node, circle::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
                 circle::BuiltinOptions_LocalResponseNormalizationOptions,
-                CreateLocalResponseNormalizationOptions(builder, node->radius(), node->bias(),
+                CreateLocalResponseNormalizationOptions(_ctx.builder, node->radius(), node->bias(),
                                                         node->alpha(), node->beta())
                     .Union());
 }
@@ -616,19 +1002,19 @@ void OperationExporter::visit(luci::CircleLog *node)
 void OperationExporter::visit(luci::CircleLogicalAnd *node)
 {
   export_simple(node, circle::BuiltinOperator_LOGICAL_AND, circle::BuiltinOptions_LogicalAndOptions,
-                CreateLogicalAndOptions(builder).Union());
+                CreateLogicalAndOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleLogicalNot *node)
 {
   export_simple(node, circle::BuiltinOperator_LOGICAL_NOT, circle::BuiltinOptions_LogicalNotOptions,
-                CreateLogicalNotOptions(builder).Union());
+                CreateLogicalNotOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleLogicalOr *node)
 {
   export_simple(node, circle::BuiltinOperator_LOGICAL_OR, circle::BuiltinOptions_LogicalOrOptions,
-                CreateLogicalOrOptions(builder).Union());
+                CreateLogicalOrOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleLogistic *node)
@@ -639,135 +1025,103 @@ void OperationExporter::visit(luci::CircleLogistic *node)
 void OperationExporter::visit(luci::CircleLogSoftmax *node)
 {
   export_simple(node, circle::BuiltinOperator_LOG_SOFTMAX, circle::BuiltinOptions_LogSoftmaxOptions,
-                CreateLogSoftmaxOptions(builder).Union());
+                CreateLogSoftmaxOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleMatrixDiag *node)
 {
   export_simple(node, circle::BuiltinOperator_MATRIX_DIAG, circle::BuiltinOptions_MatrixDiagOptions,
-                CreateMatrixDiagOptions(builder).Union());
+                CreateMatrixDiagOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleMatrixSetDiag *node)
 {
   export_simple(node, circle::BuiltinOperator_MATRIX_SET_DIAG,
                 circle::BuiltinOptions_MatrixSetDiagOptions,
-                CreateMatrixSetDiagOptions(builder).Union());
+                CreateMatrixSetDiagOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleMaximum *node)
 {
   export_simple(node, circle::BuiltinOperator_MAXIMUM, circle::BuiltinOptions_MaximumMinimumOptions,
-                CreateMaximumMinimumOptions(builder).Union());
+                CreateMaximumMinimumOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleMaxPool2D *node)
 {
-  export_pool_2d<luci::CircleMaxPool2D>(node, circle::BuiltinOperator_MAX_POOL_2D);
+  export_pool_2d<luci::CircleMaxPool2D>(_ctx, node, circle::BuiltinOperator_MAX_POOL_2D);
 }
 
 void OperationExporter::visit(luci::CircleMean *node)
 {
   export_simple(node, circle::BuiltinOperator_MEAN, circle::BuiltinOptions_ReducerOptions,
-                CreateReducerOptions(builder, node->keep_dims()).Union());
+                CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
 }
 
 void OperationExporter::visit(luci::CircleMinimum *node)
 {
   export_simple(node, circle::BuiltinOperator_MINIMUM, circle::BuiltinOptions_MaximumMinimumOptions,
-                CreateMaximumMinimumOptions(builder).Union());
+                CreateMaximumMinimumOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleMirrorPad *node)
 {
-  export_simple(node, circle::BuiltinOperator_MIRROR_PAD, circle::BuiltinOptions_MirrorPadOptions,
-                CreateMirrorPadOptions(builder, to_circle_mirrorpadmode(node->mode())).Union());
+  export_simple(
+      node, circle::BuiltinOperator_MIRROR_PAD, circle::BuiltinOptions_MirrorPadOptions,
+      CreateMirrorPadOptions(_ctx.builder, to_circle_mirrorpadmode(node->mode())).Union());
 }
 
 void OperationExporter::visit(luci::CircleMul *node)
 {
   export_simple(
       node, circle::BuiltinOperator_MUL, circle::BuiltinOptions_MulOptions,
-      CreateMulOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+      CreateMulOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
 }
 
 void OperationExporter::visit(luci::CircleNeg *node)
 {
   export_simple(node, circle::BuiltinOperator_NEG, circle::BuiltinOptions_NegOptions,
-                CreateNegOptions(builder).Union());
+                CreateNegOptions(_ctx.builder).Union());
 }
 
-void OperationExporter::visit(luci::CircleNonMaxSuppressionV4 *node)
-{
-  auto nms_outs = loco::succs(node);
-  assert(nms_outs.size() == 2);
-
-  uint32_t op_idx =
-      md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V4, node->op_version());
-  std::vector<int32_t> inputs_vec{
-      get_tensor_index(node->boxes()),           get_tensor_index(node->scores()),
-      get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
-      get_tensor_index(node->score_threshold()),
-  };
-  std::vector<int32_t> outputs_vec;
-
-  for (uint32_t idx = 0; idx < nms_outs.size(); ++idx)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : nms_outs)
-    {
-      auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(out);
-      if (nms_out->index() == static_cast<int32_t>(idx))
-      {
-        outputs_vec.push_back(get_tensor_index(nms_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid NonMaxSuppressionV4 output");
-    }
-  }
+void OperationExporter::visit(luci::CircleNonMaxSuppressionV4 *node) { export_node(_ctx, node); }
 
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateNonMaxSuppressionV4Options(builder);
-  auto op_offset =
-      CreateOperator(builder, op_idx, inputs, outputs,
-                     circle::BuiltinOptions_NonMaxSuppressionV4Options, options.Union());
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleNonMaxSuppressionV5 *node) { export_node(_ctx, node); }
 
 void OperationExporter::visit(luci::CircleNotEqual *node)
 {
   export_simple(node, circle::BuiltinOperator_NOT_EQUAL, circle::BuiltinOptions_NotEqualOptions,
-                CreateNotEqualOptions(builder).Union());
+                CreateNotEqualOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleOneHot *node)
 {
   export_simple(node, circle::BuiltinOperator_ONE_HOT, circle::BuiltinOptions_OneHotOptions,
-                CreateOneHotOptions(builder, node->axis()).Union());
+                CreateOneHotOptions(_ctx.builder, node->axis()).Union());
 }
 
 void OperationExporter::visit(luci::CirclePack *node)
 {
   export_simple(node, circle::BuiltinOperator_PACK, circle::BuiltinOptions_PackOptions,
-                CreatePackOptions(builder, node->values_count(), node->axis()).Union());
+                CreatePackOptions(_ctx.builder, node->values_count(), node->axis()).Union());
 }
 
 void OperationExporter::visit(luci::CirclePad *node)
 {
   export_simple(node, circle::BuiltinOperator_PAD, circle::BuiltinOptions_PadOptions,
-                CreatePadOptions(builder).Union());
+                CreatePadOptions(_ctx.builder).Union());
+}
+
+void OperationExporter::visit(luci::CirclePadV2 *node)
+{
+  export_simple(node, circle::BuiltinOperator_PADV2, circle::BuiltinOptions_PadV2Options,
+                CreatePadV2Options(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CirclePow *node)
 {
   export_simple(node, circle::BuiltinOperator_POW, circle::BuiltinOptions_PowOptions,
-                CreatePowOptions(builder).Union());
+                CreatePowOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CirclePRelu *node)
@@ -778,37 +1132,37 @@ void OperationExporter::visit(luci::CirclePRelu *node)
 void OperationExporter::visit(luci::CircleRange *node)
 {
   export_simple(node, circle::BuiltinOperator_RANGE, circle::BuiltinOptions_RangeOptions,
-                CreateRangeOptions(builder).Union());
+                CreateRangeOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleRank *node)
 {
   export_simple(node, circle::BuiltinOperator_RANK, circle::BuiltinOptions_RankOptions,
-                CreateRankOptions(builder).Union());
+                CreateRankOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleReduceAny *node)
 {
   export_simple(node, circle::BuiltinOperator_REDUCE_ANY, circle::BuiltinOptions_ReducerOptions,
-                CreateReducerOptions(builder, node->keep_dims()).Union());
+                CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
 }
 
 void OperationExporter::visit(luci::CircleReduceMax *node)
 {
   export_simple(node, circle::BuiltinOperator_REDUCE_MAX, circle::BuiltinOptions_ReducerOptions,
-                CreateReducerOptions(builder, node->keep_dims()).Union());
+                CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
 }
 
 void OperationExporter::visit(luci::CircleReduceMin *node)
 {
   export_simple(node, circle::BuiltinOperator_REDUCE_MIN, circle::BuiltinOptions_ReducerOptions,
-                CreateReducerOptions(builder, node->keep_dims()).Union());
+                CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
 }
 
 void OperationExporter::visit(luci::CircleReduceProd *node)
 {
   export_simple(node, circle::BuiltinOperator_REDUCE_PROD, circle::BuiltinOptions_ReducerOptions,
-                CreateReducerOptions(builder, node->keep_dims()).Union());
+                CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
 }
 
 void OperationExporter::visit(luci::CircleRelu *node)
@@ -828,18 +1182,18 @@ void OperationExporter::visit(luci::CircleReluN1To1 *node)
 
 void OperationExporter::visit(luci::CircleReshape *node)
 {
-  auto new_shape = builder.CreateVector<int32_t>(
+  auto new_shape = _ctx.builder.CreateVector<int32_t>(
       node->newShape()->rank(), [node](size_t i) { return node->newShape()->dim(i); });
 
   export_simple(node, circle::BuiltinOperator_RESHAPE, circle::BuiltinOptions_ReshapeOptions,
-                CreateReshapeOptions(builder, new_shape).Union());
+                CreateReshapeOptions(_ctx.builder, new_shape).Union());
 }
 
 void OperationExporter::visit(luci::CircleResizeBilinear *node)
 {
   export_simple(
       node, circle::BuiltinOperator_RESIZE_BILINEAR, circle::BuiltinOptions_ResizeBilinearOptions,
-      CreateResizeBilinearOptions(builder, node->align_corners(), node->half_pixel_centers())
+      CreateResizeBilinearOptions(_ctx.builder, node->align_corners(), node->half_pixel_centers())
           .Union());
 }
 
@@ -847,29 +1201,17 @@ void OperationExporter::visit(luci::CircleResizeNearestNeighbor *node)
 {
   export_simple(node, circle::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
                 circle::BuiltinOptions_ResizeNearestNeighborOptions,
-                CreateResizeNearestNeighborOptions(builder, node->align_corners()).Union());
+                CreateResizeNearestNeighborOptions(_ctx.builder, node->align_corners()).Union());
 }
 
 void OperationExporter::visit(luci::CircleReverseSequence *node)
 {
   export_simple(
       node, circle::BuiltinOperator_REVERSE_SEQUENCE, circle::BuiltinOptions_ReverseSequenceOptions,
-      CreateReverseSequenceOptions(builder, node->seq_axis(), node->batch_axis()).Union());
+      CreateReverseSequenceOptions(_ctx.builder, node->seq_axis(), node->batch_axis()).Union());
 }
 
-void OperationExporter::visit(luci::CircleReverseV2 *node)
-{
-  uint32_t op_idx =
-      md.registerBuiltinOpcode(circle::BuiltinOperator_REVERSE_V2, node->op_version());
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->tensor()), get_tensor_index(node->axis())};
-  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateReverseV2Options(builder);
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_ReverseSequenceOptions, options.Union());
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleReverseV2 *node) { export_node(_ctx, node); }
 
 void OperationExporter::visit(luci::CircleRound *node)
 {
@@ -884,31 +1226,31 @@ void OperationExporter::visit(luci::CircleRsqrt *node)
 void OperationExporter::visit(luci::CircleScatterNd *node)
 {
   export_simple(node, circle::BuiltinOperator_SCATTER_ND, circle::BuiltinOptions_ScatterNdOptions,
-                CreateScatterNdOptions(builder).Union());
+                CreateScatterNdOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleSegmentSum *node)
 {
   export_simple(node, circle::BuiltinOperator_SEGMENT_SUM, circle::BuiltinOptions_SegmentSumOptions,
-                CreateSegmentSumOptions(builder).Union());
+                CreateSegmentSumOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleSelect *node)
 {
   export_simple(node, circle::BuiltinOperator_SELECT, circle::BuiltinOptions_SelectOptions,
-                CreateSelectOptions(builder).Union());
+                CreateSelectOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleSelectV2 *node)
 {
   export_simple(node, circle::BuiltinOperator_SELECT_V2, circle::BuiltinOptions_SelectV2Options,
-                CreateSelectV2Options(builder).Union());
+                CreateSelectV2Options(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleShape *node)
 {
   export_simple(node, circle::BuiltinOperator_SHAPE, circle::BuiltinOptions_ShapeOptions,
-                CreateShapeOptions(builder, to_circle_tensortype(node->out_type())).Union());
+                CreateShapeOptions(_ctx.builder, to_circle_tensortype(node->out_type())).Union());
 }
 
 void OperationExporter::visit(luci::CircleSin *node)
@@ -919,113 +1261,39 @@ void OperationExporter::visit(luci::CircleSin *node)
 void OperationExporter::visit(luci::CircleSlice *node)
 {
   export_simple(node, circle::BuiltinOperator_SLICE, circle::BuiltinOptions_SliceOptions,
-                CreateSliceOptions(builder).Union());
+                CreateSliceOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleSoftmax *node)
 {
   export_simple(node, circle::BuiltinOperator_SOFTMAX, circle::BuiltinOptions_SoftmaxOptions,
-                CreateSoftmaxOptions(builder, node->beta()).Union());
+                CreateSoftmaxOptions(_ctx.builder, node->beta()).Union());
 }
 
 void OperationExporter::visit(luci::CircleSpaceToBatchND *node)
 {
   export_simple(node, circle::BuiltinOperator_SPACE_TO_BATCH_ND,
                 circle::BuiltinOptions_SpaceToBatchNDOptions,
-                CreateSpaceToBatchNDOptions(builder).Union());
+                CreateSpaceToBatchNDOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleSpaceToDepth *node)
 {
   export_simple(node, circle::BuiltinOperator_SPACE_TO_DEPTH,
                 circle::BuiltinOptions_SpaceToDepthOptions,
-                CreateSpaceToDepthOptions(builder, node->block_size()).Union());
+                CreateSpaceToDepthOptions(_ctx.builder, node->block_size()).Union());
 }
 
 void OperationExporter::visit(luci::CircleSparseToDense *node)
 {
   export_simple(node, circle::BuiltinOperator_SPARSE_TO_DENSE,
                 circle::BuiltinOptions_SparseToDenseOptions,
-                CreateSparseToDenseOptions(builder, node->validate_indices()).Union());
-}
-
-void OperationExporter::visit(luci::CircleSplit *node)
-{
-  auto split_outs = loco::succs(node);
-  assert(int32_t(split_outs.size()) == node->num_split());
-
-  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT, node->op_version());
-  // NOTE BuiltinOperator_SPLIT input is placed at second position
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->split_dim()),
-                                  get_tensor_index(node->input())};
-  std::vector<int32_t> outputs_vec;
-
-  for (int32_t index = 0; index < node->num_split(); index++)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : split_outs)
-    {
-      auto split_out = loco::must_cast<luci::CircleSplitOut *>(out);
-      if (split_out->index() == index)
-      {
-        outputs_vec.push_back(get_tensor_index(split_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid Split output");
-    }
-  }
-
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateSplitOptions(builder, node->num_split());
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_SplitOptions, options.Union());
-  gd._operators.push_back(op_offset);
+                CreateSparseToDenseOptions(_ctx.builder, node->validate_indices()).Union());
 }
 
-void OperationExporter::visit(luci::CircleSplitV *node)
-{
-  auto split_outs = loco::succs(node);
-  assert(int32_t(split_outs.size()) == node->num_split());
-
-  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT_V, node->op_version());
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
-                                  get_tensor_index(node->size_splits()),
-                                  get_tensor_index(node->split_dim())};
-  std::vector<int32_t> outputs_vec;
-
-  for (int32_t index = 0; index < node->num_split(); index++)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : split_outs)
-    {
-      auto split_out = loco::must_cast<luci::CircleSplitVOut *>(out);
-      if (split_out->index() == index)
-      {
-        outputs_vec.push_back(get_tensor_index(split_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid SplitV output");
-    }
-  }
+void OperationExporter::visit(luci::CircleSplit *node) { export_node(_ctx, node); }
 
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateSplitVOptions(builder, node->num_split());
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_SplitVOptions, options.Union());
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleSplitV *node) { export_node(_ctx, node); }
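
One detail worth keeping in mind when reading these one-liners: the deleted CircleSplit body placed split_dim before input in inputs_vec (the SPLIT schema expects the axis tensor first), so the export_node overload replacing it must preserve that order:

// From the deleted body above: SPLIT's axis tensor comes first,
std::vector<int32_t> inputs_vec{get_tensor_index(node->split_dim()),
                                get_tensor_index(node->input())};
// whereas SPLIT_V takes (input, size_splits, split_dim) in that order.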
 
 void OperationExporter::visit(luci::CircleSqrt *node)
 {
@@ -1035,28 +1303,28 @@ void OperationExporter::visit(luci::CircleSqrt *node)
 void OperationExporter::visit(luci::CircleSquare *node)
 {
   export_simple(node, circle::BuiltinOperator_SQUARE, circle::BuiltinOptions_SquareOptions,
-                CreateSquareOptions(builder).Union());
+                CreateSquareOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleSquaredDifference *node)
 {
   export_simple(node, circle::BuiltinOperator_SQUARED_DIFFERENCE,
                 circle::BuiltinOptions_SquaredDifferenceOptions,
-                CreateSquaredDifferenceOptions(builder).Union());
+                CreateSquaredDifferenceOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleSqueeze *node)
 {
-  auto squeeze_dims = builder.CreateVector<int32_t>(node->squeeze_dims());
+  auto squeeze_dims = _ctx.builder.CreateVector<int32_t>(node->squeeze_dims());
   export_simple(node, circle::BuiltinOperator_SQUEEZE, circle::BuiltinOptions_SqueezeOptions,
-                CreateSqueezeOptions(builder, squeeze_dims).Union());
+                CreateSqueezeOptions(_ctx.builder, squeeze_dims).Union());
 }
 
 void OperationExporter::visit(luci::CircleStridedSlice *node)
 {
   export_simple(node, circle::BuiltinOperator_STRIDED_SLICE,
                 circle::BuiltinOptions_StridedSliceOptions,
-                CreateStridedSliceOptions(builder, node->begin_mask(), node->end_mask(),
+                CreateStridedSliceOptions(_ctx.builder, node->begin_mask(), node->end_mask(),
                                           node->ellipsis_mask(), node->new_axis_mask(),
                                           node->shrink_axis_mask())
                     .Union());
@@ -1066,13 +1334,13 @@ void OperationExporter::visit(luci::CircleSub *node)
 {
   export_simple(
       node, circle::BuiltinOperator_SUB, circle::BuiltinOptions_SubOptions,
-      CreateSubOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+      CreateSubOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
 }
 
 void OperationExporter::visit(luci::CircleSum *node)
 {
   export_simple(node, circle::BuiltinOperator_SUM, circle::BuiltinOptions_ReducerOptions,
-                CreateReducerOptions(builder, node->keep_dims()).Union());
+                CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
 }
 
 void OperationExporter::visit(luci::CircleTanh *node)
@@ -1083,226 +1351,65 @@ void OperationExporter::visit(luci::CircleTanh *node)
 void OperationExporter::visit(luci::CircleTile *node)
 {
   export_simple(node, circle::BuiltinOperator_TILE, circle::BuiltinOptions_TileOptions,
-                CreateTileOptions(builder).Union());
+                CreateTileOptions(_ctx.builder).Union());
 }
 
-void OperationExporter::visit(luci::CircleTopKV2 *node)
-{
-  auto topkv2_outs = loco::succs(node);
-  int outs_count = int32_t(topkv2_outs.size());
-  assert(outs_count == 2);
-
-  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_TOPK_V2, node->op_version());
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->k())};
-  std::vector<int32_t> outputs_vec;
-
-  for (int32_t index = 0; index < outs_count; index++)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : topkv2_outs)
-    {
-      auto topkv2_out = loco::must_cast<luci::CircleTopKV2Out *>(out);
-      if (topkv2_out->index() == index)
-      {
-        outputs_vec.push_back(get_tensor_index(topkv2_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid TopKV2 output");
-    }
-  }
-
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateTopKV2Options(builder);
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_TopKV2Options, options.Union());
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleTopKV2 *node) { export_node(_ctx, node); }
 
 void OperationExporter::visit(luci::CircleTranspose *node)
 {
   export_simple(node, circle::BuiltinOperator_TRANSPOSE, circle::BuiltinOptions_TransposeOptions,
-                CreateTransposeOptions(builder).Union());
+                CreateTransposeOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleTransposeConv *node)
 {
   export_simple(node, circle::BuiltinOperator_TRANSPOSE_CONV,
                 circle::BuiltinOptions_TransposeConvOptions,
-                CreateTransposeConvOptions(builder, getOpPadding(node->padding()),
+                CreateTransposeConvOptions(_ctx.builder, getOpPadding(node->padding()),
                                            node->stride()->w(), node->stride()->h())
                     .Union());
 }
 
-void OperationExporter::visit(luci::CircleUnique *node)
-{
-  auto unique_outs = loco::succs(node);
-  assert(int32_t(unique_outs.size()) == 2);
-  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_UNIQUE, node->op_version());
-
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
-  std::vector<int32_t> outputs_vec;
-
-  for (int32_t index = 0; index < 2; index++)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : unique_outs)
-    {
-      auto unique_out = loco::must_cast<luci::CircleUniqueOut *>(out);
-      if (unique_out->index() == index)
-      {
-        outputs_vec.push_back(get_tensor_index(unique_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid Unique output");
-    }
-  }
-
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateUniqueOptions(builder, to_circle_tensortype(node->idx_out_type()));
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_UniqueOptions, options.Union());
-  gd._operators.push_back(op_offset);
-}
-
-void OperationExporter::visit(luci::CircleUnpack *node)
-{
-  LOGGER(l);
-  auto settings = luci::UserSettings::settings();
-
-  auto unpack_outs = loco::succs(node);
-  // NOTE real models may not use all of the outputs
-  if (static_cast<int32_t>(unpack_outs.size()) != node->num())
-  {
-    if (settings->get(luci::UserSettings::Key::DisableValidation))
-    {
-      WARN(l) << "Warning: export Unpack(" << node->name() << ") 'num' not same as outputs";
-    }
-    else
-      assert(false);
-  }
-
-  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_UNPACK, node->op_version());
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
-  std::vector<int32_t> outputs_vec;
-
-  for (int32_t index = 0; index < node->num(); index++)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : unpack_outs)
-    {
-      auto unpack_out = loco::must_cast<luci::CircleUnpackOut *>(out);
-      if (unpack_out->index() == index)
-      {
-        outputs_vec.push_back(get_tensor_index(unpack_out));
-        found = true;
-        break;
-      }
-    }
-    // NOTE real models may not use all of the outputs
-    if (!found)
-    {
-      if (settings->get(luci::UserSettings::Key::DisableValidation))
-      {
-        WARN(l) << "Warning: export Unpack(" << node->name() << ") output " << index << " not used";
-      }
-      else
-        assert(false);
-    }
-  }
+void OperationExporter::visit(luci::CircleUnique *node) { export_node(_ctx, node); }
 
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateUnpackOptions(builder, node->num(), node->axis());
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_UnpackOptions, options.Union());
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleUnpack *node) { export_node(_ctx, node); }
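
The deleted Unpack body is the only exporter here that consults UserSettings: with DisableValidation set it merely warns when 'num' disagrees with the number of connected outputs (real models may leave outputs unused), and asserts otherwise. The replacement export_node overload presumably keeps this check, whose essence was:

// Essence of the deleted check, presumably preserved in export_node(ctx, CircleUnpack *):
auto settings = luci::UserSettings::settings();
if (static_cast<int32_t>(unpack_outs.size()) != node->num())
{
  if (settings->get(luci::UserSettings::Key::DisableValidation))
    WARN(l) << "Warning: export Unpack(" << node->name() << ") 'num' not same as outputs";
  else
    assert(false);
}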
 
 void OperationExporter::visit(luci::CircleWhere *node)
 {
   export_simple(node, circle::BuiltinOperator_WHERE, circle::BuiltinOptions_WhereOptions,
-                CreateWhereOptions(builder).Union());
+                CreateWhereOptions(_ctx.builder).Union());
 }
 
-void OperationExporter::visit(luci::CircleWhile *node)
-{
-  auto while_outs = loco::succs(node);
-  assert(while_outs.size() == node->output_count());
-
-  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_WHILE, node->op_version());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec;
-
-  for (uint32_t idx = 0; idx < node->input_count(); ++idx)
-    inputs_vec.push_back(get_tensor_index(node->input(idx)));
-
-  for (uint32_t idx = 0; idx < node->output_count(); ++idx)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : while_outs)
-    {
-      auto while_out = loco::must_cast<luci::CircleWhileOut *>(out);
-      if (while_out->index() == static_cast<int32_t>(idx))
-      {
-        outputs_vec.push_back(get_tensor_index(while_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid CircleWhile output");
-    }
-  }
-
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateWhileOptions(builder, node->cond_branch(), node->body_branch());
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_WhileOptions, options.Union());
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleWhile *node) { export_node(_ctx, node); }
 
 void OperationExporter::visit(luci::CircleZerosLike *node)
 {
   export_simple(node, circle::BuiltinOperator_ZEROS_LIKE, circle::BuiltinOptions_ZerosLikeOptions,
-                CreateZerosLikeOptions(builder).Union());
+                CreateZerosLikeOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleBCQFullyConnected *node)
 {
   export_simple(node, circle::BuiltinOperator_BCQ_FULLY_CONNECTED,
                 circle::BuiltinOptions_BCQFullyConnectedOptions,
-                CreateBCQFullyConnectedOptions(builder, node->weights_hidden_size(),
+                CreateBCQFullyConnectedOptions(_ctx.builder, node->weights_hidden_size(),
                                                to_circle_actfunc(node->fusedActivationFunction()))
                     .Union());
 }
 
 void OperationExporter::visit(luci::CircleBCQGather *node)
 {
-  export_simple(node, circle::BuiltinOperator_BCQ_GATHER, circle::BuiltinOptions_BCQGatherOptions,
-                CreateBCQGatherOptions(builder, node->input_hidden_size(), node->axis()).Union());
+  export_simple(
+      node, circle::BuiltinOperator_BCQ_GATHER, circle::BuiltinOptions_BCQGatherOptions,
+      CreateBCQGatherOptions(_ctx.builder, node->input_hidden_size(), node->axis()).Union());
 }
 
 void OperationExporter::visit(luci::CircleInstanceNorm *node)
 {
   export_simple(node, circle::BuiltinOperator_INSTANCE_NORM,
                 circle::BuiltinOptions_InstanceNormOptions,
-                CreateInstanceNormOptions(builder, node->epsilon(),
+                CreateInstanceNormOptions(_ctx.builder, node->epsilon(),
                                           to_circle_actfunc(node->fusedActivationFunction()))
                     .Union());
 }
@@ -1312,7 +1419,8 @@ void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder, Seria
 {
   if (auto circle_node = dynamic_cast<luci::CircleNode *>(node))
   {
-    OperationExporter exporter{builder, md, gd};
+    ExportContext ctx{builder, md, gd};
+    OperationExporter exporter{ctx};
     circle_node->accept(&exporter);
   }
   else
index 825c214..0b21d38 100644 (file)
 #include "Nodes/CircleMul.h"
 #include "Nodes/CircleNeg.h"
 #include "Nodes/CircleNonMaxSuppressionV4.h"
+#include "Nodes/CircleNonMaxSuppressionV5.h"
 #include "Nodes/CircleNotEqual.h"
 #include "Nodes/CircleOneHot.h"
 #include "Nodes/CirclePack.h"
 #include "Nodes/CirclePad.h"
+#include "Nodes/CirclePadV2.h"
 #include "Nodes/CirclePow.h"
 #include "Nodes/CirclePRelu.h"
 #include "Nodes/CircleRange.h"
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV5.h b/compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV5.h
new file mode 100644 (file)
index 0000000..62be075
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
+#define __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
+
+#include "luci/Import/GraphBuilderBase.h"
+
+namespace luci
+{
+
+class CircleNonMaxSuppressionV5GraphBuilder : public GraphBuilderBase
+{
+public:
+  bool validate(const ValidateArgs &args) const final;
+
+  void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CirclePadV2.h b/compiler/luci/import/include/luci/Import/Nodes/CirclePadV2.h
new file mode 100644 (file)
index 0000000..089f52c
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_PADV2_H__
+#define __LUCI_IMPORT_OP_CIRCLE_PADV2_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CirclePadV2GraphBuilder : public GraphBuilder
+{
+public:
+  bool validate(const ValidateArgs &args) const final;
+
+private:
+  CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+                         loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_PADV2_H__
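
The two new headers use different base classes on purpose: NonMaxSuppressionV5 has multiple outputs, so its builder derives from GraphBuilderBase and implements build() directly (it must create the CircleNonMaxSuppressionV5Out wrapper nodes itself), while PadV2 is single-output and derives from GraphBuilder, overriding only build_node(). A minimal sketch of the latter, assuming the usual accessor names (input, paddings, constant_values) on luci::CirclePadV2; the real implementation is in CirclePadV2.cpp, not shown in this diff:

// Sketch only; accessor names are assumed from luci IR conventions.
CircleNode *CirclePadV2GraphBuilder::build_node(const circle::OperatorT &,
                                                const std::vector<CircleNode *> &inputs,
                                                loco::Graph *graph) const
{
  auto *node = graph->nodes()->create<CirclePadV2>();
  node->input(inputs.at(0));
  node->paddings(inputs.at(1));
  node->constant_values(inputs.at(2));
  return node;
}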
index cc328cc..c6bcacb 100644 (file)
@@ -83,10 +83,12 @@ GraphBuilderRegistry::GraphBuilderRegistry()
   CIRCLE_NODE(MUL, CircleMulGraphBuilder);                                                 // 18
   CIRCLE_NODE(NEG, CircleNegGraphBuilder);                                                 // 59
   CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, CircleNonMaxSuppressionV4GraphBuilder);              // 120,
+  CIRCLE_NODE(NON_MAX_SUPPRESSION_V5, CircleNonMaxSuppressionV5GraphBuilder);              // 121,
   CIRCLE_NODE(NOT_EQUAL, CircleNotEqualGraphBuilder);                                      // 72
   CIRCLE_NODE(ONE_HOT, CircleOneHotGraphBuilder);                                          // 85
   CIRCLE_NODE(PACK, CirclePackGraphBuilder);                                               // 83
   CIRCLE_NODE(PAD, CirclePadGraphBuilder);                                                 // 34
+  CIRCLE_NODE(PADV2, CirclePadV2GraphBuilder);                                             // 60
   CIRCLE_NODE(POW, CirclePowGraphBuilder);                                                 // 78
   CIRCLE_NODE(PRELU, CirclePReluGraphBuilder);                                             // 54,
   CIRCLE_NODE(RANGE, CircleRangeGraphBuilder);                                             // 96
@@ -155,11 +157,9 @@ GraphBuilderRegistry::GraphBuilderRegistry()
   // BuiltinOperator_DELEGATE = 51,
   // BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52,
   // BuiltinOperator_ARG_MAX = 56,
-  // BuiltinOperator_PADV2 = 60,
   // BuiltinOperator_FAKE_QUANT = 80,
   // BuiltinOperator_QUANTIZE = 114,
   // BuiltinOperator_HARD_SWISH = 117,
-  // BuiltinOperator_NON_MAX_SUPPRESSION_V5 = 121,
   // BuiltinOperator_DENSIFY = 124,
 }
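
For context, CIRCLE_NODE is a local registration macro keyed by the builtin operator enum; it presumably expands to an add() call, along the lines of:

// Assumed expansion of the registration macro used above; the trailing
// "// 120," style comments record each op's value in the circle schema enum.
#define CIRCLE_NODE(OPCODE, CLASS) add(circle::BuiltinOperator_##OPCODE, std::make_unique<CLASS>())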
 
index 8c2039f..7faab14 100644 (file)
 
 #include <luci/IR/Nodes/CircleBatchToSpaceND.h>
 
-#include <loco.h>
+#include "ValidateHelpers.h"
 
-#include <cassert>
+#include <loco.h>
 
 namespace luci
 {
 
 bool CircleBatchToSpaceNDGraphBuilder::validate(const ValidateArgs &args) const
 {
-  const auto &inputs = args.op.inputs;
-  if (inputs.size() != 3)
-    return false;
-
-  // input 1 and 2 should have INT32/INT64 type
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor_1 = tensors.at(inputs.at(1));
-  switch (tensor_1->type)
-  {
-    case circle::TensorType_INT32:
-    case circle::TensorType_INT64:
-      break;
-    default:
-      return false;
-  }
-  const auto &tensor_2 = tensors.at(inputs.at(2));
-  switch (tensor_2->type)
-  {
-    case circle::TensorType_INT32:
-    case circle::TensorType_INT64:
-      break;
-    default:
-      return false;
-  }
-
-  // Only support input shape dimension 3 and 4 only
-  const auto &tensor_0 = tensors.at(inputs.at(0));
-  const auto t_0_s = tensor_0->shape.size();
-  if (t_0_s != 3 && t_0_s != 4)
-    return false;
-
-  // TODO check input shape
-
-  return true;
+  return validate_batch_space_nd(args);
 }
 
 CircleNode *CircleBatchToSpaceNDGraphBuilder::build_node(const circle::OperatorT &,
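
The deleted checks have moved wholesale into a shared ValidateHelpers header (note the new include); validate_batch_space_nd presumably carries the same logic so that BatchToSpaceND and SpaceToBatchND can share it:

// Presumed body of validate_batch_space_nd in ValidateHelpers, condensing
// the checks deleted above; the exact signature is an assumption.
bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args)
{
  const auto &inputs = args.op.inputs;
  if (inputs.size() != 3)
    return false;

  // inputs 1 and 2 should have INT32/INT64 type
  const auto &tensors = args.reader.tensors();
  for (int32_t i : {1, 2})
  {
    const auto type = tensors.at(inputs.at(i))->type;
    if (type != circle::TensorType_INT32 && type != circle::TensorType_INT64)
      return false;
  }

  // only input shapes of rank 3 or 4 are supported
  const auto rank = tensors.at(inputs.at(0))->shape.size();
  return rank == 3 || rank == 4;
}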
index 7131dc1..fad7a07 100644 (file)
@@ -118,6 +118,10 @@ CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_ind
         copy_data<loco::DataType::U8>(buffer, num_elements, const_node);
         break;
 
+      case loco::DataType::S8:
+        copy_data<loco::DataType::S8>(buffer, num_elements, const_node);
+        break;
+
       case loco::DataType::S16:
         copy_data<loco::DataType::S16>(buffer, num_elements, const_node);
         break;
index 4d1468f..805d5bc 100644 (file)
@@ -18,6 +18,8 @@
 
 #include <luci/IR/Nodes/CircleMaximum.h>
 
+#include "ValidateHelpers.h"
+
 #include <loco.h>
 
 namespace luci
@@ -25,37 +27,7 @@ namespace luci
 
 bool CircleMaximumGraphBuilder::validate(const ValidateArgs &args) const
 {
-  const auto &inputs = args.op.inputs;
-  const auto &outputs = args.op.outputs;
-
-  if (inputs.size() != 2)
-    return false;
-
-  if (outputs.size() != 1)
-    return false;
-
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(0));
-
-  switch (tensor->type)
-  {
-    case circle::TensorType_FLOAT16:
-    case circle::TensorType_FLOAT32:
-    case circle::TensorType_FLOAT64:
-    case circle::TensorType_INT32:
-    case circle::TensorType_INT64:
-      break;
-    default:
-      return false;
-  }
-
-  if (tensors[inputs.at(1)]->type != tensor->type)
-    return false;
-
-  if (tensors[outputs[0]]->type != tensor->type)
-    return false;
-
-  return true;
+  return validate_minmax(args);
 }
 
 CircleNode *CircleMaximumGraphBuilder::build_node(const circle::OperatorT &,
index 8b4daf1..381039e 100644 (file)
@@ -18,6 +18,8 @@
 
 #include <luci/IR/Nodes/CircleMinimum.h>
 
+#include "ValidateHelpers.h"
+
 #include <loco.h>
 
 namespace luci
@@ -25,37 +27,7 @@ namespace luci
 
 bool CircleMinimumGraphBuilder::validate(const ValidateArgs &args) const
 {
-  const auto &inputs = args.op.inputs;
-  const auto &outputs = args.op.outputs;
-
-  if (inputs.size() != 2)
-    return false;
-
-  if (outputs.size() != 1)
-    return false;
-
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(0));
-
-  switch (tensor->type)
-  {
-    case circle::TensorType_FLOAT16:
-    case circle::TensorType_FLOAT32:
-    case circle::TensorType_FLOAT64:
-    case circle::TensorType_INT32:
-    case circle::TensorType_INT64:
-      break;
-    default:
-      return false;
-  }
-
-  if (tensors[inputs.at(1)]->type != tensor->type)
-    return false;
-
-  if (tensors[outputs[0]]->type != tensor->type)
-    return false;
-
-  return true;
+  return validate_minmax(args);
 }
 
 CircleNode *CircleMinimumGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp b/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp
new file mode 100644 (file)
index 0000000..241dbf5
--- /dev/null
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleNonMaxSuppressionV5.h"
+
+#include <luci/IR/Nodes/CircleNonMaxSuppressionV5.h>
+#include <luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h>
+
+#include <loco.h>
+#include <oops/UserExn.h>
+
+namespace luci
+{
+
+bool CircleNonMaxSuppressionV5GraphBuilder::validate(const ValidateArgs &args) const
+{
+  const auto &inputs = args.op.inputs;
+  const auto &outputs = args.op.outputs;
+
+  if (inputs.size() != 6)
+    return false;
+  if (outputs.size() != 3)
+    return false;
+
+  const auto &tensors = args.reader.tensors();
+  const auto &boxes_tensor = tensors.at(inputs[0]);
+  if (boxes_tensor->shape.size() != 2)
+    return false;
+  if (boxes_tensor->shape.at(1) != 4)
+    return false;
+  if (boxes_tensor->shape.at(0) != tensors.at(inputs[1])->shape.at(0))
+    return false;
+
+  if (tensors.at(inputs[2])->type != circle::TensorType_INT32)
+    return false;
+  if (tensors.at(inputs[3])->type != circle::TensorType_FLOAT32)
+    return false;
+  if (tensors.at(inputs[4])->type != circle::TensorType_FLOAT32)
+    return false;
+  if (tensors.at(inputs[5])->type != circle::TensorType_FLOAT32)
+    return false;
+
+  return true;
+}
+
+/**
+ * @brief  NonMaxSuppressionV5 Node builder
+ *
+ * @note   Current loco does not support multiple outputs per node.
+ *         We create multiple CircleNonMaxSuppressionV5Out nodes to emulate this.
+ */
+
+void CircleNonMaxSuppressionV5GraphBuilder::build(const circle::OperatorT &op,
+                                                  GraphBuilderContext *context) const
+{
+  assert(context != nullptr);
+
+  auto graph = context->graph();
+
+  const std::vector<int32_t> &inputs = op.inputs;
+  const std::vector<int32_t> &outputs = op.outputs;
+  const auto &tensors = context->reader()->tensors();
+  const auto &opcodes = context->reader()->opcodes();
+  auto tensors_ptr = context->reader()->tensors_ptr();
+  assert(tensors_ptr != nullptr);
+
+  std::vector<CircleNode *> input_nodes;
+  for (const int32_t input_tensor_index : inputs)
+  {
+    input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
+  }
+
+  // Create CircleNonMaxSuppressionV5
+  auto node = graph->nodes()->create<CircleNonMaxSuppressionV5>();
+  node->boxes(input_nodes[0]);
+  node->scores(input_nodes[1]);
+  node->max_output_size(input_nodes[2]);
+  node->iou_threshold(input_nodes[3]);
+  node->score_threshold(input_nodes[4]);
+  node->soft_nms_sigma(input_nodes[5]);
+
+  assert(outputs.size() == 3);
+  {
+    // Use the name of output 0 as the name of this NonMaxSuppressionV5 node
+    const circle::TensorT &output_tensor = *tensors[outputs[0]];
+    node->name(tensor_name(output_tensor));
+    node->op_version(opcodes[op.opcode_index].get()->version);
+
+    // NOTE We don't set quantization for NonMaxSuppressionV5 itself but to virtual outputs
+  }
+
+  // Create virtual outputs of NonMaxSuppressionV5
+  for (size_t n = 0; n < outputs.size(); ++n)
+  {
+    const circle::TensorT &output_tensor = *tensors[outputs[n]];
+
+    auto *nodeout = graph->nodes()->create<CircleNonMaxSuppressionV5Out>();
+    copy_tensor_attributes(output_tensor, nodeout);
+
+    // mark shape_status
+    if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
+      nodeout->shape_status(ShapeStatus::NOSHAPE);
+    else
+      nodeout->shape_status(ShapeStatus::VALID);
+
+    nodeout->input(node);
+    nodeout->index(n);
+
+    context->nodefinder()->enroll(outputs[n], nodeout);
+  }
+}
+
+} // namespace luci
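
NOTE (illustration, not part of the diff) A consumer retrieves result k of the
emulated multi-output node through its virtual outputs. A minimal sketch,
assuming loco::succs() from <loco.h>; find_out is a hypothetical helper:

    luci::CircleNonMaxSuppressionV5Out *find_out(luci::CircleNonMaxSuppressionV5 *nmsv5, int32_t k)
    {
      for (auto succ : loco::succs(nmsv5)) // graph successors of the NMSv5 node
      {
        auto out = dynamic_cast<luci::CircleNonMaxSuppressionV5Out *>(succ);
        if (out != nullptr && out->index() == k)
          return out; // carries result k (0: indices, 1: scores, 2: valid count)
      }
      return nullptr; // the importer enrolls all three outputs, so this is unexpected
    }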
diff --git a/compiler/luci/import/src/Nodes/CirclePadV2.cpp b/compiler/luci/import/src/Nodes/CirclePadV2.cpp
new file mode 100644 (file)
index 0000000..493876e
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CirclePadV2.h"
+
+#include <luci/IR/Nodes/CirclePadV2.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CirclePadV2GraphBuilder::validate(const ValidateArgs &args) const
+{
+  if (args.op.inputs.size() != 3)
+    return false;
+
+  if (args.op.outputs.size() != 1)
+    return false;
+
+  return true;
+}
+
+CircleNode *CirclePadV2GraphBuilder::build_node(const circle::OperatorT &op,
+                                                const std::vector<CircleNode *> &inputs,
+                                                loco::Graph *graph) const
+{
+  auto *node = graph->nodes()->create<CirclePadV2>();
+  node->input(inputs[0]);
+  node->paddings(inputs[1]);
+  node->constant_values(inputs[2]);
+
+  const auto *options = op.builtin_options.AsPadV2Options();
+  (void)options; // PadV2Options is an empty table; there is nothing to read
+
+  return node;
+}
+
+} // namespace luci
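
NOTE (illustration, not part of the diff) PADV2 generalizes PAD with an explicit
fill value. A minimal usage sketch of the imported node, where g, input_node,
paddings_const, and fill_const are hypothetical:

    auto *pad = g->nodes()->create<luci::CirclePadV2>();
    pad->input(input_node);           // tensor to pad
    pad->paddings(paddings_const);    // [rank, 2] paddings tensor
    pad->constant_values(fill_const); // scalar fill value; plain PAD implicitly pads with zeros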
index 05492db..e633abf 100644 (file)
 
 #include <luci/IR/Nodes/CircleReduceMax.h>
 
+#include "ValidateHelpers.h"
+
 namespace luci
 {
 
 bool CircleReduceMaxGraphBuilder::validate(const ValidateArgs &args) const
 {
-  const auto &inputs = args.op.inputs;
-  const auto &outputs = args.op.outputs;
-
-  if (inputs.size() != 2)
-    return false;
-
-  if (outputs.size() != 1)
-    return false;
-
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor_axis = tensors.at(inputs.at(1));
-
-  switch (tensor_axis->type)
-  {
-    case circle::TensorType_INT32:
-    case circle::TensorType_INT64:
-      break;
-    default:
-      return false;
-  }
-
-  return true;
+  return validate_reduce_minmax(args);
 }
 
 CircleNode *CircleReduceMaxGraphBuilder::build_node(const circle::OperatorT &op,
index 117d529..bfc3001 100644 (file)
 
 #include <luci/IR/Nodes/CircleReduceMin.h>
 
+#include "ValidateHelpers.h"
+
 namespace luci
 {
 
 bool CircleReduceMinGraphBuilder::validate(const ValidateArgs &args) const
 {
-  const auto &inputs = args.op.inputs;
-  const auto &outputs = args.op.outputs;
-
-  if (inputs.size() != 2)
-    return false;
-
-  if (outputs.size() != 1)
-    return false;
-
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor_axis = tensors.at(inputs.at(1));
-
-  switch (tensor_axis->type)
-  {
-    case circle::TensorType_INT32:
-    case circle::TensorType_INT64:
-      break;
-    default:
-      return false;
-  }
-
-  return true;
+  return validate_reduce_minmax(args);
 }
 
 CircleNode *CircleReduceMinGraphBuilder::build_node(const circle::OperatorT &op,
index c1d508e..fbf9f6b 100644 (file)
 
 #include <luci/IR/Nodes/CircleSpaceToBatchND.h>
 
-#include <loco.h>
+#include "ValidateHelpers.h"
 
-#include <cassert>
+#include <loco.h>
 
 namespace luci
 {
 
 bool CircleSpaceToBatchNDGraphBuilder::validate(const ValidateArgs &args) const
 {
-  const auto &inputs = args.op.inputs;
-  if (inputs.size() != 3)
-    return false;
-
-  // input 1 and 2 should have INT32/INT64 type
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor_1 = tensors.at(inputs.at(1));
-  switch (tensor_1->type)
-  {
-    case circle::TensorType_INT32:
-    case circle::TensorType_INT64:
-      break;
-    default:
-      return false;
-  }
-  const auto &tensor_2 = tensors.at(inputs.at(2));
-  switch (tensor_2->type)
-  {
-    case circle::TensorType_INT32:
-    case circle::TensorType_INT64:
-      break;
-    default:
-      return false;
-  }
-
-  // Only support input shape dimension 3 and 4 only
-  const auto &tensor_0 = tensors.at(inputs.at(0));
-  const auto t_0_s = tensor_0->shape.size();
-  if (t_0_s != 3 && t_0_s != 4)
-    return false;
-
-  // TODO check input shape
-
-  return true;
+  return validate_batch_space_nd(args);
 }
 
 CircleNode *CircleSpaceToBatchNDGraphBuilder::build_node(const circle::OperatorT &,
index 26d575e..ac756b1 100644 (file)
@@ -42,7 +42,8 @@ CircleNode *CircleSparseToDenseGraphBuilder::build_node(const circle::OperatorT
   node->default_value(inputs.at(3));
 
   const auto *options = op.builtin_options.AsSparseToDenseOptions();
-  node->validate_indices(options->validate_indices);
+  if (options)
+    node->validate_indices(options->validate_indices);
 
   return node;
 }
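
NOTE (illustration, not part of the diff) The guard matters because the
flatbuffers object-API accessor AsSparseToDenseOptions() returns nullptr when
builtin_options holds no (or a different) options table; with the guard the node
keeps its default instead of dereferencing null. The same pattern, condensed:

    if (const auto *options = op.builtin_options.AsSparseToDenseOptions())
      node->validate_indices(options->validate_indices);
    // else keep the node default (now false; see the lang change further below)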
index ddb1966..c280faa 100644 (file)
@@ -27,7 +27,7 @@ namespace luci
 
 bool CircleTransposeConvGraphBuilder::validate(const ValidateArgs &args) const
 {
-  if (args.op.inputs.size() != 3)
+  if (args.op.inputs.size() != 3 && args.op.inputs.size() != 4)
     return false;
 
   const auto &inputs = args.op.inputs;
@@ -60,6 +60,17 @@ CircleNode *CircleTransposeConvGraphBuilder::build_node(const circle::OperatorT
   node->inputSizes(inputs.at(0));
   node->filter(inputs.at(1));
   node->outBackprop(inputs.at(2));
+  if (inputs.size() == 3)
+    node->bias(graph->nodes()->create<CircleOutputExclude>());
+  else
+    node->bias(inputs.at(3));
+
+  if (auto bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias()))
+  {
+    // CircleOutputExclude doesn't need a type, but since all nodes must have a type, a dummy type
+    // is inserted.
+    bias->dtype(loco::DataType::FLOAT32);
+  }
 
   const auto *options = op.builtin_options.AsTransposeConvOptions();
   node->padding(luci_padding(options->padding));
diff --git a/compiler/luci/import/src/ValidateHelpers.cpp b/compiler/luci/import/src/ValidateHelpers.cpp
new file mode 100644 (file)
index 0000000..12a6548
--- /dev/null
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ValidateHelpers.h"
+
+namespace luci
+{
+
+bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args)
+{
+  const auto &inputs = args.op.inputs;
+  if (inputs.size() != 3)
+    return false;
+
+  // inputs 1 and 2 should have INT32/INT64 type
+  const auto &tensors = args.reader.tensors();
+  const auto &tensor_1 = tensors.at(inputs.at(1));
+  switch (tensor_1->type)
+  {
+    case circle::TensorType_INT32:
+    case circle::TensorType_INT64:
+      break;
+    default:
+      return false;
+  }
+  const auto &tensor_2 = tensors.at(inputs.at(2));
+  switch (tensor_2->type)
+  {
+    case circle::TensorType_INT32:
+    case circle::TensorType_INT64:
+      break;
+    default:
+      return false;
+  }
+
+  // Support only input shape dimensions 3 and 4
+  const auto &tensor_0 = tensors.at(inputs.at(0));
+  const auto t_0_s = tensor_0->shape.size();
+  if (t_0_s != 3 && t_0_s != 4)
+    return false;
+
+  // TODO check input shape
+
+  return true;
+}
+
+bool validate_minmax(const GraphBuilderBase::ValidateArgs &args)
+{
+  const auto &inputs = args.op.inputs;
+  const auto &outputs = args.op.outputs;
+
+  if (inputs.size() != 2)
+    return false;
+
+  if (outputs.size() != 1)
+    return false;
+
+  const auto &tensors = args.reader.tensors();
+  const auto &tensor = tensors.at(inputs.at(0));
+
+  switch (tensor->type)
+  {
+    case circle::TensorType_FLOAT16:
+    case circle::TensorType_FLOAT32:
+    case circle::TensorType_FLOAT64:
+    case circle::TensorType_INT32:
+    case circle::TensorType_INT64:
+      break;
+    default:
+      return false;
+  }
+
+  if (tensors[inputs.at(1)]->type != tensor->type)
+    return false;
+
+  if (tensors[outputs[0]]->type != tensor->type)
+    return false;
+
+  return true;
+}
+
+bool validate_reduce_minmax(const GraphBuilderBase::ValidateArgs &args)
+{
+  const auto &inputs = args.op.inputs;
+  const auto &outputs = args.op.outputs;
+
+  if (inputs.size() != 2)
+    return false;
+
+  if (outputs.size() != 1)
+    return false;
+
+  const auto &tensors = args.reader.tensors();
+  const auto &tensor_axis = tensors.at(inputs.at(1));
+
+  switch (tensor_axis->type)
+  {
+    case circle::TensorType_INT32:
+    case circle::TensorType_INT64:
+      break;
+    default:
+      return false;
+  }
+
+  return true;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/ValidateHelpers.h b/compiler/luci/import/src/ValidateHelpers.h
new file mode 100644 (file)
index 0000000..4047b2f
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_VALIDATE_HELPERS_H__
+#define __LUCI_VALIDATE_HELPERS_H__
+
+#include "luci/Import/GraphBuilderBase.h"
+
+/**
+ * @note Functions in this file provide helpers to reduce duplicated validation code
+ */
+
+namespace luci
+{
+
+bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args);
+bool validate_minmax(const GraphBuilderBase::ValidateArgs &args);
+bool validate_reduce_minmax(const GraphBuilderBase::ValidateArgs &args);
+
+} // namespace luci
+
+#endif // __LUCI_VALIDATE_HELPERS_H__
index e57f5bb..25b86d2 100644 (file)
@@ -71,6 +71,7 @@
 #include "Nodes/CircleMul.h"
 #include "Nodes/CircleNeg.h"
 #include "Nodes/CircleNonMaxSuppressionV4.h"
+#include "Nodes/CircleNonMaxSuppressionV5.h"
 #include "Nodes/CircleNotEqual.h"
 #include "Nodes/CircleOneHot.h"
 #include "Nodes/CirclePack.h"
 #include "Nodes/CircleCustomOut.h"
 #include "Nodes/CircleIfOut.h"
 #include "Nodes/CircleNonMaxSuppressionV4Out.h"
+#include "Nodes/CircleNonMaxSuppressionV5Out.h"
 #include "Nodes/CircleUnpackOut.h"
 #include "Nodes/CircleUniqueOut.h"
 #include "Nodes/CircleSplitOut.h"
index 8010518..9f0a1b1 100644 (file)
@@ -64,6 +64,7 @@ CIRCLE_NODE(MIRROR_PAD, luci::CircleMirrorPad)
 CIRCLE_NODE(MUL, luci::CircleMul)
 CIRCLE_NODE(NEG, luci::CircleNeg)
 CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, luci::CircleNonMaxSuppressionV4)
+CIRCLE_NODE(NON_MAX_SUPPRESSION_V5, luci::CircleNonMaxSuppressionV5)
 CIRCLE_NODE(NOT_EQUAL, luci::CircleNotEqual)
 CIRCLE_NODE(ONE_HOT, luci::CircleOneHot)
 CIRCLE_NODE(PACK, luci::CirclePack)
@@ -130,6 +131,7 @@ CIRCLE_NODE(CIRCLEOUTPUTEXCLUDE, luci::CircleOutputExclude)
 CIRCLE_NODE(CIRCLECUSTOMOUT, luci::CircleCustomOut)
 CIRCLE_NODE(CIRCLEIFOUT, luci::CircleIfOut)
 CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV4OUT, luci::CircleNonMaxSuppressionV4Out)
+CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV5OUT, luci::CircleNonMaxSuppressionV5Out)
 CIRCLE_NODE(CIRCLESPLITOUT, luci::CircleSplitOut)
 CIRCLE_NODE(CIRCLESPLITVOUT, luci::CircleSplitVOut)
 CIRCLE_NODE(CIRCLETOPKV2OUT, luci::CircleTopKV2Out)
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5.h
new file mode 100644 (file)
index 0000000..52d6821
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
+#define __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief NON_MAX_SUPPRESSION_V5 in Circle
+ */
+class CircleNonMaxSuppressionV5 final
+    : public FixedArityNode<6, CircleNodeImpl<CircleOpcode::NON_MAX_SUPPRESSION_V5>>
+{
+public:
+  loco::Node *boxes(void) const { return at(0)->node(); }
+  void boxes(loco::Node *node) { at(0)->node(node); }
+
+  loco::Node *scores(void) const { return at(1)->node(); }
+  void scores(loco::Node *node) { at(1)->node(node); }
+
+  loco::Node *max_output_size(void) const { return at(2)->node(); }
+  void max_output_size(loco::Node *node) { at(2)->node(node); }
+
+  loco::Node *iou_threshold(void) const { return at(3)->node(); }
+  void iou_threshold(loco::Node *node) { at(3)->node(node); }
+
+  loco::Node *score_threshold(void) const { return at(4)->node(); }
+  void score_threshold(loco::Node *node) { at(4)->node(node); }
+
+  loco::Node *soft_nms_sigma(void) const { return at(5)->node(); }
+  void soft_nms_sigma(loco::Node *node) { at(5)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h
new file mode 100644 (file)
index 0000000..0c6989c
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV5OUT_H__
+#define __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV5OUT_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief Virtual NONMAXSUPPRESSIONV5OUT in Circle
+ */
+class CircleNonMaxSuppressionV5Out final
+    : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT>>
+{
+public:
+  CircleNonMaxSuppressionV5Out() = default;
+
+public:
+  loco::Node *input(void) const { return at(0)->node(); }
+  void input(loco::Node *node) { at(0)->node(node); }
+
+public:
+  int32_t index(void) const { return _index; }
+  void index(int32_t index) { _index = index; }
+
+private:
+  int32_t _index{-1};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV5OUT_H__
index 9f50513..7e80304 100644 (file)
@@ -49,7 +49,7 @@ public:
   void validate_indices(bool validate_indices) { _validate_indices = validate_indices; }
 
 private:
-  bool _validate_indices{true};
+  bool _validate_indices{false};
 };
 
 } // namespace luci
index fc638d4..e355102 100644 (file)
@@ -34,7 +34,8 @@ namespace luci
 *        'out' actually means 'out' and 'in' of this node.
  */
 class CircleTransposeConv final
-    : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::TRANSPOSE_CONV>>
+    : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::TRANSPOSE_CONV>>,
+      public LuciNodeMixin<LuciNodeTrait::Bias>
 {
 public:
   loco::Node *inputSizes(void) const { return at(0)->node(); }
@@ -46,6 +47,21 @@ public:
   loco::Node *outBackprop(void) const { return at(2)->node(); }
   void outBackprop(Node *node) { at(2)->node(node); }
 
+  /**
+   * @note  "bias" is optional. When this node has no conceptual bias, "bias()"
+   *        expected to be `luci::CircleOutputExclude` type.
+   *
+   * <Comment on tflite TRANSPOSE_CONV>
+   *
+   * (Circle node has no dependency on tflite, but just for information on converting)
+   * Before TF v2.3.0, tflite TRANSPOSE_CONV didn't support fused bias as argument.
+   * From TF v2.3.0, tflite TRANSPOSE_CONV supports bias as optional 4th argument.
+   *
+   * Ref: https://github.com/tensorflow/tensorflow/commit/43b8f6e710
+   */
+  loco::Node *bias(void) const override { return at(3)->node(); }
+  void bias(loco::Node *node) override { at(3)->node(node); }
+
 public:
   const Padding &padding(void) const { return _padding; }
   void padding(const Padding &padding) { _padding = padding; }
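
NOTE (illustration, not part of the diff) Under this convention, client code can
detect the "no bias" case with a dynamic_cast; has_bias is a hypothetical helper:

    bool has_bias(const luci::CircleTransposeConv *node)
    {
      // a CircleOutputExclude bias marks "no conceptual bias" per the note above
      return dynamic_cast<luci::CircleOutputExclude *>(node->bias()) == nullptr;
    }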
index 17ff853..0d02d32 100644 (file)
@@ -73,6 +73,7 @@ template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &CircleConst:
 INSTANTIATE(loco::DataType::S64);
 INSTANTIATE(loco::DataType::S32);
 INSTANTIATE(loco::DataType::S16);
+INSTANTIATE(loco::DataType::S8);
 INSTANTIATE(loco::DataType::FLOAT32);
 INSTANTIATE(loco::DataType::U8);
 INSTANTIATE(loco::DataType::BOOL);
diff --git a/compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5.test.cpp b/compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5.test.cpp
new file mode 100644 (file)
index 0000000..ceb74e3
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleNonMaxSuppressionV5.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleNonMaxSuppressionV5Test, constructor)
+{
+  luci::CircleNonMaxSuppressionV5 nmsv5_node;
+
+  ASSERT_EQ(luci::CircleDialect::get(), nmsv5_node.dialect());
+  ASSERT_EQ(luci::CircleOpcode::NON_MAX_SUPPRESSION_V5, nmsv5_node.opcode());
+
+  ASSERT_EQ(nullptr, nmsv5_node.boxes());
+  ASSERT_EQ(nullptr, nmsv5_node.scores());
+  ASSERT_EQ(nullptr, nmsv5_node.max_output_size());
+  ASSERT_EQ(nullptr, nmsv5_node.iou_threshold());
+  ASSERT_EQ(nullptr, nmsv5_node.score_threshold());
+  ASSERT_EQ(nullptr, nmsv5_node.soft_nms_sigma());
+}
+
+TEST(CircleNonMaxSuppressionV5Test, input_NEG)
+{
+  luci::CircleNonMaxSuppressionV5 nmsv5_node;
+  luci::CircleNonMaxSuppressionV5 node;
+
+  nmsv5_node.boxes(&node);
+  nmsv5_node.scores(&node);
+  nmsv5_node.max_output_size(&node);
+  nmsv5_node.iou_threshold(&node);
+  nmsv5_node.score_threshold(&node);
+  nmsv5_node.soft_nms_sigma(&node);
+  ASSERT_NE(nullptr, nmsv5_node.boxes());
+  ASSERT_NE(nullptr, nmsv5_node.scores());
+  ASSERT_NE(nullptr, nmsv5_node.max_output_size());
+  ASSERT_NE(nullptr, nmsv5_node.iou_threshold());
+  ASSERT_NE(nullptr, nmsv5_node.score_threshold());
+  ASSERT_NE(nullptr, nmsv5_node.soft_nms_sigma());
+
+  nmsv5_node.boxes(nullptr);
+  nmsv5_node.scores(nullptr);
+  nmsv5_node.max_output_size(nullptr);
+  nmsv5_node.iou_threshold(nullptr);
+  nmsv5_node.score_threshold(nullptr);
+  nmsv5_node.soft_nms_sigma(nullptr);
+  ASSERT_EQ(nullptr, nmsv5_node.boxes());
+  ASSERT_EQ(nullptr, nmsv5_node.scores());
+  ASSERT_EQ(nullptr, nmsv5_node.max_output_size());
+  ASSERT_EQ(nullptr, nmsv5_node.iou_threshold());
+  ASSERT_EQ(nullptr, nmsv5_node.score_threshold());
+  ASSERT_EQ(nullptr, nmsv5_node.soft_nms_sigma());
+}
+
+TEST(CircleNonMaxSuppressionV5Test, arity_NEG)
+{
+  luci::CircleNonMaxSuppressionV5 nmsv5_node;
+
+  ASSERT_NO_THROW(nmsv5_node.arg(5));
+  ASSERT_THROW(nmsv5_node.arg(6), std::out_of_range);
+}
+
+TEST(CircleNonMaxSuppressionV5Test, visit_mutable_NEG)
+{
+  struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+  {
+  };
+
+  luci::CircleNonMaxSuppressionV5 nmsv5_node;
+
+  TestVisitor tv;
+  ASSERT_THROW(nmsv5_node.accept(&tv), std::exception);
+}
+
+TEST(CircleNonMaxSuppressionV5Test, visit_NEG)
+{
+  struct TestVisitor final : public luci::CircleNodeVisitor<void>
+  {
+  };
+
+  luci::CircleNonMaxSuppressionV5 nmsv5_node;
+
+  TestVisitor tv;
+  ASSERT_THROW(nmsv5_node.accept(&tv), std::exception);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp b/compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp
new file mode 100644 (file)
index 0000000..7b427ea
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h"
+
+#include "luci/IR/CircleDialect.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleNonMaxSuppressionV5OutTest, constructor)
+{
+  luci::CircleNonMaxSuppressionV5Out vout_node;
+
+  ASSERT_EQ(luci::CircleDialect::get(), vout_node.dialect());
+  ASSERT_EQ(luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT, vout_node.opcode());
+
+  ASSERT_EQ(nullptr, vout_node.input());
+  ASSERT_EQ(-1, vout_node.index());
+}
index de3cf6e..03f612b 100644 (file)
@@ -33,7 +33,7 @@ TEST(CircleSparseToDenseTest, constructor)
   ASSERT_EQ(nullptr, stb_node.values());
   ASSERT_EQ(nullptr, stb_node.default_value());
 
-  ASSERT_EQ(true, stb_node.validate_indices());
+  ASSERT_EQ(false, stb_node.validate_indices());
 }
 
 TEST(CircleSparseToDenseTest, input_NEG)
index 4291697..3e0db80 100644 (file)
@@ -69,8 +69,8 @@ TEST(CircleTransposeConvTest, arity_NEG)
 {
   luci::CircleTransposeConv trc_node;
 
-  ASSERT_NO_THROW(trc_node.arg(2));
-  ASSERT_THROW(trc_node.arg(3), std::out_of_range);
+  ASSERT_NO_THROW(trc_node.arg(3));
+  ASSERT_THROW(trc_node.arg(4), std::out_of_range);
 }
 
 TEST(CircleTransposeConvTest, visit_mutable_NEG)
index f04a418..bb7c73d 100644 (file)
@@ -245,10 +245,12 @@ private:
   IMPLEMENT(luci::CircleMul)
   IMPLEMENT(luci::CircleNeg)
   IMPLEMENT(luci::CircleNonMaxSuppressionV4)
+  IMPLEMENT(luci::CircleNonMaxSuppressionV5)
   IMPLEMENT(luci::CircleNotEqual)
   IMPLEMENT(luci::CircleOneHot)
   IMPLEMENT(luci::CirclePack)
   IMPLEMENT(luci::CirclePad)
+  IMPLEMENT(luci::CirclePadV2)
   IMPLEMENT(luci::CirclePow)
   IMPLEMENT(luci::CirclePRelu)
   IMPLEMENT(luci::CircleRange)
@@ -306,6 +308,7 @@ private:
   IMPLEMENT(luci::CircleOutput)
   IMPLEMENT(luci::CircleIfOut)
   IMPLEMENT(luci::CircleNonMaxSuppressionV4Out)
+  IMPLEMENT(luci::CircleNonMaxSuppressionV5Out)
   IMPLEMENT(luci::CircleSplitOut)
   IMPLEMENT(luci::CircleSplitVOut)
   IMPLEMENT(luci::CircleTopKV2Out)
@@ -380,6 +383,739 @@ bool use_ido(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeS
   return true;
 }
 
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleAddN *node,
+                  locop::NodeSummary &s)
+{
+  for (uint32_t i = 0; i < node->arity(); ++i)
+    s.args().append("inputs", tbl->lookup(node->inputs(i)));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleAveragePool2D *node,
+                  locop::NodeSummary &s)
+{
+  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+
+  s.args().append("value", tbl->lookup(node->value()));
+  s.args().append("filter(h,w)", to_str(node->filter()));
+  s.args().append("stride(h,w)", to_str(node->stride()));
+  s.args().append("padding", to_str(node->padding()));
+  s.args().append("fused", to_str(node->fusedActivationFunction()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBatchMatMul *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("x", tbl->lookup(node->x()));
+  s.args().append("y", tbl->lookup(node->y()));
+  s.args().append("adj_x", to_str(node->adj_x()));
+  s.args().append("adj_y", to_str(node->adj_y()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBatchToSpaceND *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("block_shape", tbl->lookup(node->block_shape()));
+  s.args().append("crops", tbl->lookup(node->crops()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleCast *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("x", tbl->lookup(node->x()));
+  s.args().append("in_data_type", to_str(node->in_data_type()));
+  s.args().append("out_data_type", to_str(node->out_data_type()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleConcatenation *node,
+                  locop::NodeSummary &s)
+{
+  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+
+  for (uint32_t i = 0; i < node->numValues(); ++i)
+    s.args().append("values", tbl->lookup(node->values(i)));
+  s.args().append("axis", pepper::str(node->axis()));
+  s.args().append("fused", to_str(node->fusedActivationFunction()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleConv2D *node,
+                  locop::NodeSummary &s)
+{
+  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+  assert(node->padding() != luci::Padding::UNDEFINED);
+
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("filter", tbl->lookup(node->filter()));
+  s.args().append("bias", tbl->lookup(node->bias()));
+  s.args().append("stride(h,w)", to_str(node->stride()));
+  s.args().append("dilation(h,w)", to_str(node->dilation()));
+  s.args().append("padding", to_str(node->padding()));
+  s.args().append("fused", to_str(node->fusedActivationFunction()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleCustom *node,
+                  locop::NodeSummary &s)
+{
+  for (uint32_t i = 0; i < node->numInputs(); i++)
+  {
+    s.args().append("input" + std::to_string(i), tbl->lookup(node->inputs(i)));
+  }
+  s.args().append("custom_code", node->custom_code());
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleDepthToSpace *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("block_size", std::to_string(node->block_size()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleDepthwiseConv2D *node,
+                  locop::NodeSummary &s)
+{
+  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+  assert(node->padding() != luci::Padding::UNDEFINED);
+
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("filter", tbl->lookup(node->filter()));
+  s.args().append("bias", tbl->lookup(node->bias()));
+  s.args().append("stride(h,w)", to_str(node->stride()));
+  s.args().append("dilation(h,w)", to_str(node->dilation()));
+  s.args().append("padding", to_str(node->padding()));
+  s.args().append("depthMultiplier", std::to_string(node->depthMultiplier()));
+  s.args().append("fused", to_str(node->fusedActivationFunction()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleExpandDims *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("axis", tbl->lookup(node->axis()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFill *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("dims", tbl->lookup(node->dims()));
+  s.args().append("value", tbl->lookup(node->value()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFullyConnected *node,
+                  locop::NodeSummary &s)
+{
+  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("weights", tbl->lookup(node->weights()));
+  s.args().append("bias", tbl->lookup(node->bias()));
+  s.args().append("fused", to_str(node->fusedActivationFunction()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleGather *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("params", tbl->lookup(node->params()));
+  s.args().append("indices", tbl->lookup(node->indices()));
+  s.args().append("axis", pepper::str(node->axis()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleGatherNd *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("params", tbl->lookup(node->params()));
+  s.args().append("indices", tbl->lookup(node->indices()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleIf *node, locop::NodeSummary &s)
+{
+  s.args().append("cond", tbl->lookup(node->cond()));
+  for (uint32_t i = 0; i < node->input_count(); ++i)
+    s.args().append("input", tbl->lookup(node->input(i)));
+
+  if (node->then_graph() != nullptr)
+    s.args().append("then_graph", node->then_graph()->name());
+  else
+    s.args().append("then_branch", pepper::str(node->then_branch()));
+
+  if (node->else_graph() != nullptr)
+    s.args().append("else_graph", node->else_graph()->name());
+  else
+    s.args().append("else_branch", pepper::str(node->else_branch()));
+
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleL2Normalize *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("x", tbl->lookup(node->x()));
+  s.args().append("fused_activation_function", to_str(node->fusedActivationFunction()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLeakyRelu *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("features", tbl->lookup(node->features()));
+  s.args().append("alpha", std::to_string(node->alpha()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLocalResponseNormalization *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("radius", pepper::str(node->radius()));
+  s.args().append("bias", pepper::str(node->bias()));
+  s.args().append("alpha", pepper::str(node->alpha()));
+  s.args().append("beta", pepper::str(node->beta()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLogSoftmax *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("logits", tbl->lookup(node->logits()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMatrixDiag *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("diagonal", tbl->lookup(node->diagonal()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMatrixSetDiag *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("diagonal", tbl->lookup(node->diagonal()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMaxPool2D *node,
+                  locop::NodeSummary &s)
+{
+  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+
+  s.args().append("value", tbl->lookup(node->value()));
+  s.args().append("filter(h,w)", to_str(node->filter()));
+  s.args().append("stride(h,w)", to_str(node->stride()));
+  s.args().append("padding", to_str(node->padding()));
+  s.args().append("fused", to_str(node->fusedActivationFunction()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMirrorPad *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("paddings", tbl->lookup(node->paddings()));
+  s.args().append("mode", to_str(node->mode()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleNonMaxSuppressionV4 *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("boxes", tbl->lookup(node->boxes()));
+  s.args().append("scores", tbl->lookup(node->scores()));
+  s.args().append("max_output_size", tbl->lookup(node->max_output_size()));
+  s.args().append("iou_threshold", tbl->lookup(node->iou_threshold()));
+  s.args().append("score_threshold", tbl->lookup(node->score_threshold()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleNonMaxSuppressionV5 *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("boxes", tbl->lookup(node->boxes()));
+  s.args().append("scores", tbl->lookup(node->scores()));
+  s.args().append("max_output_size", tbl->lookup(node->max_output_size()));
+  s.args().append("iou_threshold", tbl->lookup(node->iou_threshold()));
+  s.args().append("score_threshold", tbl->lookup(node->score_threshold()));
+  s.args().append("soft_nms_sigma", tbl->lookup(node->soft_nms_sigma()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleOneHot *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("indices", tbl->lookup(node->indices()));
+  s.args().append("depth", tbl->lookup(node->depth()));
+  s.args().append("on_value", tbl->lookup(node->on_value()));
+  s.args().append("off_value", tbl->lookup(node->off_value()));
+  s.args().append("axis", pepper::str(node->axis()));
+
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePack *node,
+                  locop::NodeSummary &s)
+{
+  for (uint32_t i = 0; i < node->values_count(); ++i)
+    s.args().append("values", tbl->lookup(node->values(i)));
+  s.args().append("values_count", pepper::str(node->values_count()));
+  s.args().append("axis", pepper::str(node->axis()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePad *node, locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("paddings", tbl->lookup(node->paddings()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePadV2 *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("paddings", tbl->lookup(node->paddings()));
+  s.args().append("constant_values", tbl->lookup(node->constant_values()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePRelu *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("alpha", tbl->lookup(node->alpha()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleRange *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("start", tbl->lookup(node->start()));
+  s.args().append("limit", tbl->lookup(node->limit()));
+  s.args().append("delta", tbl->lookup(node->delta()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReshape *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("tensor", tbl->lookup(node->tensor()));
+  s.args().append("shape", tbl->lookup(node->shape()));
+  // TODO Show newShape info
+  s.state(locop::NodeSummary::State::PartiallyKnown);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleResizeBilinear *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("size", tbl->lookup(node->size()));
+  s.args().append("align_corners", node->align_corners() ? "true" : "false");
+  s.args().append("half_pixel_centers", node->half_pixel_centers() ? "true" : "false");
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleResizeNearestNeighbor *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("size", tbl->lookup(node->size()));
+  s.args().append("align_corners", node->align_corners() ? "true" : "false");
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReverseSequence *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("seq_lengths", tbl->lookup(node->seq_lengths()));
+  s.args().append("seq_axis", std::to_string(node->seq_axis()));
+  s.args().append("batch_axis", std::to_string(node->batch_axis()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReverseV2 *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("tensor", tbl->lookup(node->tensor()));
+  s.args().append("axis", tbl->lookup(node->axis()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleScatterNd *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("indices", tbl->lookup(node->indices()));
+  s.args().append("updates", tbl->lookup(node->updates()));
+  s.args().append("shape", tbl->lookup(node->shape()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSegmentSum *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("segment_ids", tbl->lookup(node->segment_ids()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSelect *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("condition", tbl->lookup(node->condition()));
+  s.args().append("t", tbl->lookup(node->t()));
+  s.args().append("e", tbl->lookup(node->e()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSelectV2 *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("condition", tbl->lookup(node->condition()));
+  s.args().append("t", tbl->lookup(node->t()));
+  s.args().append("e", tbl->lookup(node->e()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleShape *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("out_type", to_str(node->out_type()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSlice *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("begin", tbl->lookup(node->begin()));
+  s.args().append("size", tbl->lookup(node->size()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSoftmax *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("logits", tbl->lookup(node->logits()));
+  s.args().append("beta", pepper::str(node->beta()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSpaceToBatchND *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("block_shape", tbl->lookup(node->block_shape()));
+  s.args().append("paddings", tbl->lookup(node->paddings()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSpaceToDepth *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("block_size", pepper::str(node->block_size()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSparseToDense *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("indices", tbl->lookup(node->indices()));
+  s.args().append("output_shape", tbl->lookup(node->output_shape()));
+  s.args().append("values", tbl->lookup(node->values()));
+  s.args().append("default_value", tbl->lookup(node->default_value()));
+  s.args().append("Validate_indices", pepper::str(node->validate_indices()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSplit *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("split_dim", tbl->lookup(node->split_dim()));
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("num_split", pepper::str(node->num_split()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSplitV *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("size_splits", tbl->lookup(node->size_splits()));
+  s.args().append("split_dim", tbl->lookup(node->split_dim()));
+  s.args().append("num_split", pepper::str(node->num_split()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSqueeze *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+
+  std::stringstream ss{"("};
+  for (size_t i = 0; i < node->squeeze_dims().size(); ++i)
+  {
+    if (i != 0)
+      ss << ", ";
+    ss << node->squeeze_dims()[i];
+  }
+  ss << ")";
+  s.args().append("squeeze_dims", ss.str());
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleStridedSlice *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("begin", tbl->lookup(node->begin()));
+  s.args().append("end", tbl->lookup(node->end()));
+  s.args().append("strides", tbl->lookup(node->strides()));
+  s.args().append("begin_mask", pepper::str(node->begin_mask()));
+  s.args().append("end_mask", pepper::str(node->end_mask()));
+  s.args().append("ellipsis_mask", pepper::str(node->ellipsis_mask()));
+  s.args().append("new_axis_mask", pepper::str(node->new_axis_mask()));
+  s.args().append("shrink_axis_mask", pepper::str(node->shrink_axis_mask()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTile *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("multiples", tbl->lookup(node->multiples()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTopKV2 *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("k", tbl->lookup(node->k()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTranspose *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("a", tbl->lookup(node->a()));
+  s.args().append("perm", tbl->lookup(node->perm()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTransposeConv *node,
+                  locop::NodeSummary &s)
+{
+  assert(node->padding() != luci::Padding::UNDEFINED);
+
+  s.args().append("inputSizes", tbl->lookup(node->inputSizes()));
+  s.args().append("filter", tbl->lookup(node->filter()));
+  s.args().append("outBackprop", tbl->lookup(node->outBackprop()));
+  s.args().append("stride(h,w)", to_str(node->stride()));
+  s.args().append("padding", to_str(node->padding()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnique *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("idx_out_type", to_str(node->idx_out_type()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnpack *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("value", tbl->lookup(node->value()));
+  s.args().append("num", pepper::str(node->num()));
+  s.args().append("axis", pepper::str(node->axis()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhere *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("condition", tbl->lookup(node->condition()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhile *node,
+                  locop::NodeSummary &s)
+{
+  for (uint32_t i = 0; i < node->input_count(); ++i)
+    s.args().append("input", tbl->lookup(node->input(i)));
+
+  if (node->cond_graph() != nullptr)
+    s.args().append("cond_graph", node->cond_graph()->name());
+  else
+    s.args().append("cond_branch", pepper::str(node->cond_branch()));
+
+  if (node->body_graph() != nullptr)
+    s.args().append("body_graph", node->body_graph()->name());
+  else
+    s.args().append("body_branch", pepper::str(node->body_branch()));
+
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTopKV2Out *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("topkv2", tbl->lookup(node->input()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUniqueOut *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("unique", tbl->lookup(node->input()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnpackOut *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("unpack", tbl->lookup(node->input()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhileOut *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("while", tbl->lookup(node->input()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleOutput *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("from", tbl->lookup(node->from()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBCQFullyConnected *node,
+                  locop::NodeSummary &s)
+{
+  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("weights_scales", tbl->lookup(node->weights_scales()));
+  s.args().append("weights_binary", tbl->lookup(node->weights_binary()));
+  s.args().append("bias", tbl->lookup(node->bias()));
+  s.args().append("weights_clusters", tbl->lookup(node->weights_clusters()));
+  s.args().append("fused", to_str(node->fusedActivationFunction()));
+  s.args().append("weights_hidden_size", pepper::str(node->weights_hidden_size()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBCQGather *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input_scales", tbl->lookup(node->input_scales()));
+  s.args().append("input_binary", tbl->lookup(node->input_binary()));
+  s.args().append("indices", tbl->lookup(node->indices()));
+  s.args().append("input_clusters", tbl->lookup(node->input_clusters()));
+  s.args().append("axis", pepper::str(node->axis()));
+  s.args().append("input_hidden_size", pepper::str(node->input_hidden_size()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleInstanceNorm *node,
+                  locop::NodeSummary &s)
+{
+  auto fused = node->fusedActivationFunction();
+  assert(fused != luci::FusedActFunc::UNDEFINED);
+
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("gamma", tbl->lookup(node->gamma()));
+  s.args().append("beta", tbl->lookup(node->beta()));
+  s.args().append("epsilon", pepper::str(node->epsilon()));
+  s.args().append("fused_activation_function", to_str(fused));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
 bool CircleNodeSummaryBuilderBase::build(const loco::Node *node, locop::NodeSummary &s) const
 {
   if (node->dialect() != luci::CircleDialect::get())
@@ -409,11 +1145,7 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleAdd *node, locop::NodeS
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleAddN *node, locop::NodeSummary &s) const
 {
-  for (uint32_t i = 0; i < node->arity(); ++i)
-    s.args().append("inputs", tbl()->lookup(node->inputs(i)));
-
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleArgMax *node, locop::NodeSummary &s) const
@@ -429,49 +1161,24 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleArgMin *node, locop::No
 bool CircleNodeSummaryBuilder::summary(const luci::CircleAveragePool2D *node,
                                        locop::NodeSummary &s) const
 {
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
-  s.args().append("value", tbl()->lookup(node->value()));
-  s.args().append("filter(h,w)", to_str(node->filter()));
-  s.args().append("stride(h,w)", to_str(node->stride()));
-  s.args().append("padding", to_str(node->padding()));
-  s.args().append("fused", to_str(node->fusedActivationFunction()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleBatchMatMul *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("x", tbl()->lookup(node->x()));
-  s.args().append("y", tbl()->lookup(node->y()));
-  s.args().append("adj_x", to_str(node->adj_x()));
-  s.args().append("adj_y", to_str(node->adj_y()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleBatchToSpaceND *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("block_shape", tbl()->lookup(node->block_shape()));
-  s.args().append("crops", tbl()->lookup(node->crops()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleCast *node, locop::NodeSummary &s) const
 {
-  s.args().append("x", tbl()->lookup(node->x()));
-  s.args().append("in_data_type", to_str(node->in_data_type()));
-  s.args().append("out_data_type", to_str(node->out_data_type()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleCeil *node, locop::NodeSummary &s) const
@@ -482,14 +1189,7 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleCeil *node, locop::Node
 bool CircleNodeSummaryBuilder::summary(const luci::CircleConcatenation *node,
                                        locop::NodeSummary &s) const
 {
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
-  for (uint32_t i = 0; i < node->numValues(); ++i)
-    s.args().append("values", tbl()->lookup(node->values(i)));
-  s.args().append("axis", pepper::str(node->axis()));
-  s.args().append("fused", to_str(node->fusedActivationFunction()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleConst *, locop::NodeSummary &s) const
@@ -500,22 +1200,7 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleConst *, locop::NodeSum
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleConv2D *node, locop::NodeSummary &s) const
 {
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-  assert(node->padding() != luci::Padding::UNDEFINED);
-
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("filter", tbl()->lookup(node->filter()));
-  s.args().append("bias", tbl()->lookup(node->bias()));
-
-  s.args().append("stride(h,w)", to_str(node->stride()));
-  s.args().append("dilation(h,w)", to_str(node->dilation()));
-
-  s.args().append("padding", to_str(node->padding()));
-  s.args().append("fused", to_str(node->fusedActivationFunction()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleCos *node, locop::NodeSummary &s) const
@@ -525,45 +1210,19 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleCos *node, locop::NodeS
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleCustom *node, locop::NodeSummary &s) const
 {
-  for (uint32_t i = 0; i < node->numInputs(); i++)
-  {
-    s.args().append("input" + std::to_string(i), tbl()->lookup(node->inputs(i)));
-  }
-  s.args().append("custom_code", node->custom_code());
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleDepthToSpace *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("block_size", std::to_string(node->block_size()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleDepthwiseConv2D *node,
                                        locop::NodeSummary &s) const
 {
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-  assert(node->padding() != luci::Padding::UNDEFINED);
-
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("filter", tbl()->lookup(node->filter()));
-  s.args().append("bias", tbl()->lookup(node->bias()));
-
-  s.args().append("stride(h,w)", to_str(node->stride()));
-  s.args().append("dilation(h,w)", to_str(node->dilation()));
-  s.args().append("padding", to_str(node->padding()));
-  s.args().append("depthMultiplier", std::to_string(node->depthMultiplier()));
-  s.args().append("fused", to_str(node->fusedActivationFunction()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleDiv *node, locop::NodeSummary &s) const
@@ -583,11 +1242,8 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleExp *node, locop::NodeS
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleExpandDims *node,
                                        locop::NodeSummary &s) const
-{
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("axis", tbl()->lookup(node->axis()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+{
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleFloor *node, locop::NodeSummary &s) const
@@ -609,44 +1265,24 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleFloorMod *node,
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleFill *node, locop::NodeSummary &s) const
 {
-  s.args().append("dims", tbl()->lookup(node->dims()));
-  s.args().append("value", tbl()->lookup(node->value()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleFullyConnected *node,
                                        locop::NodeSummary &s) const
 {
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("weights", tbl()->lookup(node->weights()));
-  s.args().append("bias", tbl()->lookup(node->bias()));
-  s.args().append("fused", to_str(node->fusedActivationFunction()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleGather *node, locop::NodeSummary &s) const
 {
-  s.args().append("params", tbl()->lookup(node->params()));
-  s.args().append("indices", tbl()->lookup(node->indices()));
-  s.args().append("axis", pepper::str(node->axis()));
-
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleGatherNd *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("params", tbl()->lookup(node->params()));
-  s.args().append("indices", tbl()->lookup(node->indices()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleGreater *node, locop::NodeSummary &s) const
@@ -662,32 +1298,13 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleGreaterEqual *node,
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleIf *node, locop::NodeSummary &s) const
 {
-  s.args().append("cond", tbl()->lookup(node->cond()));
-  for (uint32_t i = 0; i < node->input_count(); ++i)
-    s.args().append("input", tbl()->lookup(node->input(i)));
-
-  if (node->then_graph() != nullptr)
-    s.args().append("then_graph", node->then_graph()->name());
-  else
-    s.args().append("then_branch", pepper::str(node->then_branch()));
-
-  if (node->else_graph() != nullptr)
-    s.args().append("else_graph", node->else_graph()->name());
-  else
-    s.args().append("else_branch", pepper::str(node->else_branch()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleL2Normalize *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("x", tbl()->lookup(node->x()));
-  s.args().append("fused_activation_function", to_str(node->fusedActivationFunction()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleLess *node, locop::NodeSummary &s) const
@@ -704,22 +1321,13 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleLessEqual *node,
 bool CircleNodeSummaryBuilder::summary(const luci::CircleLeakyRelu *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("features", tbl()->lookup(node->features()));
-  s.args().append("alpha", std::to_string(node->alpha()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleLocalResponseNormalization *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("radius", pepper::str(node->radius()));
-  s.args().append("bias", pepper::str(node->bias()));
-  s.args().append("alpha", pepper::str(node->alpha()));
-  s.args().append("beta", pepper::str(node->beta()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleLog *node, locop::NodeSummary &s) const
@@ -754,26 +1362,19 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleLogistic *node,
 bool CircleNodeSummaryBuilder::summary(const luci::CircleLogSoftmax *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("logits", tbl()->lookup(node->logits()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleMatrixDiag *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("diagonal", tbl()->lookup(node->diagonal()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleMatrixSetDiag *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("diagonal", tbl()->lookup(node->diagonal()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleMaximum *node, locop::NodeSummary &s) const
@@ -784,17 +1385,7 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleMaximum *node, locop::N
 bool CircleNodeSummaryBuilder::summary(const luci::CircleMaxPool2D *node,
                                        locop::NodeSummary &s) const
 {
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
-  s.args().append("value", tbl()->lookup(node->value()));
-  s.args().append("filter(h,w)", to_str(node->filter()));
-  s.args().append("stride(h,w)", to_str(node->stride()));
-  s.args().append("padding", to_str(node->padding()));
-  s.args().append("fused", to_str(node->fusedActivationFunction()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleMean *node, locop::NodeSummary &s) const
@@ -810,11 +1401,7 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleMinimum *node, locop::N
 bool CircleNodeSummaryBuilder::summary(const luci::CircleMirrorPad *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("paddings", tbl()->lookup(node->paddings()));
-  s.args().append("mode", to_str(node->mode()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleMul *node, locop::NodeSummary &s) const
@@ -830,14 +1417,13 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleNeg *node, locop::NodeS
 bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV4 *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("boxes", pepper::str(node->boxes()));
-  s.args().append("scores", pepper::str(node->scores()));
-  s.args().append("max_output_size", pepper::str(node->max_output_size()));
-  s.args().append("iou_threshold", pepper::str(node->iou_threshold()));
-  s.args().append("score_threshold", pepper::str(node->score_threshold()));
+  return summary_node(tbl(), node, s);
+}
 
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV5 *node,
+                                       locop::NodeSummary &s) const
+{
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleNotEqual *node,
@@ -848,32 +1434,22 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleNotEqual *node,
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleOneHot *node, locop::NodeSummary &s) const
 {
-  s.args().append("indices", tbl()->lookup(node->indices()));
-  s.args().append("depth", tbl()->lookup(node->depth()));
-  s.args().append("on_value", tbl()->lookup(node->on_value()));
-  s.args().append("off_value", tbl()->lookup(node->off_value()));
-  s.args().append("axis", pepper::str(node->axis()));
-
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CirclePack *node, locop::NodeSummary &s) const
 {
-  for (uint32_t i = 0; i < node->values_count(); ++i)
-    s.args().append("values", tbl()->lookup(node->values(i)));
-  s.args().append("values_count", pepper::str(node->values_count()));
-  s.args().append("axis", pepper::str(node->axis()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CirclePad *node, locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("paddings", tbl()->lookup(node->paddings()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CirclePadV2 *node, locop::NodeSummary &s) const
+{
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CirclePow *node, locop::NodeSummary &s) const
@@ -883,20 +1459,12 @@ bool CircleNodeSummaryBuilder::summary(const luci::CirclePow *node, locop::NodeS
 
 bool CircleNodeSummaryBuilder::summary(const luci::CirclePRelu *node, locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("alpha", tbl()->lookup(node->alpha()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleRange *node, locop::NodeSummary &s) const
 {
-  s.args().append("start", tbl()->lookup(node->start()));
-  s.args().append("limit", tbl()->lookup(node->limit()));
-  s.args().append("delta", tbl()->lookup(node->delta()));
-
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleRank *node, locop::NodeSummary &s) const
@@ -946,52 +1514,31 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleReluN1To1 *node,
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleReshape *node, locop::NodeSummary &s) const
 {
-  s.args().append("tensor", tbl()->lookup(node->tensor()));
-  s.args().append("shape", tbl()->lookup(node->shape()));
-  // TODO Show newShape info
-  s.state(locop::NodeSummary::State::PartiallyKnown);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleResizeBilinear *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("size", tbl()->lookup(node->size()));
-  s.args().append("align_corners", node->align_corners() ? "true" : "false");
-  s.args().append("half_pixel_centers", node->half_pixel_centers() ? "true" : "false");
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleResizeNearestNeighbor *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("size", tbl()->lookup(node->size()));
-  s.args().append("align_corners", node->align_corners() ? "true" : "false");
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleReverseSequence *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("seq_lengths", tbl()->lookup(node->seq_lengths()));
-  s.args().append("seq_axis", std::to_string(node->seq_axis()));
-  s.args().append("batch_axis", std::to_string(node->batch_axis()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleReverseV2 *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("tensor", tbl()->lookup(node->tensor()));
-  s.args().append("axis", tbl()->lookup(node->axis()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleRound *node, locop::NodeSummary &s) const
@@ -1007,47 +1554,29 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleRsqrt *node, locop::Nod
 bool CircleNodeSummaryBuilder::summary(const luci::CircleScatterNd *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("indices", tbl()->lookup(node->indices()));
-  s.args().append("updates", tbl()->lookup(node->updates()));
-  s.args().append("shape", tbl()->lookup(node->shape()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSegmentSum *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("segment_ids", tbl()->lookup(node->segment_ids()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSelect *node, locop::NodeSummary &s) const
 {
-  s.args().append("condition", tbl()->lookup(node->condition()));
-  s.args().append("t", tbl()->lookup(node->t()));
-  s.args().append("e", tbl()->lookup(node->e()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSelectV2 *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("condition", tbl()->lookup(node->condition()));
-  s.args().append("t", tbl()->lookup(node->t()));
-  s.args().append("e", tbl()->lookup(node->e()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleShape *node, locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("out_type", to_str(node->out_type()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSin *node, locop::NodeSummary &s) const
@@ -1057,82 +1586,40 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleSin *node, locop::NodeS
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSlice *node, locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("begin", tbl()->lookup(node->begin()));
-  s.args().append("size", tbl()->lookup(node->size()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSoftmax *node, locop::NodeSummary &s) const
 {
-  s.args().append("logits", tbl()->lookup(node->logits()));
-  s.args().append("beta", pepper::str(node->beta()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSpaceToBatchND *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("block_shape", tbl()->lookup(node->block_shape()));
-  s.args().append("paddings", tbl()->lookup(node->paddings()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSpaceToDepth *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("block_size", pepper::str(node->block_size()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSparseToDense *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("indices", tbl()->lookup(node->indices()));
-  s.args().append("output_shape", tbl()->lookup(node->output_shape()));
-  s.args().append("values", tbl()->lookup(node->values()));
-  s.args().append("default_value", tbl()->lookup(node->default_value()));
-
-  s.args().append("Validate_indices", pepper::str(node->validate_indices()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSplit *node, locop::NodeSummary &s) const
 {
-  s.args().append("split_dim", tbl()->lookup(node->split_dim()));
-  s.args().append("input", tbl()->lookup(node->input()));
-
-  s.args().append("num_split", pepper::str(node->num_split()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSplitV *node, locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("size_splits", tbl()->lookup(node->size_splits()));
-  s.args().append("split_dim", tbl()->lookup(node->split_dim()));
-
-  s.args().append("num_split", pepper::str(node->num_split()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSqrt *node, locop::NodeSummary &s) const
@@ -1153,38 +1640,13 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleSquaredDifference *node
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSqueeze *node, locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-
-  std::stringstream ss{"("};
-  for (size_t i = 0; i < node->squeeze_dims().size(); ++i)
-  {
-    if (i != 0)
-      ss << ", ";
-    ss << node->squeeze_dims()[i];
-  }
-  ss << ")";
-
-  s.args().append("squeeze_dims", ss.str());
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleStridedSlice *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("begin", tbl()->lookup(node->begin()));
-  s.args().append("end", tbl()->lookup(node->end()));
-  s.args().append("strides", tbl()->lookup(node->strides()));
-
-  s.args().append("begin_mask", pepper::str(node->begin_mask()));
-  s.args().append("end_mask", pepper::str(node->end_mask()));
-  s.args().append("ellipsis_mask", pepper::str(node->ellipsis_mask()));
-  s.args().append("new_axis_mask", pepper::str(node->new_axis_mask()));
-  s.args().append("shrink_axis_mask", pepper::str(node->shrink_axis_mask()));
-
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSub *node, locop::NodeSummary &s) const
@@ -1204,92 +1666,44 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleTanh *node, locop::Node
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleTile *node, locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("multiples", tbl()->lookup(node->multiples()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleTopKV2 *node, locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("k", tbl()->lookup(node->k()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleTranspose *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("a", tbl()->lookup(node->a()));
-  s.args().append("perm", tbl()->lookup(node->perm()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleTransposeConv *node,
                                        locop::NodeSummary &s) const
 {
-  assert(node->padding() != luci::Padding::UNDEFINED);
-
-  s.args().append("inputSizes", tbl()->lookup(node->inputSizes()));
-  s.args().append("filter", tbl()->lookup(node->filter()));
-  s.args().append("outBackprop", tbl()->lookup(node->outBackprop()));
-
-  s.args().append("stride(h,w)", to_str(node->stride()));
-  s.args().append("padding", to_str(node->padding()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleUnique *node, locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("idx_out_type", to_str(node->idx_out_type()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleUnpack *node, locop::NodeSummary &s) const
 {
-  s.args().append("value", tbl()->lookup(node->value()));
-
-  s.args().append("num", pepper::str(node->num()));
-  s.args().append("axis", pepper::str(node->axis()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleWhere *node, locop::NodeSummary &s) const
 {
-  s.args().append("condition", tbl()->lookup(node->condition()));
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleWhile *node, locop::NodeSummary &s) const
 {
-  for (uint32_t i = 0; i < node->input_count(); ++i)
-    s.args().append("input", tbl()->lookup(node->input(i)));
-
-  if (node->cond_graph() != nullptr)
-    s.args().append("cond_graph", node->cond_graph()->name());
-  else
-    s.args().append("cond_branch", pepper::str(node->cond_branch()));
-
-  if (node->body_graph() != nullptr)
-    s.args().append("body_graph", node->body_graph()->name());
-  else
-    s.args().append("body_branch", pepper::str(node->body_branch()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleZerosLike *node,
@@ -1313,29 +1727,19 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleSplitVOut *node,
 bool CircleNodeSummaryBuilder::summary(const luci::CircleTopKV2Out *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("topkv2", tbl()->lookup(node->input()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleUniqueOut *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("unique", tbl()->lookup(node->input()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleUnpackOut *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("unpack", tbl()->lookup(node->input()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleIfOut *node, locop::NodeSummary &s) const
@@ -1349,14 +1753,16 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV4Out
   return use_input(tbl(), node, s);
 }
 
-bool CircleNodeSummaryBuilder::summary(const luci::CircleWhileOut *node,
+bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV5Out *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("while", tbl()->lookup(node->input()));
-
-  s.state(locop::NodeSummary::State::Complete);
+  return use_input(tbl(), node, s);
+}
 
-  return true;
+bool CircleNodeSummaryBuilder::summary(const luci::CircleWhileOut *node,
+                                       locop::NodeSummary &s) const
+{
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleInput *, locop::NodeSummary &s) const
@@ -1367,61 +1773,25 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleInput *, locop::NodeSum
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleOutput *node, locop::NodeSummary &s) const
 {
-  s.args().append("from", tbl()->lookup(node->from()));
-
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleBCQFullyConnected *node,
                                        locop::NodeSummary &s) const
 {
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("weights_scales", tbl()->lookup(node->weights_scales()));
-  s.args().append("weights_binary", tbl()->lookup(node->weights_binary()));
-  s.args().append("bias", tbl()->lookup(node->bias()));
-  s.args().append("weights_clusters", tbl()->lookup(node->weights_clusters()));
-
-  s.args().append("fused", to_str(node->fusedActivationFunction()));
-  s.args().append("weights_hidden_size", pepper::str(node->weights_hidden_size()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleBCQGather *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input_scales", tbl()->lookup(node->input_scales()));
-  s.args().append("input_binary", tbl()->lookup(node->input_binary()));
-  s.args().append("indices", tbl()->lookup(node->indices()));
-  s.args().append("input_clusters", tbl()->lookup(node->input_clusters()));
-
-  s.args().append("axis", pepper::str(node->axis()));
-  s.args().append("input_hidden_size", pepper::str(node->input_hidden_size()));
-
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleInstanceNorm *node,
                                        locop::NodeSummary &s) const
 {
-  auto fused = node->fusedActivationFunction();
-  assert(fused != luci::FusedActFunc::UNDEFINED);
-
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("gamma", tbl()->lookup(node->gamma()));
-  s.args().append("beta", tbl()->lookup(node->beta()));
-  s.args().append("epsilon", pepper::str(node->epsilon()));
-  s.args().append("fused_activation_function", to_str(fused));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 } // namespace
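Note on the refactor above: each per-op summary body moves into a free
summary_node overload, and the member summary() methods shrink to one-line
forwards; plain C++ overload resolution on the node's static type picks the
matching helper. A minimal sketch of the pattern (luci::CircleFoo is a
hypothetical single-input op, not a real luci node):

    bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFoo *node,
                      locop::NodeSummary &s)
    {
      s.args().append("x", tbl->lookup(node->x()));  // dump each input by name
      s.state(locop::NodeSummary::State::Complete);
      return true;
    }

    bool CircleNodeSummaryBuilder::summary(const luci::CircleFoo *node,
                                           locop::NodeSummary &s) const
    {
      return summary_node(tbl(), node, s); // overload resolution selects the helper
    }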
index 312749f..a832844 100644 (file)
@@ -32,6 +32,7 @@ public:
   {
     enum Algorithm
     {
+      FuseBatchNormWithTConv,
       FuseBCQ,
       FuseInstanceNorm,
       ResolveCustomOpAdd,
@@ -39,6 +40,7 @@ public:
       ResolveCustomOpMatMul,
       QuantizeDequantizeWeights,
       QuantizeWithMinMax,
+      Requantize,
     };
 
     enum AlgorithmParameters
diff --git a/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithTConv.h b/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithTConv.h
new file mode 100644 (file)
index 0000000..d3e930a
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
+#define __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to fuse Batch Normalization into CircleTransposeConv
+ */
+struct FuseBatchNormWithTConvPass final : public logo::Pass
+{
+  const char *name(void) const final { return "luci::FuseBatchNormWithTConvPass"; }
+
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
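Aside: the new pass exposes only the stock logo::Pass interface (name() plus
run(loco::Graph *)), so callers can run it standalone or drop it into a
logo::Phase as the optimizer does below. A sketch of standalone use, assuming
nothing beyond the header above; looping until run() returns false is the
usual fixpoint idiom for fusion passes:

    #include "luci/Pass/FuseBatchNormWithTConv.h"

    void fuse_bn_into_tconv(loco::Graph *g)
    {
      luci::FuseBatchNormWithTConvPass pass;
      while (pass.run(g)) // run() returns true while fusable mul/add pairs remain
        ;
    }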
  * limitations under the License.
  */
 
-#ifndef __ONERT_BACKEND_CPU_OPS_RELULAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_RELULAYER_H__
+#ifndef __LUCI_REQUANTIZE_PASS_H__
+#define __LUCI_REQUANTIZE_PASS_H__
 
-#include <backend/IPortableTensor.h>
+#include <loco.h>
 
-#include <exec/IFunction.h>
+#include <logo/Pass.h>
 
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
+#include <luci/Pass/QuantizationParameters.h>
+
+namespace luci
 {
 
-class ReLULayer : public ::onert::exec::IFunction
+/**
+ * @brief Pass to requantize a quantized model (e.g. symmetric int8 to asymmetric uint8)
+ */
+class RequantizePass : public logo::Pass
 {
 public:
-  ReLULayer();
+  RequantizePass(loco::DataType input_dtype, loco::DataType output_dtype)
+      : _input_dtype{input_dtype}, _output_dtype{output_dtype}
+  {
+    // DO NOTHING
+  }
+  virtual const char *name(void) const { return "luci::RequantizePass"; }
 
 public:
-  void reluFloat32();
-
-  void reluQuant8();
-
-  void configure(const IPortableTensor *input, IPortableTensor *output);
-
-  void run() override;
+  bool run(loco::Graph *graph);
 
 private:
-  const IPortableTensor *_input;
-  IPortableTensor *_output;
+  loco::DataType _input_dtype;
+  loco::DataType _output_dtype;
 };
 
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
+} // namespace luci
 
-#endif // __ONERT_BACKEND_CPU_OPS_RELULAYER_H__
+#endif //__LUCI_REQUANTIZE_PASS_H__
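Note: RequantizePass is constructed with the source and target dtypes and run
once over the graph. A minimal usage sketch (int8 to uint8 is the only pair
the implementation added further below actually handles):

    #include "luci/Pass/RequantizePass.h"

    void requantize_s8_to_u8(loco::Graph *g)
    {
      luci::RequantizePass pass(loco::DataType::S8, loco::DataType::U8);
      pass.run(g); // intentionally a one-time run; run() always returns false
    }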
index 2edf7a9..2ee759b 100644 (file)
 
 #include "luci/CircleOptimizer.h"
 
+#include "luci/Pass/FuseBatchNormWithTConv.h"
 #include "luci/Pass/FuseBCQPass.h"
 #include "luci/Pass/FuseInstanceNormPass.h"
 #include "luci/Pass/ResolveCustomOpAddPass.h"
 #include "luci/Pass/ResolveCustomOpBatchMatMulPass.h"
 #include "luci/Pass/ResolveCustomOpMatMulPass.h"
+#include "luci/Pass/RequantizePass.h"
 #include "luci/Pass/QuantizeWithMinMaxPass.h"
 #include "luci/Pass/QuantizeDequantizeWeightsPass.h"
 // TODO add more passes
@@ -34,6 +36,7 @@
 #include "ProgressReporter.h"
 #include "CircleOptimizerUtils.h"
 
+#include <luci/IR/CircleNodes.h>
 #include <logo/Phase.h>
 
 #include <memory>
@@ -125,6 +128,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
   {
     phase.emplace_back(std::make_unique<FuseBCQPass>());
   }
+  if (_options->query(Options::Algorithm::FuseBatchNormWithTConv))
+  {
+    phase.emplace_back(std::make_unique<FuseBatchNormWithTConvPass>());
+  }
 
   // Shape inference is needed for added nodes doing above transformations
   phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
@@ -163,6 +170,14 @@ void CircleOptimizer::quantize(loco::Graph *g) const
       throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
                                to_string(fakeq_supported_granularity));
 
+    // Clear existing quantparams before doing fake quantization
+    for (auto node : loco::active_nodes(loco::output_nodes(g)))
+    {
+      auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+      if (circle_node->quantparam() != nullptr)
+        circle_node->quantparam(nullptr);
+    }
+
     luci::QuantizeDequantizeWeightsPass fake_quantizer(
         str_to_dtype(input_dtype), str_to_dtype(output_dtype), str_to_granularity(granularity));
     fake_quantizer.run(g);
@@ -196,6 +211,27 @@ void CircleOptimizer::quantize(loco::Graph *g) const
     quantizer.run(g);
   }
 
+  // Requantize
+  if (_options->query(Options::Algorithm::Requantize))
+  {
+    static const std::vector<std::string> rq_supported_input_dtype{"int8"};
+    static const std::vector<std::string> rq_supported_output_dtype{"uint8"};
+
+    auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype);
+    auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype);
+
+    if (!in_array(to_lower_case(input_dtype), rq_supported_input_dtype))
+      throw std::runtime_error("Unsupported input type. List of supported input types: " +
+                               to_string(rq_supported_input_dtype));
+
+    if (!in_array(to_lower_case(output_dtype), rq_supported_output_dtype))
+      throw std::runtime_error("Unsupported output type. List of supported output types: " +
+                               to_string(rq_supported_output_dtype));
+
+    luci::RequantizePass requantizer(str_to_dtype(input_dtype), str_to_dtype(output_dtype));
+    requantizer.run(g);
+  }
+
   logo::Phase phase;
 
   // Do Shape/Type inference
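Note: from the driver side, Requantize is enabled like the other algorithms
and reuses the Quantize_input_dtype / Quantize_output_dtype parameters queried
above. A sketch, assuming the enable()/param() setters that pair with the
query()/param() reads used in this function:

    luci::CircleOptimizer optimizer;
    auto options = optimizer.options();
    options->enable(luci::CircleOptimizer::Options::Algorithm::Requantize);
    options->param(luci::CircleOptimizer::Options::AlgorithmParameters::Quantize_input_dtype, "int8");
    options->param(luci::CircleOptimizer::Options::AlgorithmParameters::Quantize_output_dtype, "uint8");
    optimizer.quantize(g); // throws std::runtime_error on an unsupported dtype pair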
index 260de5b..7aa2e3e 100644 (file)
@@ -38,9 +38,9 @@ const std::string node_name_prefix(luci::NodeName node_name)
 {
   std::string prefix = node_name;
 
-  if (prefix.find("ReadVariableOp/resource/") != std::string::npos)
+  if (prefix.find("/ReadVariableOp/resource") != std::string::npos)
   {
-    const auto start_index = prefix.find("ReadVariableOp/resource/");
+    const auto start_index = prefix.find("/ReadVariableOp/resource");
 
     const auto left_prefix = prefix.substr(0, start_index);
     const auto right_prefix = prefix.substr(start_index + 24);
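Note: the separator moves from a trailing-slash form to a leading-slash form,
and 24 is exactly the length of "/ReadVariableOp/resource", so the token is
cut out of the middle of the node name. Worked through on a hypothetical name
(the two halves are presumably rejoined in code below this hunk):

    // prefix       = "dense/ReadVariableOp/resource"
    // start_index  = 5
    // left_prefix  = prefix.substr(0, 5)    -> "dense"
    // right_prefix = prefix.substr(5 + 24)  -> ""   (nothing after the token)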
diff --git a/compiler/luci/pass/src/FuseBatchNormWithTConv.cpp b/compiler/luci/pass/src/FuseBatchNormWithTConv.cpp
new file mode 100644 (file)
index 0000000..e39455b
--- /dev/null
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithTConv.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+/**
+ *  NOTE TF's fusedBatchNorm is converted into Mul and Add ops in Circle.
+ *
+ *  BEFORE
+ *
+ *         [CircleTransposeConv]
+ *                  |
+ *                [mul]
+ *                  |
+ *                [add]
+ *
+ *  AFTER
+ *
+ *         [CircleTransposeConv]
+ */
+bool fused_batch_norm_with_tconv(luci::CircleTransposeConv *tconv)
+{
+  // check whether it has bias or not. This optimization works only if it doesn't.
+  auto bias = dynamic_cast<luci::CircleOutputExclude *>(tconv->bias());
+  if (not bias)
+    return false;
+
+  // get weight of tconv
+  auto filter = dynamic_cast<luci::CircleConst *>(tconv->filter());
+  if (not filter)
+    return false;
+  if (filter->dtype() != loco::DataType::FLOAT32)
+    return false;
+
+  // get mul node
+  auto tconv_output = loco::succs(tconv);
+  assert(tconv_output.size() == 1);
+  auto mul = dynamic_cast<luci::CircleMul *>(*tconv_output.begin());
+  if (not mul)
+    return false;
+  if (mul->dtype() != loco::DataType::FLOAT32)
+    return false;
+
+  // get add node
+  auto mul_output = loco::succs(mul);
+  assert(mul_output.size() == 1);
+  auto add = dynamic_cast<luci::CircleAdd *>(*mul_output.begin());
+  if (not add)
+    return false;
+  if (add->dtype() != loco::DataType::FLOAT32)
+    return false;
+  if (add->fusedActivationFunction() != luci::FusedActFunc::NONE &&
+      add->fusedActivationFunction() != luci::FusedActFunc::RELU6)
+    return false;
+
+  // get scale of batchnorm
+  auto scale = dynamic_cast<luci::CircleConst *>(mul->y());
+  if (not scale)
+    return false;
+
+  // scale dim(0) == tconv filter channel dim
+  if (filter->rank() != 4)
+    return false;
+  auto filter_channel_dim = filter->dim(3).value();
+  if (scale->rank() != 1)
+    return false;
+  auto scale_dim = scale->dim(0).value();
+  if (filter_channel_dim != scale_dim)
+    return false;
+
+  // get shift of batchnorm
+  auto shift = dynamic_cast<luci::CircleConst *>(add->y());
+  if (not shift)
+    return false;
+
+  // shift dim(0) == tconv filter channel dim
+  if (shift->rank() != 1)
+    return false;
+  auto shift_dim = shift->dim(0).value();
+  if (filter_channel_dim != shift_dim)
+    return false;
+
+  // scale the filter per output channel: filter = filter * scale (shift becomes the bias below)
+  uint32_t filter_batch_dim = filter->dim(0).value();
+  uint32_t filter_height_dim = filter->dim(1).value();
+  uint32_t filter_width_dim = filter->dim(2).value();
+  for (uint32_t c = 0; c < filter_channel_dim; c++)
+  {
+    for (uint32_t n = 0; n < filter_batch_dim; n++)
+    {
+      for (uint32_t h = 0; h < filter_height_dim; h++)
+      {
+        for (uint32_t w = 0; w < filter_width_dim; w++)
+        {
+          uint32_t offset = n * filter_height_dim * filter_width_dim * filter_channel_dim +
+                            h * filter_width_dim * filter_channel_dim + w * filter_channel_dim + c;
+          filter->at<loco::DataType::FLOAT32>(offset) *= scale->at<loco::DataType::FLOAT32>(c);
+        }
+      }
+    }
+  }
+
+  // fuse shift with transposed conv
+  tconv->bias(shift);
+
+  if (add->fusedActivationFunction() == luci::FusedActFunc::RELU6)
+  {
+    // separate relu op from add op
+    auto relu = add->graph()->nodes()->create<luci::CircleRelu6>();
+    relu->features(tconv);
+
+    // replace add with relu; the mul/add pair becomes dead code
+    replace(add).with(relu);
+  }
+  else
+  {
+    replace(add).with(tconv);
+  }
+
+  return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseBatchNormWithTConvPass::run(loco::Graph *g)
+{
+  bool changed = false;
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto tconv = dynamic_cast<luci::CircleTransposeConv *>(node);
+    if (not tconv)
+      continue;
+
+    changed |= fused_batch_norm_with_tconv(tconv);
+  }
+
+  return changed;
+}
+
+} // namespace luci
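Note on the arithmetic: with batch norm folded to
y = scale * tconv(x, W) + shift, scaling each output channel c of the filter
gives tconv(x, W') with W'(n,h,w,c) = W(n,h,w,c) * scale(c), so only the
per-channel shift remains, and it becomes the bias. The offset expression in
the loop is the row-major NHWC flattening; the same index math in isolation
(a sketch, not part of the pass):

    // offset = n*H*W*C + h*W*C + w*C + c, i.e. ((n*H + h)*W + w)*C + c
    uint32_t nhwc_offset(uint32_t n, uint32_t h, uint32_t w, uint32_t c,
                         uint32_t H, uint32_t W, uint32_t C)
    {
      return ((n * H + h) * W + w) * C + c;
    }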
index b335a53..60c1cdd 100644 (file)
@@ -472,7 +472,12 @@ struct QuantizeWeights final : public luci::CircleNodeMutableVisitor<bool>
         if (granularity == QuantizationGranularity::ChannelWise)
         {
           auto quantparam = circle_node->quantparam();
-          assert(quantparam != nullptr);
+          if (quantparam == nullptr)
+          {
+            assert(false && "quantparam is nullptr");
+            return false;
+          }
+
           auto min = quantparam->min;
           auto scaling_factor = quantparam->scale;
           int32_t channel_dim_index = 0;
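Note: assert() compiles to nothing when NDEBUG is defined, so the bare assert
gave no protection in release builds; the added early return keeps release
builds from dereferencing a null quantparam on the very next line
(quantparam->min), while debug builds still stop with the message.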
diff --git a/compiler/luci/pass/src/RequantizePass.cpp b/compiler/luci/pass/src/RequantizePass.cpp
new file mode 100644 (file)
index 0000000..49fbf76
--- /dev/null
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RequantizePass.h"
+#include "QuantizationUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Log.h>
+
+#include <oops/UserExn.h>
+
+#include <iostream>
+#include <cmath>
+
+namespace luci
+{
+
+namespace
+{
+
+// Check if the node is the bias of Conv2D, DepthwiseConv2D, or FullyConnected layer
+bool is_bias(CircleConst *node)
+{
+  if (node == nullptr)
+    return false;
+
+  auto succs = loco::succs(node);
+  if (succs.size() != 1) // assume bias is used by only one node
+    return false;
+
+  for (auto out : succs)
+  {
+    auto conv = dynamic_cast<CircleConv2D *>(out);
+    if (conv != nullptr && conv->bias() == node)
+      return true;
+
+    auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
+    if (dw_conv != nullptr && dw_conv->bias() == node)
+      return true;
+
+    auto fc = dynamic_cast<CircleFullyConnected *>(out);
+    if (fc != nullptr && fc->bias() == node)
+      return true;
+
+    // TODO: add TransposeConv when bias is supported in CircleTransposeConv
+  }
+  return false;
+}
+
+void requant_nonconst_int8_to_uint8(CircleNode *circle_node)
+{
+  assert(circle_node->dtype() == loco::DataType::S8);
+
+  auto quantparam = circle_node->quantparam();
+  assert(quantparam != nullptr);
+  for (size_t i = 0; i < quantparam->zerop.size(); ++i)
+  {
+    quantparam->zerop[i] += 128;
+  }
+  circle_node->dtype(loco::DataType::U8);
+}
+
+// Requantize CircleConst from symmetric int8 to asymmetric uint8
+// Original values: -127 ~ 127
+// After requantization: 1 ~ 255 (zp <- zp + 128)
+void requant_const_int8_to_uint8(CircleConst *node)
+{
+  assert(node->dtype() == loco::DataType::S8);
+
+  uint32_t size = node->size<loco::DataType::S8>();
+  std::vector<int32_t> requantized_values(size);
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    int32_t data = node->at<loco::DataType::S8>(i);
+    requantized_values[i] = data + 128;
+  }
+
+  node->dtype(loco::DataType::U8); // change the type of tensor
+  node->size<loco::DataType::U8>(size);
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    assert(1 <= requantized_values[i] && requantized_values[i] <= 255);
+    node->at<loco::DataType::U8>(i) = requantized_values[i];
+  }
+
+  auto quantparam = node->quantparam();
+  assert(quantparam != nullptr);
+  for (size_t i = 0; i < quantparam->zerop.size(); ++i)
+  {
+    quantparam->zerop[i] += 128;
+  }
+}
+
+/**
+ * @brief RequantizeNonConst requantizes tensors for activations
+ */
+struct RequantizeNonConst final : public luci::CircleNodeMutableVisitor<bool>
+{
+  RequantizeNonConst(loco::DataType input, loco::DataType output)
+      : _input_type(input), _output_type(output)
+  {
+  }
+
+  loco::DataType _input_type;
+  loco::DataType _output_type;
+
+  // Requantize input tensors of each node
+  bool visit(luci::CircleNode *node)
+  {
+    LOGGER(l);
+    INFO(l) << "RequantizeNonConst visit node: " << node->name() << std::endl;
+    auto arity = node->arity();
+    for (uint32_t i = 0; i < arity; i++)
+    {
+      auto input_node = node->arg(i);
+      auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
+
+      // Check if this was quantized (only quantized tensors are requantized)
+      if (circle_node->quantparam() == nullptr)
+        continue;
+
+      // Check if this is already requantized
+      if (circle_node->dtype() == _output_type)
+        continue;
+
+      // Check if this is not const (only non-const is requantized in this function)
+      auto circle_const = dynamic_cast<CircleConst *>(circle_node);
+      if (circle_const != nullptr)
+        continue;
+
+      if (_input_type == loco::DataType::S8 && _output_type == loco::DataType::U8)
+        requant_nonconst_int8_to_uint8(circle_node);
+    }
+    return false;
+  }
+};
+
+/**
+ * @brief RequantizeConst requantizes tensors for weights
+ */
+struct RequantizeConst final : public luci::CircleNodeMutableVisitor<bool>
+{
+  RequantizeConst(loco::DataType input, loco::DataType output)
+      : _input_type(input), _output_type(output)
+  {
+  }
+
+  loco::DataType _input_type;
+  loco::DataType _output_type;
+
+  // Requantize input tensors of each node
+  bool visit(luci::CircleNode *node)
+  {
+    LOGGER(l);
+    INFO(l) << "RequantizeConst visit node: " << node->name() << std::endl;
+    auto arity = node->arity();
+    for (uint32_t i = 0; i < arity; i++)
+    {
+      auto input_node = node->arg(i);
+      auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
+
+      // Check if this was quantized (only quantized tensors are requantized)
+      if (circle_node->quantparam() == nullptr)
+        continue;
+
+      // Check if this is already requantized
+      if (circle_node->dtype() == _output_type)
+        continue;
+
+      // Check if this is const (only const is requantized in this function)
+      auto circle_const = dynamic_cast<CircleConst *>(circle_node);
+      if (circle_const == nullptr)
+        continue;
+
+      // Skip bias; bias is not requantized when going int8 -> uint8
+      if (is_bias(circle_const))
+        continue;
+
+      if (_input_type == loco::DataType::S8 && _output_type == loco::DataType::U8)
+        requant_const_int8_to_uint8(circle_const);
+    }
+    return false;
+  }
+};
+
+} // namespace
+
+bool RequantizePass::run(loco::Graph *g)
+{
+  LOGGER(l);
+  INFO(l) << "RequantizePass Start" << std::endl;
+
+  // Requantize non-const (activations)
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    RequantizeNonConst rqnc(_input_dtype, _output_dtype);
+    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+    circle_node->accept(&rqnc);
+  }
+
+  // Requantize const nodes (weights and other constants)
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    RequantizeConst rqc(_input_dtype, _output_dtype);
+    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+    circle_node->accept(&rqc);
+  }
+
+  // Update output dtype
+  auto graph_outputs = g->outputs();
+  for (auto node : loco::output_nodes(g))
+  {
+    auto circle_node = loco::must_cast<luci::CircleOutput *>(node);
+    if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _output_dtype)
+    {
+      circle_node->dtype(_output_dtype);
+      auto graph_output = graph_outputs->at(circle_node->index());
+      graph_output->dtype(_output_dtype);
+    }
+  }
+
+  INFO(l) << "RequantizePass End" << std::endl;
+  return false; // one time run
+}
+
+} // namespace luci
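For context, a minimal usage sketch of the pass (hypothetical: it assumes a RequantizePass constructor taking the input and output dtypes that back _input_dtype/_output_dtype above, which this diff does not show):

  luci::RequantizePass pass(loco::DataType::S8, loco::DataType::U8);
  pass.run(graph); // returns false by design: the pass runs once and does not iterate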
index 6355ec5..db25186 100644 (file)
@@ -47,49 +47,19 @@ std::ostream &operator<<(std::ostream &os, const loco::TensorShape &tensor_shape
   return os;
 }
 
-// Call this for CircleAvgPool2D and CircleMaxPool2D only
-template <class Pool2DType> loco::NodeShape infer_pool_2d_shape(const Pool2DType *node)
+loco::TensorShape own_shape(const luci::CircleNode *node)
 {
-  LUCI_ASSERT(loco::shape_known(node->value()), "Shape must be known");
-
-  auto ifm_shape = loco::shape_get(node->value()).template as<loco::TensorShape>();
-  assert(ifm_shape.rank() == 4);
-
-  uint32_t input_height = ifm_shape.dim(1).value();
-  uint32_t input_width = ifm_shape.dim(2).value();
-  uint32_t stride_height = node->stride()->h();
-  uint32_t stride_width = node->stride()->w();
-  uint32_t window_height = node->filter()->h();
-  uint32_t window_width = node->filter()->w();
-  uint32_t dilation_height = 1; // dilation for CircleAvgPool2D and CircleMaxPool2D is 1
-  uint32_t dilation_width = 1;
-  uint32_t effective_window_height = dilation_height * (window_height - 1) + 1;
-  uint32_t effective_window_width = dilation_width * (window_width - 1) + 1;
-
-  uint32_t output_height = 0;
-  uint32_t output_width = 0;
-
-  if (node->padding() == luci::Padding::VALID)
-  {
-    output_height = (input_height + stride_height - effective_window_height) / stride_height;
-    output_width = (input_width + stride_width - effective_window_width) / stride_width;
-  }
-  else if (node->padding() == luci::Padding::SAME)
-  {
-    output_height = (input_height + stride_height - 1) / stride_height;
-    output_width = (input_width + stride_width - 1) / stride_width;
-  }
-  else
-    LUCI_ASSERT(false, "Wrong padding type");
-
-  loco::TensorShape ofm_shape;
-  ofm_shape.rank(4);
-  ofm_shape.dim(0) = ifm_shape.dim(0);
-  ofm_shape.dim(1) = output_height;
-  ofm_shape.dim(2) = output_width;
-  ofm_shape.dim(3) = ifm_shape.dim(3);
+  loco::TensorShape shape;
+  shape.rank(node->rank());
+  for (uint32_t r = 0; r < node->rank(); ++r)
+    shape.dim(r) = loco::Dimension(node->dim(r).value());
+  return shape;
+}
 
-  return loco::NodeShape{ofm_shape};
+loco::NodeShape use_own(const luci::CircleNode *node)
+{
+  loco::TensorShape shape = own_shape(node);
+  return loco::NodeShape{shape};
 }
 
 /**
@@ -192,116 +162,6 @@ loco::TensorShape broadcast_shape(const loco::TensorShape &x, const loco::Tensor
   return output_shape;
 }
 
-// BatchMatMulV2 supports broadcasting in the batch dimensions(BatchMatMul doesn't)
-// TODO Distinguish BatchMatMul and BatchMatMulV2
-loco::NodeShape infer_batchmatmul_shape(const loco::TensorShape &x_shape,
-                                        const loco::TensorShape &y_shape, bool adj_x, bool adj_y)
-{
-  uint32_t x_rank = x_shape.rank();
-  uint32_t y_rank = y_shape.rank();
-  assert(x_rank >= 2 && y_rank >= 2);
-
-  loco::TensorShape output_shape;
-  output_shape.rank(x_shape.rank());
-  // Braodcast in the batch dimension
-  if (x_rank > 2 || y_rank > 2)
-  {
-    loco::TensorShape dummy_x = x_shape;
-    loco::TensorShape dummy_y = y_shape;
-    expand_rank(dummy_x, dummy_y);
-    if (x_rank < y_rank)
-      expand_rank(output_shape, dummy_y);
-
-    for (uint32_t d = 0; d < output_shape.rank() - 2; d++)
-    {
-      uint32_t max_dim = std::max(dummy_x.dim(d).value(), dummy_y.dim(d).value());
-      if (dummy_x.dim(d) == dummy_y.dim(d) ||
-          dummy_x.dim(d).value() * dummy_y.dim(d).value() == max_dim)
-        output_shape.dim(d).set(max_dim);
-      else
-        INTERNAL_EXN("BatchMatMul has wrong shape");
-    }
-  }
-
-  loco::Dimension x_lhs = adj_x ? x_shape.dim(x_rank - 1) : x_shape.dim(x_rank - 2);
-  loco::Dimension x_rhs = adj_x ? x_shape.dim(x_rank - 2) : x_shape.dim(x_rank - 1);
-  loco::Dimension y_lhs = adj_y ? y_shape.dim(y_rank - 1) : y_shape.dim(y_rank - 2);
-  loco::Dimension y_rhs = adj_y ? y_shape.dim(y_rank - 2) : y_shape.dim(y_rank - 1);
-
-  if (not(x_rhs == y_lhs))
-    INTERNAL_EXN("x_rhs and y_lhs should be same");
-
-  uint32_t out_rank = output_shape.rank();
-  output_shape.dim(out_rank - 2) = x_lhs;
-  output_shape.dim(out_rank - 1) = y_rhs;
-
-  return loco::NodeShape{output_shape};
-}
-
-loco::TensorShape own_shape(const luci::CircleNode *node)
-{
-  loco::TensorShape shape;
-  shape.rank(node->rank());
-  for (uint32_t r = 0; r < node->rank(); ++r)
-    shape.dim(r) = loco::Dimension(node->dim(r).value());
-  return shape;
-}
-
-loco::TensorShape infer_reducer(const loco::Node *input, const loco::Node *indices, bool keep_dims)
-{
-  const loco::DataType S32 = loco::DataType::S32;
-
-  auto input_shape = loco::shape_get(input).as<loco::TensorShape>();
-  auto reduction_indices = loco::must_cast<const luci::CircleConst *>(indices);
-
-  { // Exceptions
-    // TODO support non-const case
-    // TODO support other data type
-    LUCI_ASSERT(reduction_indices->dtype() == S32, "Only support int 32");
-  }
-
-  std::vector<int32_t> reduction_values;
-
-  for (uint32_t i = 0; i < reduction_indices->size<S32>(); ++i)
-  {
-    int32_t axis = reduction_indices->at<S32>(i);
-    if (axis < 0)
-      axis += input_shape.rank();
-    if (not(0 <= axis and axis < static_cast<int32_t>(input_shape.rank())))
-      INTERNAL_EXN_V("Invalid reduction axis for REDUCER", oops::to_uint32(axis));
-    reduction_values.push_back(axis);
-  }
-
-  loco::TensorShape output_shape;
-
-  if (keep_dims)
-  {
-    output_shape.rank(input_shape.rank());
-    for (uint32_t i = 0; i < input_shape.rank(); ++i)
-      output_shape.dim(i) = input_shape.dim(i);
-    for (uint32_t i = 0; i < reduction_values.size(); ++i)
-      output_shape.dim(reduction_values.at(i)) = 1;
-  }
-  else
-  {
-    std::vector<bool> check_reduce(input_shape.rank(), false);
-    for (uint32_t i = 0; i < reduction_values.size(); ++i)
-      check_reduce.at(reduction_values.at(i)) = true;
-
-    uint32_t reduce_cnt = 0;
-    for (uint32_t i = 0; i < check_reduce.size(); ++i)
-      if (check_reduce.at(i))
-        ++reduce_cnt;
-
-    output_shape.rank(input_shape.rank() - reduce_cnt);
-    for (uint32_t i = 0, j = 0; i < check_reduce.size(); ++i)
-      if (check_reduce.at(i) == false)
-        output_shape.dim(j++) = input_shape.dim(i);
-  }
-
-  return output_shape;
-}
-
 /**
  * @brief vector_from_constant will return int64_t vector from CircleConst node
  */
@@ -337,1334 +197,2122 @@ template <class CIRCLENODE> loco::NodeShape use_logits(const CIRCLENODE *node)
   return loco::NodeShape{shape};
 }
 
-loco::NodeShape use_own(const luci::CircleNode *node)
+template <class CIRCLENODE>
+loco::NodeShape use_paddings(const CIRCLENODE *node, const luci::CircleConst *paddings)
 {
-  loco::TensorShape shape = own_shape(node);
-  return loco::NodeShape{shape};
-}
+  const loco::DataType S32 = loco::DataType::S32;
 
-/**
- * @brief Class to infer the shape of CircleNode
- *
- * @note All CircleNode's inputs and outputs are always loco::Domain::Tensor
- */
-class ShapeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::NodeShape>
-{
-public:
-  loco::NodeShape visit(const luci::CircleAbs *node) final { return use_x(node); }
+  auto input_shape = loco::shape_get(node->input()).template as<loco::TensorShape>();
 
-  loco::NodeShape visit(const luci::CircleAdd *node) final { return broadcast_xy(node); }
+  // TODO support other data type
+  LUCI_ASSERT(paddings->dtype() == S32, "Only support int32 for now");
+  LUCI_ASSERT(paddings->rank() == 2, "paddings should be rank 2")
 
-  loco::NodeShape visit(const luci::CircleAddN *node) final
-  {
-    auto shape = loco::shape_get(node->inputs(0)).as<loco::TensorShape>();
+  int32_t n = paddings->dim(0).value();
+  int32_t v = paddings->dim(1).value();
 
-    for (uint32_t idx = 1; idx < node->arity(); ++idx)
-    {
-      auto shape_idx = loco::shape_get(node->inputs(idx)).as<loco::TensorShape>();
-      if (!(shape == shape_idx))
-      {
-        INTERNAL_EXN_V("ADD_N shape not same as the first input: ", idx);
-      }
-    }
+  LUCI_ASSERT(v == 2, "paddings should be [n, 2]");
+  LUCI_ASSERT(n == int32_t(input_shape.rank()),
+              "paddings [n, 2] should have same value of input rank");
 
-    return loco::NodeShape{shape};
-  }
+  loco::TensorShape output_shape;
 
-  loco::NodeShape visit(const luci::CircleArgMax *node) final
+  output_shape.rank(input_shape.rank());
+  for (int32_t ni = 0; ni < n; ++ni)
   {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
-
-    int64_t select_axis = 0;
-    {
-      LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
-
-      // Only support node's shape() is CircleConst with S32/S64
-      // Support S32 for now.
-      auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
-      LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
-                  "Only support int32 CircleConst for CircleArgMax");
+    int32_t idx = ni * 2;
+    int value = input_shape.dim(ni).value();
+    value += paddings->at<S32>(idx + 0); // left
+    value += paddings->at<S32>(idx + 1); // right
+    output_shape.dim(ni) = value;
+  }
 
-      if (const_shape_node->rank() > 1)
-        INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
-                       oops::to_uint32(const_shape_node->rank()));
+  return loco::NodeShape{output_shape};
+}
 
-      select_axis = const_shape_node->scalar<loco::DataType::S32>();
-    }
-    assert(select_axis < input_shape.rank());
-    assert(select_axis >= 0); // TODO support minus of this breaks
+loco::NodeShape infer_add_n(const luci::CircleAddN *node)
+{
+  auto shape = loco::shape_get(node->inputs(0)).as<loco::TensorShape>();
 
-    // NOTE select_axis is removed
-    loco::TensorShape shape_output;
-    uint32_t rank = input_shape.rank();
-    uint32_t shrink = static_cast<uint32_t>(select_axis);
-    assert(rank > 0);
-    shape_output.rank(rank - 1);
-    for (uint32_t r = 0, d = 0; r < rank; ++r)
+  for (uint32_t idx = 1; idx < node->arity(); ++idx)
+  {
+    auto shape_idx = loco::shape_get(node->inputs(idx)).as<loco::TensorShape>();
+    if (!(shape == shape_idx))
     {
-      if (r == shrink)
-        continue;
-      shape_output.dim(d++) = input_shape.dim(r);
+      INTERNAL_EXN_V("ADD_N shape not same as the first input: ", idx);
     }
-    return loco::NodeShape{shape_output};
   }
+  return loco::NodeShape{shape};
+}
 
-  loco::NodeShape visit(const luci::CircleArgMin *node) final
-  {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
-
-    int64_t select_axis = 0;
-    {
-      LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
+loco::NodeShape infer_arg_max(const luci::CircleArgMax *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
 
-      // Only support node's shape() is CircleConst with S32/S64
-      // Support S32 for now.
-      auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
-      LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
-                  "Only support int32 CircleConst for CircleArgMin");
+  int64_t select_axis = 0;
+  {
+    LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
 
-      if (const_shape_node->rank() > 1)
-        INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
-                       oops::to_uint32(const_shape_node->rank()));
+    // Only support the case where dimension() is CircleConst with S32/S64
+    // Support S32 for now.
+    auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
+    LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
+                "Only support int32 CircleConst for CircleArgMax");
 
-      select_axis = const_shape_node->scalar<loco::DataType::S32>();
-    }
-    assert(select_axis < input_shape.rank());
-    assert(select_axis >= 0); // TODO support minus of this breaks
+    if (const_shape_node->rank() > 1)
+      INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
+                     oops::to_uint32(const_shape_node->rank()));
 
-    // NOTE select_axis is removed
-    loco::TensorShape shape_output;
-    uint32_t rank = input_shape.rank();
-    uint32_t shrink = static_cast<uint32_t>(select_axis);
-    assert(rank > 0);
-    shape_output.rank(rank - 1);
-    for (uint32_t r = 0, d = 0; r < rank; ++r)
-    {
-      if (r == shrink)
-        continue;
-      shape_output.dim(d++) = input_shape.dim(r);
-    }
-    return loco::NodeShape{shape_output};
+    select_axis = const_shape_node->scalar<loco::DataType::S32>();
   }
+  assert(select_axis < input_shape.rank());
+  assert(select_axis >= 0); // TODO support negative axis; this breaks for now
 
-  loco::NodeShape visit(const luci::CircleAveragePool2D *node) final
+  // NOTE select_axis is removed
+  loco::TensorShape shape_output;
+  uint32_t rank = input_shape.rank();
+  uint32_t shrink = static_cast<uint32_t>(select_axis);
+  assert(rank > 0);
+  shape_output.rank(rank - 1);
+  for (uint32_t r = 0, d = 0; r < rank; ++r)
   {
-    return infer_pool_2d_shape(node);
+    if (r == shrink)
+      continue;
+    shape_output.dim(d++) = input_shape.dim(r);
   }
+  return loco::NodeShape{shape_output};
+}
 
-  loco::NodeShape visit(const luci::CircleBatchMatMul *node) final
-  {
-    auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
-    auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
-
-    return infer_batchmatmul_shape(x_shape, y_shape, node->adj_x(), node->adj_y());
-  }
+loco::NodeShape infer_arg_min(const luci::CircleArgMin *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
 
-  loco::NodeShape visit(const luci::CircleBatchToSpaceND *node) final
+  int64_t select_axis = 0;
   {
-    const loco::DataType S32 = loco::DataType::S32;
-
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    // Support only input rank is 3 and 4
-    assert(input_shape.rank() == 3 || input_shape.rank() == 4);
+    LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
 
-    // Only support block_shape() with S32 type CircleConst for now
-    auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape());
-    LUCI_ASSERT(const_block_shape->dtype() == loco::DataType::S32,
-                "Only support int32 block_shape");
+    // Only support node's shape() is CircleConst with S32/S64
+    // Support S32 for now.
+    auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
+    LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
+                "Only support int32 CircleConst for CircleArgMin");
 
-    // Only support crops() with S32 type CircleConst for now
-    auto const_crops = loco::must_cast<luci::CircleConst *>(node->crops());
-    LUCI_ASSERT(const_crops->dtype() == loco::DataType::S32, "Only support int32 crops");
+    if (const_shape_node->rank() > 1)
+      INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
+                     oops::to_uint32(const_shape_node->rank()));
 
-    auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
-    auto const_crops_shape = loco::shape_get(const_crops).as<loco::TensorShape>();
-    assert(const_block_shape_shape.rank() == 1);
-    assert(const_crops_shape.rank() == 2);
+    select_axis = const_shape_node->scalar<loco::DataType::S32>();
+  }
+  assert(select_axis < input_shape.rank());
+  assert(select_axis >= 0); // TODO support negative axis; this breaks for now
 
-    int32_t input_spatial_dim = input_shape.rank() - 2;
-    assert(const_block_shape_shape.dim(0) == input_spatial_dim);
-    assert(const_crops_shape.dim(0) == input_spatial_dim);
-    assert(const_crops_shape.dim(1) == 2);
+  // NOTE select_axis is removed
+  loco::TensorShape shape_output;
+  uint32_t rank = input_shape.rank();
+  uint32_t shrink = static_cast<uint32_t>(select_axis);
+  assert(rank > 0);
+  shape_output.rank(rank - 1);
+  for (uint32_t r = 0, d = 0; r < rank; ++r)
+  {
+    if (r == shrink)
+      continue;
+    shape_output.dim(d++) = input_shape.dim(r);
+  }
+  return loco::NodeShape{shape_output};
+}
 
-    loco::TensorShape shape_output;
+// Call this for CircleAvgPool2D and CircleMaxPool2D only
+template <class Pool2DType> loco::NodeShape infer_pool_2d_shape(const Pool2DType *node)
+{
+  LUCI_ASSERT(loco::shape_known(node->value()), "Shape must be known");
 
-    shape_output.rank(input_shape.rank());
+  auto ifm_shape = loco::shape_get(node->value()).template as<loco::TensorShape>();
+  assert(ifm_shape.rank() == 4);
 
-    int32_t output_batch_size = input_shape.dim(0).value();
-    for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
-    {
-      int dim_size = input_shape.dim(dim + 1).value() * const_block_shape->at<S32>(dim);
-      dim_size -= const_crops->at<S32>(dim * 2);
-      dim_size -= const_crops->at<S32>(dim * 2 + 1);
-      shape_output.dim(dim + 1) = dim_size;
+  uint32_t input_height = ifm_shape.dim(1).value();
+  uint32_t input_width = ifm_shape.dim(2).value();
+  uint32_t stride_height = node->stride()->h();
+  uint32_t stride_width = node->stride()->w();
+  uint32_t window_height = node->filter()->h();
+  uint32_t window_width = node->filter()->w();
+  uint32_t dilation_height = 1; // dilation for CircleAvgPool2D and CircleMaxPool2D is 1
+  uint32_t dilation_width = 1;
+  uint32_t effective_window_height = dilation_height * (window_height - 1) + 1;
+  uint32_t effective_window_width = dilation_width * (window_width - 1) + 1;
 
-      assert(output_batch_size % const_block_shape->at<S32>(dim) == 0);
-      output_batch_size = output_batch_size / const_block_shape->at<S32>(dim);
-    }
-    shape_output.dim(0) = output_batch_size;
-    shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
+  uint32_t output_height = 0;
+  uint32_t output_width = 0;
 
-    return loco::NodeShape{shape_output};
+  if (node->padding() == luci::Padding::VALID)
+  {
+    output_height = (input_height + stride_height - effective_window_height) / stride_height;
+    output_width = (input_width + stride_width - effective_window_width) / stride_width;
   }
+  else if (node->padding() == luci::Padding::SAME)
+  {
+    output_height = (input_height + stride_height - 1) / stride_height;
+    output_width = (input_width + stride_width - 1) / stride_width;
+  }
+  else
+    LUCI_ASSERT(false, "Wrong padding type");
+
+  loco::TensorShape ofm_shape;
+  ofm_shape.rank(4);
+  ofm_shape.dim(0) = ifm_shape.dim(0);
+  ofm_shape.dim(1) = output_height;
+  ofm_shape.dim(2) = output_width;
+  ofm_shape.dim(3) = ifm_shape.dim(3);
+
+  return loco::NodeShape{ofm_shape};
+}
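A quick numeric check of the padding arithmetic above, with illustrative values input = 32, window = 3, stride = 2 (dilation is 1 for pooling):

  VALID: (32 + 2 - 3) / 2 = 15
  SAME : (32 + 2 - 1) / 2 = 16   (integer form of ceil(32 / 2))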
+
+loco::NodeShape infer_batch_to_space_nd(const luci::CircleBatchToSpaceND *node)
+{
+  const loco::DataType S32 = loco::DataType::S32;
+
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  // Support only input rank is 3 and 4
+  assert(input_shape.rank() == 3 || input_shape.rank() == 4);
+
+  // Only support block_shape() with S32 type CircleConst for now
+  auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape());
+  LUCI_ASSERT(const_block_shape->dtype() == loco::DataType::S32, "Only support int32 block_shape");
+
+  // Only support crops() with S32 type CircleConst for now
+  auto const_crops = loco::must_cast<luci::CircleConst *>(node->crops());
+  LUCI_ASSERT(const_crops->dtype() == loco::DataType::S32, "Only support int32 crops");
+
+  auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
+  auto const_crops_shape = loco::shape_get(const_crops).as<loco::TensorShape>();
+  assert(const_block_shape_shape.rank() == 1);
+  assert(const_crops_shape.rank() == 2);
+
+  int32_t input_spatial_dim = input_shape.rank() - 2;
+  assert(const_block_shape_shape.dim(0) == input_spatial_dim);
+  assert(const_crops_shape.dim(0) == input_spatial_dim);
+  assert(const_crops_shape.dim(1) == 2);
+
+  loco::TensorShape shape_output;
+
+  shape_output.rank(input_shape.rank());
+
+  int32_t output_batch_size = input_shape.dim(0).value();
+  for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
+  {
+    int dim_size = input_shape.dim(dim + 1).value() * const_block_shape->at<S32>(dim);
+    dim_size -= const_crops->at<S32>(dim * 2);
+    dim_size -= const_crops->at<S32>(dim * 2 + 1);
+    shape_output.dim(dim + 1) = dim_size;
+
+    assert(output_batch_size % const_block_shape->at<S32>(dim) == 0);
+    output_batch_size = output_batch_size / const_block_shape->at<S32>(dim);
+  }
+  shape_output.dim(0) = output_batch_size;
+  shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
+
+  return loco::NodeShape{shape_output};
+}
+
+struct OutputSize
+{
+  uint32_t height = 0;
+  uint32_t width = 0;
+};
+
+template <class Conv2DType> OutputSize infer_conv2d_type(const Conv2DType *node)
+{
+  auto ifm_shape = loco::shape_get(node->input()).template as<loco::TensorShape>();
+  auto ker_shape = loco::shape_get(node->filter()).template as<loco::TensorShape>();
+  assert(ifm_shape.rank() == 4);
+  assert(ker_shape.rank() == 4);
+
+  uint32_t input_height = ifm_shape.dim(1).value();
+  uint32_t input_width = ifm_shape.dim(2).value();
+  uint32_t stride_height = node->stride()->h();
+  uint32_t stride_width = node->stride()->w();
+  uint32_t ker_height = ker_shape.dim(1).value();
+  uint32_t ker_width = ker_shape.dim(2).value();
+  uint32_t dilation_height = node->dilation()->h();
+  uint32_t dilation_width = node->dilation()->w();
+  uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1;
+  uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1;
+
+  uint32_t output_height = 0;
+  uint32_t output_width = 0;
+
+  if (node->padding() == luci::Padding::VALID)
+  {
+    output_height = (input_height + stride_height - effective_ker_height) / stride_height;
+    output_width = (input_width + stride_width - effective_ker_width) / stride_width;
+  }
+  else if (node->padding() == luci::Padding::SAME)
+  {
+    output_height = (input_height + stride_height - 1) / stride_height;
+    output_width = (input_width + stride_width - 1) / stride_width;
+  }
+  else
+    LUCI_ASSERT(false, "Wrong padding type");
+
+  OutputSize os{output_height, output_width};
+
+  return os;
+}
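The only difference from the pooling case is the dilation term: a 3-wide kernel with dilation 2 has effective_ker = 2 * (3 - 1) + 1 = 5, so an input of width 10 with stride 1 and VALID padding yields (10 + 1 - 5) / 1 = 6 (illustrative numbers).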
+
+// BatchMatMulV2 supports broadcasting in the batch dimensions(BatchMatMul doesn't)
+// TODO Distinguish BatchMatMul and BatchMatMulV2
+loco::NodeShape infer_batchmatmul_shape(const loco::TensorShape &x_shape,
+                                        const loco::TensorShape &y_shape, bool adj_x, bool adj_y)
+{
+  uint32_t x_rank = x_shape.rank();
+  uint32_t y_rank = y_shape.rank();
+  assert(x_rank >= 2 && y_rank >= 2);
+
+  loco::TensorShape output_shape;
+  output_shape.rank(x_shape.rank());
+  // Broadcast in the batch dimension
+  if (x_rank > 2 || y_rank > 2)
+  {
+    loco::TensorShape dummy_x = x_shape;
+    loco::TensorShape dummy_y = y_shape;
+    expand_rank(dummy_x, dummy_y);
+    if (x_rank < y_rank)
+      expand_rank(output_shape, dummy_y);
+
+    for (uint32_t d = 0; d < output_shape.rank() - 2; d++)
+    {
+      uint32_t max_dim = std::max(dummy_x.dim(d).value(), dummy_y.dim(d).value());
+      if (dummy_x.dim(d) == dummy_y.dim(d) ||
+          dummy_x.dim(d).value() * dummy_y.dim(d).value() == max_dim)
+        output_shape.dim(d).set(max_dim);
+      else
+        INTERNAL_EXN("BatchMatMul has wrong shape");
+    }
+  }
+
+  loco::Dimension x_lhs = adj_x ? x_shape.dim(x_rank - 1) : x_shape.dim(x_rank - 2);
+  loco::Dimension x_rhs = adj_x ? x_shape.dim(x_rank - 2) : x_shape.dim(x_rank - 1);
+  loco::Dimension y_lhs = adj_y ? y_shape.dim(y_rank - 1) : y_shape.dim(y_rank - 2);
+  loco::Dimension y_rhs = adj_y ? y_shape.dim(y_rank - 2) : y_shape.dim(y_rank - 1);
+
+  if (not(x_rhs == y_lhs))
+    INTERNAL_EXN("x_rhs and y_lhs should be same");
+
+  uint32_t out_rank = output_shape.rank();
+  output_shape.dim(out_rank - 2) = x_lhs;
+  output_shape.dim(out_rank - 1) = y_rhs;
+
+  return loco::NodeShape{output_shape};
+}
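Worked example of the broadcasting above (adj_x = adj_y = false, illustrative shapes): x of shape [1, 5, 2, 3] and y of shape [4, 1, 3, 7] broadcast their batch dims to [4, 5], and the matrix dims contract as 2x3 times 3x7, giving an output shape of [4, 5, 2, 7].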
+
+loco::NodeShape infer_concatenation(const luci::CircleConcatenation *node)
+{
+  // TODO Support when CircleConcatenation has 0 input
+  assert(node->numValues() > 0);
+
+  auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
+  auto axis = node->axis();
+  if (axis < 0)
+    axis += first_shape.rank();
+
+  assert(0 <= axis);
+  assert(first_shape.rank() > static_cast<uint32_t>(axis));
+
+  loco::TensorShape output_shape;
+
+  output_shape.rank(first_shape.rank());
+  for (uint32_t i = 0; i < output_shape.rank(); ++i)
+    output_shape.dim(i) = first_shape.dim(i);
+
+  for (uint32_t i = 1; i < node->numValues(); ++i)
+  {
+    auto input_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+
+    for (uint32_t j = 0; j < output_shape.rank(); ++j)
+    {
+      if (j == static_cast<uint32_t>(axis))
+        output_shape.dim(j) = output_shape.dim(j).value() + input_shape.dim(j).value();
+      else
+        assert(output_shape.dim(j) == input_shape.dim(j));
+    }
+  }
+
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_conv2d(const luci::CircleConv2D *node)
+{
+  LOGGER(l);
+
+  auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>();  // in NHWC
+  auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in OHWI
+
+  INFO(l) << "[luci] CircleConv2D ShapeInf ifm(" << ifm_shape.rank() << ") ker(" << ker_shape.rank()
+          << ")" << std::endl;
+
+  assert(ifm_shape.rank() == 4);
+  assert(ker_shape.rank() == 4);
+  assert(ifm_shape.dim(3) == ker_shape.dim(3));
+
+  auto os = infer_conv2d_type(node);
+
+  loco::TensorShape ofm_shape;
+  ofm_shape.rank(4);
+  ofm_shape.dim(0) = ifm_shape.dim(0);
+  ofm_shape.dim(1) = os.height;
+  ofm_shape.dim(2) = os.width;
+  ofm_shape.dim(3) = ker_shape.dim(0);
+
+  return loco::NodeShape{ofm_shape};
+}
+
+loco::NodeShape infer_depth_to_space(const luci::CircleDepthToSpace *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
+
+  // Only data format NHWC is supported
+  // TODO need to clarify what to do with layout in this operator
+  int32_t height = input_shape.dim(1).value();
+  int32_t width = input_shape.dim(2).value();
+  int32_t depth = input_shape.dim(3).value();
+
+  int block_size = node->block_size();
+
+  if (block_size < 2)
+    INTERNAL_EXN("Block size must be >= 2");
+
+  if (depth % (block_size * block_size))
+  {
+    INTERNAL_EXN("The input tensor's depth must be divisible by block_size^2");
+  }
+
+  loco::TensorShape output_shape;
+  output_shape.rank(4);
+
+  output_shape.dim(0) = input_shape.dim(0).value();
+  output_shape.dim(1) = height * block_size;
+  output_shape.dim(2) = width * block_size;
+  output_shape.dim(3) = depth / (block_size * block_size);
+
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_depthwise_conv2d(const luci::CircleDepthwiseConv2D *node)
+{
+  auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>();  // in NHWC
+  auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in 1 H W CM
+
+  assert(ifm_shape.rank() == 4);
+  assert(ker_shape.rank() == 4);
+  assert(ker_shape.dim(0).value() == 1);
+
+  auto os = infer_conv2d_type(node);
+
+  loco::TensorShape ofm_shape;
+  ofm_shape.rank(4);
+  ofm_shape.dim(0) = ifm_shape.dim(0);
+  ofm_shape.dim(1) = os.height;
+  ofm_shape.dim(2) = os.width;
+  ofm_shape.dim(3) = ker_shape.dim(3);
+
+  return loco::NodeShape{ofm_shape};
+}
+
+loco::NodeShape infer_expand_dims(const luci::CircleExpandDims *node)
+{
+  const loco::DataType S32 = loco::DataType::S32;
+  auto x_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  if (x_shape.rank() == 0)
+  {
+    // This may be an unknown shape. We use the shape from the node itself.
+    return use_own(node);
+  }
+  auto const_axis = loco::must_cast<luci::CircleConst *>(node->axis());
+  LUCI_ASSERT(const_axis->dtype() == S32, "Only support int32 CircleConst for axis");
+  if (const_axis->rank() != 0 && const_axis->rank() != 1)
+  {
+    INTERNAL_EXN_V("Non-scalar axis in OP", node->opnum());
+  }
+  int32_t axis = const_axis->at<S32>(0);
+  LUCI_ASSERT((axis <= static_cast<int32_t>(x_shape.rank())) &&
+                  (axis >= -1 - static_cast<int32_t>(x_shape.rank())),
+              "Axis has to be between [-(D+1), D], where D is rank of input.");
+  size_t positive_axis = axis < 0 ? x_shape.rank() + axis + 1 : axis;
+  loco::TensorShape output_shape;
+  output_shape.rank(x_shape.rank() + 1);
+  size_t i = 0;
+  for (; i < positive_axis; i++)
+    output_shape.dim(i) = x_shape.dim(i);
+  output_shape.dim(i) = loco::Dimension(1);
+  for (; i < x_shape.rank(); i++)
+    output_shape.dim(i + 1) = x_shape.dim(i);
+  return loco::NodeShape{output_shape};
+}
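For example (illustrative shapes), an input of shape [2, 3] with axis = 1 yields [2, 1, 3]; axis = -1 folds to positive_axis = 2 and yields [2, 3, 1].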
+
+loco::NodeShape infer_fill(const luci::CircleFill *node)
+{
+  loco::TensorShape shape;
+  {
+    LUCI_ASSERT(node->dims(), "dims input should not be nullptr");
+
+    auto dims_node = dynamic_cast<luci::CircleConst *>(node->dims());
+    if (dims_node != nullptr)
+    {
+      // Only support node with S32
+      LUCI_ASSERT(dims_node->dtype() == loco::DataType::S32, "Only support int32 CircleConst");
+
+      if (dims_node->rank() != 1)
+        INTERNAL_EXN_V("Only support rank 1 CircleConst", oops::to_uint32(dims_node->rank()));
+
+      shape.rank(dims_node->dim(0).value());
+
+      for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+      {
+        shape.dim(axis) = dims_node->at<loco::DataType::S32>(axis);
+      }
+    }
+    else
+    {
+      shape = own_shape(node);
+    }
+  }
+
+  return loco::NodeShape{shape};
+}
+
+loco::NodeShape infer_fully_connected(const luci::CircleFullyConnected *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  auto weights_shape = loco::shape_get(node->weights()).as<loco::TensorShape>();
+
+  // Checking shape capability for fully connected layer
+  // Input: a tensor of at least rank 2 [D1, D2, ... Dn]
+  // Weight: [# of units, K]
+  // Output: [D1 * D2 * ... * Dn / K, # of units]
+  if (input_shape.rank() < 2 || weights_shape.rank() != 2)
+  {
+    // Return the node's own shape if shape inference is not possible
+    return use_own(node);
+  }
+
+  uint32_t input_size = 1;
+  for (uint32_t i = 0; i < input_shape.rank(); i++)
+  {
+    input_size = input_size * input_shape.dim(i).value();
+  }
+  const uint32_t batch_size = input_size / weights_shape.dim(1).value();
+  loco::TensorShape out_shape;
+  out_shape.rank(2);
+  out_shape.dim(0) = batch_size;
+  out_shape.dim(1) = weights_shape.dim(0);
+
+  return loco::NodeShape{out_shape};
+}
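Worked example of the batch computation above: an input of shape [2, 3, 4] (24 elements) with weights of shape [8, 12] gives batch_size = 24 / 12 = 2, so the output shape is [2, 8].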
+
+loco::NodeShape infer_gather(const luci::CircleGather *node)
+{
+  loco::TensorShape output_shape;
+
+  const auto input_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
+  const auto positions_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+  int32_t axis = node->axis();
+
+  // If the CircleGather input has a dynamic shape, the output shape can't be inferred here,
+  // so return the shape the node already has.
+  if (input_shape.rank() == 0 || positions_shape.rank() == 0)
+    return use_own(node);
+
+  if (axis < 0)
+    axis += input_shape.rank();
+
+  output_shape.rank(input_shape.rank() - 1 + positions_shape.rank());
+  int32_t outdim_index = 0;
+  for (int32_t i = 0; i < axis; ++i)
+    output_shape.dim(outdim_index++) = input_shape.dim(i);
+  for (uint32_t i = 0; i < positions_shape.rank(); ++i)
+    output_shape.dim(outdim_index++) = positions_shape.dim(i);
+  for (uint32_t i = axis + 1; i < input_shape.rank(); ++i)
+    output_shape.dim(outdim_index++) = input_shape.dim(i);
+
+  return loco::NodeShape{output_shape};
+}
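Worked example (illustrative shapes): params [4, 5, 6], indices [2, 3], axis = 1 gives output rank 3 - 1 + 2 = 4 and shape [4, 2, 3, 6]: the params dims before the axis, then the indices dims, then the params dims after the axis.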
+
+loco::NodeShape infer_gather_nd(const luci::CircleGatherNd *node)
+{
+  loco::TensorShape output_shape;
+
+  const auto params_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
+  const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+
+  const auto params_rank = params_shape.rank();
+  const auto indices_rank = indices_shape.rank();
+
+  // see https://www.tensorflow.org/api_docs/python/tf/gather_nd
+  // output.shape = indices.shape[:-1] + params.shape[indices.shape[-1]:]
+  // batch_dims isn't supported in tflite
+
+  // TODO: replace exceptions with setting shape to unknown?
+
+  if (!indices_shape.dim(indices_rank - 1).known())
+    INTERNAL_EXN("Last indices dimension is unknown");
+
+  auto indices_last_dim = indices_shape.dim(indices_rank - 1).value();
+
+  if (indices_last_dim > params_rank)
+    INTERNAL_EXN("Last indices dimension should be <= params rank");
+
+  const uint32_t output_rank = indices_rank + params_rank - indices_last_dim - 1;
+
+  output_shape.rank(output_rank);
+
+  uint32_t output_index = 0;
+  for (uint32_t i = 0; i < indices_rank - 1; ++i)
+  {
+    auto &dim = indices_shape.dim(i);
+    if (!dim.known())
+      INTERNAL_EXN("Unknown indices dimension is unsupported");
+    output_shape.dim(output_index++).set(dim.value());
+  }
+
+  for (uint32_t i = indices_last_dim; i < params_rank; ++i)
+  {
+    auto &dim = params_shape.dim(i);
+    if (!dim.known())
+      INTERNAL_EXN("Unknown params dimension is unsupported");
+    output_shape.dim(output_index++).set(dim.value());
+  }
+
+  return loco::NodeShape{output_shape};
+}
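Applying the formula cited in the comment above to illustrative shapes: params [4, 5, 6] with indices [2, 3] (indices.shape[-1] = 3, the full params rank) yields output shape [2], while indices [2, 2] yields [2] + params.shape[2:] = [2, 6].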
+
+loco::NodeShape infer_matrix_diag(const luci::CircleMatrixDiag *node)
+{
+  loco::TensorShape output_shape;
+
+  auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
+  auto rank = diagonal_shape.rank();
+
+  output_shape.rank(rank + 1);
+
+  for (uint32_t i = 0; i < rank; i++)
+  {
+    output_shape.dim(i) = diagonal_shape.dim(i);
+  }
+
+  output_shape.dim(rank) = diagonal_shape.dim(rank - 1);
+
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_matrix_set_diag(const luci::CircleMatrixSetDiag *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
+
+  auto rank = diagonal_shape.rank();
+
+  LUCI_ASSERT(rank == input_shape.rank() - 1, "diagonal rank = input rank - 1");
+
+  for (uint32_t i = 0; i < rank - 1; i++)
+  {
+    LUCI_ASSERT(diagonal_shape.dim(i) == input_shape.dim(i), "diagonal dims = input dims");
+  }
+
+  auto dim = std::min(input_shape.dim(rank - 1).value(), input_shape.dim(rank).value());
+
+  LUCI_ASSERT(dim == diagonal_shape.dim(rank - 1), "Max diag len error");
+
+  return loco::NodeShape{input_shape};
+}
+
+loco::TensorShape infer_reducer(const loco::Node *input, const loco::Node *indices, bool keep_dims)
+{
+  const loco::DataType S32 = loco::DataType::S32;
+
+  auto input_shape = loco::shape_get(input).as<loco::TensorShape>();
+  auto reduction_indices = loco::must_cast<const luci::CircleConst *>(indices);
+
+  { // Exceptions
+    // TODO support non-const case
+    // TODO support other data type
+    LUCI_ASSERT(reduction_indices->dtype() == S32, "Only support int32");
+  }
+
+  std::vector<int32_t> reduction_values;
+
+  for (uint32_t i = 0; i < reduction_indices->size<S32>(); ++i)
+  {
+    int32_t axis = reduction_indices->at<S32>(i);
+    if (axis < 0)
+      axis += input_shape.rank();
+    if (not(0 <= axis and axis < static_cast<int32_t>(input_shape.rank())))
+      INTERNAL_EXN_V("Invalid reduction axis for REDUCER", oops::to_uint32(axis));
+    reduction_values.push_back(axis);
+  }
+
+  loco::TensorShape output_shape;
+
+  if (keep_dims)
+  {
+    output_shape.rank(input_shape.rank());
+    for (uint32_t i = 0; i < input_shape.rank(); ++i)
+      output_shape.dim(i) = input_shape.dim(i);
+    for (uint32_t i = 0; i < reduction_values.size(); ++i)
+      output_shape.dim(reduction_values.at(i)) = 1;
+  }
+  else
+  {
+    std::vector<bool> check_reduce(input_shape.rank(), false);
+    for (uint32_t i = 0; i < reduction_values.size(); ++i)
+      check_reduce.at(reduction_values.at(i)) = true;
+
+    uint32_t reduce_cnt = 0;
+    for (uint32_t i = 0; i < check_reduce.size(); ++i)
+      if (check_reduce.at(i))
+        ++reduce_cnt;
+
+    output_shape.rank(input_shape.rank() - reduce_cnt);
+    for (uint32_t i = 0, j = 0; i < check_reduce.size(); ++i)
+      if (check_reduce.at(i) == false)
+        output_shape.dim(j++) = input_shape.dim(i);
+  }
+
+  return output_shape;
+}
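Worked example: input [2, 3, 4] with reduction index 1 yields [2, 1, 4] when keep_dims is true and [2, 4] when it is false; a negative axis such as -2 is first folded to 1.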
 
-  loco::NodeShape visit(const luci::CircleCast *node) final { return use_x(node); }
-
-  loco::NodeShape visit(const luci::CircleCeil *node) final { return use_x(node); }
-
-  loco::NodeShape visit(const luci::CircleConcatenation *node) final
+loco::NodeShape infer_mirror_pad(const luci::CircleMirrorPad *node)
+{
+  // TODO support non-const case
+  auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+  return use_paddings(node, paddings);
+}
+
+loco::NodeShape infer_one_hot(const luci::CircleOneHot *node)
+{
+  const loco::DataType S32 = loco::DataType::S32;
+  auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+  // Only support the case where OneHot's depth() is CircleConst with type S32
+  // TODO support depth with other types
+  auto depth = loco::must_cast<luci::CircleConst *>(node->depth());
+  LUCI_ASSERT(depth->dtype() == S32, "Only support int32 CircleConst");
+  if (depth->rank() != 0)
+    INTERNAL_EXN_V("Only support rank 0 CircleOneHot in Depth", oops::to_uint32(depth->rank()));
+  loco::TensorShape output_shape;
+  output_shape.rank(indices_shape.rank() + 1);
+  auto axis = node->axis();
+  if (axis < 0)
+    axis += indices_shape.rank() + 1;
+  LUCI_ASSERT(0 <= axis, "Axis is out of range");
+  LUCI_ASSERT(static_cast<uint32_t>(axis) <= indices_shape.rank(), "Axis is out of range");
+  uint32_t j = 0;
+  for (uint32_t i = 0; i < output_shape.rank(); i++)
+  {
+    if (i == static_cast<uint32_t>(axis))
+    {
+      output_shape.dim(i) = depth->at<S32>(0);
+    }
+    else
+    {
+      output_shape.dim(i) = indices_shape.dim(j++);
+    }
+  }
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_pack(const luci::CirclePack *node)
+{
+  LUCI_ASSERT(node->values_count() > 0, "Only support one or more inputs");
+
+  auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
+  // Make sure all inputs have the same shape.
+  for (uint32_t i = 1; i < node->values_count(); ++i)
   {
-    // TODO Support when CircleConcatenation has 0 input
-    assert(node->numValues() > 0);
+    auto in_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+    LUCI_ASSERT(loco::NodeShape{first_shape} == loco::NodeShape{in_shape},
+                "All inputs must have the same shape");
+  }
 
-    auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
-    auto axis = node->axis();
-    if (axis < 0)
-      axis += first_shape.rank();
+  // Checking shape capability for pack layer
+  // Input: tensors [D1, D2, ... Dn]
+  // Axis: K
+  // Output: [D1, D2, ... , D_K-1, n, D_K+1, ... Dn]
+  auto axis = node->axis();
+  if (axis < 0)
+    axis += first_shape.rank() + 1;
 
-    assert(0 <= axis);
-    assert(first_shape.rank() > static_cast<uint32_t>(axis));
+  LUCI_ASSERT(0 <= axis, "Axis is out of range");
+  LUCI_ASSERT(static_cast<uint32_t>(axis) <= first_shape.rank(), "Axis is out of range");
 
-    loco::TensorShape output_shape;
+  loco::TensorShape output_shape;
+  output_shape.rank(first_shape.rank() + 1);
+
+  uint32_t j = 0;
+  for (uint32_t i = 0; i < output_shape.rank(); ++i)
+  {
+    if (i == static_cast<uint32_t>(axis))
+    {
+      output_shape.dim(i) = node->values_count();
+    }
+    else
+    {
+      output_shape.dim(i) = first_shape.dim(j++);
+    }
+  }
+
+  return loco::NodeShape{output_shape};
+}
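Worked example: packing three tensors of shape [2, 4] along axis = 1 yields [2, 3, 4]; axis = -1 folds to 2 and yields [2, 4, 3].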
+
+loco::NodeShape infer_pad(const luci::CirclePad *node)
+{
+  // TODO support non-const case
+  auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+  return use_paddings(node, paddings);
+}
+
+loco::NodeShape infer_pad_v2(const luci::CirclePadV2 *node)
+{
+  // TODO support non-const case
+  auto paddings = dynamic_cast<luci::CircleConst *>(node->paddings());
+  if (!paddings)
+  {
+    auto node_shape = own_shape(node);
+    return loco::NodeShape{node_shape};
+  }
+  return use_paddings(node, paddings);
+}
+
+loco::NodeShape infer_p_relu(const luci::CirclePRelu *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  auto alpha_shape = loco::shape_get(node->alpha()).as<loco::TensorShape>();
+
+  auto output_shape = broadcast_shape(input_shape, alpha_shape);
+
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_range(const luci::CircleRange *node)
+{
+  loco::TensorShape output_shape;
+  output_shape.rank(1);
+
+  auto start_node = dynamic_cast<luci::CircleConst *>(node->start());
+  auto limit_node = dynamic_cast<luci::CircleConst *>(node->limit());
+  auto delta_node = dynamic_cast<luci::CircleConst *>(node->delta());
+
+  if (start_node == nullptr || limit_node == nullptr || delta_node == nullptr)
+  {
+    return use_own(node);
+  }
+
+  double start = 0, limit = 0, delta = 0;
+
+#define GET_RANGE_PARAM(DT)         \
+  start = start_node->scalar<DT>(); \
+  limit = limit_node->scalar<DT>(); \
+  delta = delta_node->scalar<DT>();
+
+  switch (start_node->dtype())
+  {
+    case loco::DataType::FLOAT32:
+      GET_RANGE_PARAM(loco::DataType::FLOAT32)
+      break;
+    case loco::DataType::S32:
+      GET_RANGE_PARAM(loco::DataType::S32)
+      break;
+    default:
+      INTERNAL_EXN("Range data type not supported");
+  }
+
+#undef GET_RANGE_PARAM
+
+  if (delta == 0)
+    INTERNAL_EXN("Delta can not be zero");
+
+  output_shape.dim(0) = ceil((limit - start) / delta);
+
+  return loco::NodeShape{output_shape};
+}
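A quick check of the ceil above: start = 0, limit = 10, delta = 3 gives ceil(10 / 3) = 4 elements (0, 3, 6, 9); start = 5, limit = 0, delta = -2 gives ceil(-5 / -2) = 3 elements (5, 3, 1).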
+
+loco::NodeShape infer_reshape(const luci::CircleReshape *node)
+{
+  LOGGER(l);
+
+  const loco::DataType S32 = loco::DataType::S32;
 
-    output_shape.rank(first_shape.rank());
-    for (uint32_t i = 0; i < output_shape.rank(); ++i)
-      output_shape.dim(i) = first_shape.dim(i);
+  loco::TensorShape shape_by_input;
+  {
+    LUCI_ASSERT(node->shape(), "2nd input shape() should not be nullptr");
 
-    for (uint32_t i = 1; i < node->numValues(); ++i)
+    // Only support the case where node's shape() is CircleConst with S32
+    // TODO support other nodes with other types
+    auto const_shape_node = dynamic_cast<luci::CircleConst *>(node->shape());
+    if (const_shape_node != nullptr)
     {
-      auto input_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+      LUCI_ASSERT(const_shape_node->dtype() == S32, "Only support int32 CircleConst");
 
-      for (uint32_t j = 0; j < output_shape.rank(); ++j)
+      shape_by_input.rank(const_shape_node->size<S32>());
+
+      for (uint32_t axis = 0; axis < shape_by_input.rank(); ++axis)
       {
-        if (j == static_cast<uint32_t>(axis))
-          output_shape.dim(j) = output_shape.dim(j).value() + input_shape.dim(j).value();
-        else
-          assert(output_shape.dim(j) == input_shape.dim(j));
+        shape_by_input.dim(axis) = const_shape_node->at<S32>(axis);
       }
     }
+    else
+    {
+      // We use shape from the node itself
+      shape_by_input = own_shape(node);
+    }
+  }
 
-    return loco::NodeShape{output_shape};
+  loco::TensorShape shape_by_attr;
+  {
+    shape_by_attr.rank(node->newShape()->rank());
+
+    for (uint32_t axis = 0; axis < shape_by_attr.rank(); ++axis)
+    {
+      shape_by_attr.dim(axis) = node->newShape()->dim(axis);
+    }
   }
 
-  loco::NodeShape visit(const luci::CircleConst *node) final { return use_own(node); }
+  if (!(shape_by_input == shape_by_attr))
+  {
+    INFO(l) << "CircleReshape: Two new shape information mismatched : " << std::endl;
+    INFO(l) << "   shape_by_input : " << shape_by_input << std::endl;
+    INFO(l) << "   shape_by_attr : " << shape_by_attr << std::endl;
+  }
+
+  loco::TensorShape output_shape = shape_by_input;
 
-  loco::NodeShape visit(const luci::CircleConv2D *node) final
+  // One of the dimensions can have special value -1, meaning its actual value should be inferred.
+  const auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();
+  const uint32_t input_element_count = loco::element_count(&input_shape);
+  uint32_t output_element_count = 1;
+  uint32_t unknown_dim_index = UINT32_MAX;
+  for (uint32_t dim_index = 0; dim_index < output_shape.rank(); ++dim_index)
   {
-    LOGGER(l);
+    const uint32_t dim_value = output_shape.dim(dim_index).value();
+    if (static_cast<int>(dim_value) == -1)
+    {
+      LUCI_ASSERT(unknown_dim_index == UINT32_MAX, "More than one unknown dimension");
+      unknown_dim_index = dim_index;
+    }
+    else
+    {
+      output_element_count *= dim_value;
+    }
+  }
+  if (unknown_dim_index != UINT32_MAX)
+  {
+    output_shape.dim(unknown_dim_index) = input_element_count / output_element_count;
+  }
+
+  return loco::NodeShape{output_shape};
+}
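Worked example of the -1 inference: reshaping a 24-element input to [2, -1, 4] leaves output_element_count = 8 from the known dims, so the unknown dim becomes 24 / 8 = 3.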
+
+loco::NodeShape infer_resize_bilinear(const luci::CircleResizeBilinear *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+
+  if (input_shape.rank() != 4)
+    INTERNAL_EXN("Expected ResizeBilinear input to have rank 4");
+
+  auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
+
+  if (const_node->dtype() != loco::DataType::S32)
+    INTERNAL_EXN("Only S32 datatype is supported for ResizeBilinear size");
+
+  if (const_node->rank() != 1)
+    INTERNAL_EXN("Expected size tensor of rank 1");
+
+  if (const_node->dim(0).value() != 2)
+    INTERNAL_EXN("Expected size tensor with shape [2]");
+
+  loco::TensorShape output_shape;
+  output_shape.rank(4);
+  output_shape.dim(0) = input_shape.dim(0);
+  output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
+  output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
+  output_shape.dim(3) = input_shape.dim(3);
+
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_resize_nearest_neighbor(const luci::CircleResizeNearestNeighbor *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+
+  if (input_shape.rank() != 4)
+    INTERNAL_EXN("Expected ResizeNearesNeighbor input to have rank 4");
+
+  auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
+
+  if (const_node->dtype() != loco::DataType::S32)
+    INTERNAL_EXN("Only S32 datatype is supported for ResizeNearesNeighbor size");
+
+  if (const_node->rank() != 1)
+    INTERNAL_EXN("Expected size tensor of rank 1");
+
+  if (const_node->dim(0).value() != 2)
+    INTERNAL_EXN("Expected size tensor with shape [2]");
+
+  loco::TensorShape output_shape;
+  output_shape.rank(4);
+  output_shape.dim(0) = input_shape.dim(0);
+  output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
+  output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
+  output_shape.dim(3) = input_shape.dim(3);
+
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_scatter_nd(const luci::CircleScatterNd *node)
+{
+  loco::TensorShape output_shape;
+
+  auto shape_node = loco::must_cast<luci::CircleConst *>(node->shape());
+
+  const loco::DataType S32 = loco::DataType::S32;
+  const loco::DataType S64 = loco::DataType::S64;
+
+  std::vector<int64_t> vect_shape;
+
+  if (shape_node->dtype() == S32)
+    vect_shape = vector_from_constant<S32>(shape_node);
+  else if (shape_node->dtype() == S64)
+    vect_shape = vector_from_constant<S64>(shape_node);
+  else
+    LUCI_ASSERT(false, "Only support int32/int64 for shape()");
+
+  output_shape.rank(vect_shape.size());
+  for (uint32_t i = 0; i < vect_shape.size(); ++i)
+    output_shape.dim(i) = vect_shape[i];
+
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_segment_sum(const luci::CircleSegmentSum *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  auto segment_shape = loco::shape_get(node->segment_ids()).as<loco::TensorShape>();
+
+  LUCI_ASSERT(segment_shape.rank() == 1, "segment_ids must be 1-D tensor");
+  LUCI_ASSERT(segment_shape.dim(0).value() == input_shape.dim(0).value(),
+              "segment_ids size must be equal to the size of data's first dimension");
+
+  auto ids_shape_value = loco::must_cast<luci::CircleConst *>(node->segment_ids());
+
+  std::vector<int64_t> vect_ids;
 
-    auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>();  // in NHWC
-    auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in OHWI
+  if (ids_shape_value->dtype() == loco::DataType::S32)
+    vect_ids = vector_from_constant<loco::DataType::S32>(ids_shape_value);
 
-    INFO(l) << "[luci] CircleConv2D ShapeInf ifm(" << ifm_shape.rank() << ") ker("
-            << ker_shape.rank() << ")" << std::endl;
+  LUCI_ASSERT(std::is_sorted(vect_ids.begin(), vect_ids.end()),
+              "segment_ids values should be sorted")
 
-    assert(ifm_shape.rank() == 4);
-    assert(ker_shape.rank() == 4);
-    assert(ifm_shape.dim(3) == ker_shape.dim(3));
+  loco::TensorShape output_shape;
+
+  output_shape.rank(input_shape.rank());
+
+  for (uint32_t i = 1; i < input_shape.rank(); ++i)
+    output_shape.dim(i) = input_shape.dim(i);
+
+  output_shape.dim(0) = vect_ids.back() + 1;
 
-    uint32_t input_height = ifm_shape.dim(1).value();
-    uint32_t input_width = ifm_shape.dim(2).value();
-    uint32_t stride_height = node->stride()->h();
-    uint32_t stride_width = node->stride()->w();
-    uint32_t ker_height = ker_shape.dim(1).value();
-    uint32_t ker_width = ker_shape.dim(2).value();
-    uint32_t dilation_height = node->dilation()->h();
-    uint32_t dilation_width = node->dilation()->w();
-    uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1;
-    uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1;
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_select(const luci::CircleSelect *node)
+{
+  auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
+  assert(t_shape == loco::shape_get(node->e()).as<loco::TensorShape>());
 
-    uint32_t output_height = 0;
-    uint32_t output_width = 0;
+  // condition shape validation
+  auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
+  if (c_shape.rank() != t_shape.rank())
+  {
+    if (c_shape.rank() != 0 && c_shape.rank() != 1)
+      INTERNAL_EXN_V("CircleSelect condition rank is not 0 nor 1: ", c_shape.rank());
 
-    if (node->padding() == luci::Padding::VALID)
+    if (c_shape.rank() == 1)
     {
-      output_height = (input_height + stride_height - effective_ker_height) / stride_height;
-      output_width = (input_width + stride_width - effective_ker_width) / stride_width;
+      if (c_shape.dim(0).value() != t_shape.dim(0).value())
+        INTERNAL_EXN("CircleSelect condition dim(0) should match with t.dim(0)");
     }
-    else if (node->padding() == luci::Padding::SAME)
+  }
+
+  return loco::NodeShape{t_shape};
+}
+
+loco::NodeShape infer_select_v2(const luci::CircleSelectV2 *node)
+{
+  auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
+  auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
+  auto e_shape = loco::shape_get(node->e()).as<loco::TensorShape>();
+
+  // validate ability to broadcast shapes to each other
+  auto b_shape = broadcast_shape(broadcast_shape(c_shape, t_shape), e_shape);
+  return loco::NodeShape{b_shape};
+}
+
+loco::NodeShape infer_shape(const luci::CircleShape *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+
+  loco::TensorShape output_shape;
+
+  output_shape.rank(1);
+  output_shape.dim(0) = input_shape.rank();
+
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_slice(const luci::CircleSlice *node)
+{
+  const loco::DataType S32 = loco::DataType::S32;
+  const loco::DataType S64 = loco::DataType::S64;
+
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+
+  auto const_begin = loco::must_cast<luci::CircleConst *>(node->begin());
+  auto const_size = loco::must_cast<luci::CircleConst *>(node->size());
+
+  loco::TensorShape output_shape;
+  std::vector<int64_t> vect_begin; // to hold both S32/S64, we use int64_t
+  std::vector<int64_t> vect_size;
+
+  if (const_begin->dtype() == S32)
+    vect_begin = vector_from_constant<S32>(const_begin);
+  else if (const_begin->dtype() == S64)
+    vect_begin = vector_from_constant<S64>(const_begin);
+  else
+    LUCI_ASSERT(false, "Only support int32/int64 for begin()");
+
+  if (const_size->dtype() == S32)
+    vect_size = vector_from_constant<S32>(const_size);
+  else if (const_size->dtype() == S64)
+    vect_size = vector_from_constant<S64>(const_size);
+  else
+    LUCI_ASSERT(false, "Only support int32/int64 for size()");
+
+  assert(input_shape.rank() == vect_begin.size());
+  assert(input_shape.rank() == vect_size.size());
+
+  output_shape.rank(vect_begin.size());
+  for (uint32_t idx = 0; idx < vect_begin.size(); ++idx)
+  {
+    auto size = vect_size.at(idx);
+    if (size == -1)
     {
-      output_height = (input_height + stride_height - 1) / stride_height;
-      output_width = (input_width + stride_width - 1) / stride_width;
+      size = input_shape.dim(idx).value() - vect_begin.at(idx);
     }
-    else
-      LUCI_ASSERT(false, "Wrong padding type");
+    output_shape.dim(idx) = size;
+  }
+
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_space_to_batch_nd(const luci::CircleSpaceToBatchND *node)
+{
+  const loco::DataType S32 = loco::DataType::S32;
+
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  // Support only input rank is 3 and 4
+  assert(input_shape.rank() == 3 || input_shape.rank() == 4);
+
+  // Only support block_shape() with S32 type CircleConst for now
+  auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape());
+  LUCI_ASSERT(const_block_shape->dtype() == S32, "Only support int32 block_shape");
 
-    loco::TensorShape ofm_shape;
-    ofm_shape.rank(4);
-    ofm_shape.dim(0) = ifm_shape.dim(0);
-    ofm_shape.dim(1) = output_height;
-    ofm_shape.dim(2) = output_width;
-    ofm_shape.dim(3) = ker_shape.dim(0);
+  // Only support paddings() with S32 type CircleConst for now
+  auto const_paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+  LUCI_ASSERT(const_paddings->dtype() == S32, "Only support int32 paddings");
 
-    return loco::NodeShape{ofm_shape};
+  auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
+  auto const_paddings_shape = loco::shape_get(const_paddings).as<loco::TensorShape>();
+  assert(const_block_shape_shape.rank() == 1);
+  assert(const_paddings_shape.rank() == 2);
+
+  int32_t input_spatial_dim = input_shape.rank() - 2;
+  assert(const_block_shape_shape.dim(0) == input_spatial_dim);
+  assert(const_paddings_shape.dim(0) == input_spatial_dim);
+  assert(const_paddings_shape.dim(1) == 2);
+
+  // Check all values of block_shape >= 1
+  uint32_t ele_count = const_block_shape->size<S32>();
+  for (uint32_t e = 0; e < ele_count; ++e)
+  {
+    auto val = const_block_shape->at<S32>(e);
+    if (val < 1)
+    {
+      INTERNAL_EXN_V("All values of block_shape >= 1: ", e);
+    }
   }
 
-  loco::NodeShape visit(const luci::CircleCos *node) final { return use_x(node); }
+  loco::TensorShape shape_output;
 
-  loco::NodeShape visit(const luci::CircleCustom *node) final { return use_own(node); }
+  shape_output.rank(input_shape.rank());
 
-  loco::NodeShape visit(const luci::CircleDepthToSpace *node) final
+  int32_t output_batch_size = input_shape.dim(0).value();
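+  // Each spatial dim is padded and then divided by its block size, while the batch
+  // dim is multiplied by the product of block_shape, e.g., input [1, 4, 4, 1] with
+  // block_shape [2, 2] and zero paddings -> output [4, 2, 2, 1]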
+  for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
   {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
+    int dim_size = input_shape.dim(dim + 1).value();
+    dim_size += const_paddings->at<S32>(dim * 2);
+    dim_size += const_paddings->at<S32>(dim * 2 + 1);
+    shape_output.dim(dim + 1) = dim_size / const_block_shape->at<S32>(dim);
+
+    assert(dim_size % const_block_shape->at<S32>(dim) == 0);
+    output_batch_size = output_batch_size * const_block_shape->at<S32>(dim);
+  }
+  shape_output.dim(0) = output_batch_size;
+  shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
+
+  return loco::NodeShape{shape_output};
+}
+
+loco::NodeShape infer_space_to_depth(const luci::CircleSpaceToDepth *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
+
+  // Only data format NHWC is supported
+  int32_t height = input_shape.dim(1).value();
+  int32_t width = input_shape.dim(2).value();
+  int32_t depth = input_shape.dim(3).value();
 
-    // Only data format NHWC is supported
-    // TODO need to clarify what to do with layout in this operator
-    int32_t height = input_shape.dim(1).value();
-    int32_t width = input_shape.dim(2).value();
-    int32_t depth = input_shape.dim(3).value();
+  int block_size = node->block_size();
+
+  if (block_size < 2)
+    INTERNAL_EXN("Block size must be >= 2");
+
+  if ((height % block_size) || (width % block_size))
+  {
+    INTERNAL_EXN("The input tensor's height and width must be divisible by block_size");
+  }
+
+  loco::TensorShape output_shape;
+  output_shape.rank(4);
+
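+  // Each block_size x block_size spatial block is folded into the depth axis,
+  // e.g., input [1, 4, 4, 3] with block_size 2 -> output [1, 2, 2, 12]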
+  output_shape.dim(0) = input_shape.dim(0).value();
+  output_shape.dim(1) = height / block_size;
+  output_shape.dim(2) = width / block_size;
+  output_shape.dim(3) = block_size * block_size * depth;
+
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_sparse_to_dense(const luci::CircleSparseToDense *node)
+{
+  loco::TensorShape shape;
+  {
+    LUCI_ASSERT(node->output_shape(), "dims input should not be nullptr");
+
+    auto output_shape_node = dynamic_cast<luci::CircleConst *>(node->output_shape());
+    if (output_shape_node != nullptr)
+    {
+      // Only support node with S32
+      LUCI_ASSERT(output_shape_node->dtype() == loco::DataType::S32,
+                  "Only support int32 CircleConst");
 
-    int block_size = node->block_size();
+      if (output_shape_node->rank() != 1)
+        INTERNAL_EXN_V("Only support rank 1 CircleConst",
+                       oops::to_uint32(output_shape_node->rank()));
 
-    if (block_size < 2)
-      INTERNAL_EXN("Block size must be >= 2");
+      shape.rank(output_shape_node->size<loco::DataType::S32>());
 
-    if (depth % (block_size * block_size))
+      for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+      {
+        shape.dim(axis) = output_shape_node->at<loco::DataType::S32>(axis);
+      }
+    }
+    else
     {
-      INTERNAL_EXN("The input tensor's depth must be divisible by block_size^2");
+      shape = own_shape(node);
     }
+  }
 
-    loco::TensorShape output_shape;
-    output_shape.rank(4);
+  return loco::NodeShape{shape};
+}
 
-    output_shape.dim(0) = input_shape.dim(0).value();
-    output_shape.dim(1) = height * block_size;
-    output_shape.dim(2) = width * block_size;
-    output_shape.dim(3) = depth / (block_size * block_size);
+loco::NodeShape infer_strided_slice(const luci::CircleStridedSlice *node)
+{
+  auto begin_node = dynamic_cast<luci::CircleConst *>(node->begin());
+  auto end_node = dynamic_cast<luci::CircleConst *>(node->end());
+  auto strides_node = dynamic_cast<luci::CircleConst *>(node->strides());
 
-    return loco::NodeShape{output_shape};
+  if (begin_node == nullptr || end_node == nullptr || strides_node == nullptr)
+  {
+    return use_own(node);
   }
 
-  loco::NodeShape visit(const luci::CircleDepthwiseConv2D *node) final
+  loco::TensorShape shape = infer_output_shape(node);
+  return loco::NodeShape{shape};
+}
+
+loco::NodeShape infer_squeeze(const luci::CircleSqueeze *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+
+  // TODO input shape may be unknown before runtime
+  std::vector<bool> do_squeeze(input_shape.rank(), false);
+  uint32_t num_squeezed = 0;
+
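+  // e.g., input [1, 3, 1, 2]: squeeze_dims {} -> [3, 2], squeeze_dims {0} -> [3, 1, 2]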
+  if (!node->squeeze_dims().empty())
   {
-    auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>();  // in NHWC
-    auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in 1 H W CM
+    // SqueezeDims not empty, squeeze only dims specified
+    for (int32_t raw_dim : node->squeeze_dims())
+    {
+      int32_t dim = raw_dim < 0 ? raw_dim + input_shape.rank() : raw_dim;
 
-    assert(ifm_shape.rank() == 4);
-    assert(ker_shape.rank() == 4);
-    assert(ker_shape.dim(0).value() == 1);
+      if (dim < 0 || static_cast<uint32_t>(dim) >= input_shape.rank() ||
+          input_shape.dim(dim).value() != 1)
+      {
+        INTERNAL_EXN("invalid dimention specified to Squeeze");
+      }
 
-    uint32_t input_height = ifm_shape.dim(1).value();
-    uint32_t input_width = ifm_shape.dim(2).value();
-    uint32_t stride_height = node->stride()->h();
-    uint32_t stride_width = node->stride()->w();
-    uint32_t ker_height = ker_shape.dim(1).value();
-    uint32_t ker_width = ker_shape.dim(2).value();
-    uint32_t dilation_height = node->dilation()->h();
-    uint32_t dilation_width = node->dilation()->w();
-    uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1;
-    uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1;
+      if (!do_squeeze[dim])
+        ++num_squeezed;
+      do_squeeze[dim] = true;
+    }
+  }
+  else
+  {
+    // SqueezeDims empty, squeeze any dims with size == 1
+    for (uint32_t dim = 0; dim < input_shape.rank(); ++dim)
+    {
+      if (input_shape.dim(dim) == 1)
+      {
+        do_squeeze[dim] = true;
+        ++num_squeezed;
+      }
+    }
+  }
 
-    uint32_t output_height = 0;
-    uint32_t output_width = 0;
+  loco::TensorShape output_shape;
+  output_shape.rank(input_shape.rank() - num_squeezed);
 
-    if (node->padding() == luci::Padding::VALID)
-    {
-      output_height = (input_height + stride_height - effective_ker_height) / stride_height;
-      output_width = (input_width + stride_width - effective_ker_width) / stride_width;
-    }
-    else if (node->padding() == luci::Padding::SAME)
+  for (uint32_t in_dim = 0, out_dim = 0; in_dim < input_shape.rank(); ++in_dim)
+  {
+    if (!do_squeeze[in_dim])
     {
-      output_height = (input_height + stride_height - 1) / stride_height;
-      output_width = (input_width + stride_width - 1) / stride_width;
+      output_shape.dim(out_dim++) = input_shape.dim(in_dim);
     }
-    else
-      LUCI_ASSERT(false, "Wrong padding type");
+  }
 
-    loco::TensorShape ofm_shape;
-    ofm_shape.rank(4);
-    ofm_shape.dim(0) = ifm_shape.dim(0);
-    ofm_shape.dim(1) = output_height;
-    ofm_shape.dim(2) = output_width;
-    ofm_shape.dim(3) = ker_shape.dim(3);
+  return loco::NodeShape{output_shape};
+}
 
-    return loco::NodeShape{ofm_shape};
-  }
+loco::NodeShape infer_tile(const luci::CircleTile *node)
+{
+  const loco::DataType S32 = loco::DataType::S32;
 
-  loco::NodeShape visit(const luci::CircleDiv *node) final { return broadcast_xy(node); }
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  auto multiples = loco::must_cast<luci::CircleConst *>(node->multiples());
 
-  loco::NodeShape visit(const luci::CircleElu *node) final
-  {
-    auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+  // TODO support non-const case
+  // TODO support S64 type
+  LUCI_ASSERT(multiples->dtype() == S32, "Only support int32 multiples");
+  LUCI_ASSERT(multiples->rank() == 1, "multiples should be rank 1")
 
-    return loco::NodeShape{input_shape};
-  }
+  uint32_t n = multiples->dim(0).value();
 
-  loco::NodeShape visit(const luci::CircleEqual *node) final { return broadcast_xy(node); }
+  LUCI_ASSERT(n == input_shape.rank(), "length of multiples should be the same as input rank");
 
-  loco::NodeShape visit(const luci::CircleExp *node) final { return use_x(node); }
+  loco::TensorShape output_shape;
 
-  loco::NodeShape visit(const luci::CircleExpandDims *node) final
+  output_shape.rank(input_shape.rank());
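+  // Each output dim is the input dim multiplied by the matching multiples entry,
+  // e.g., input [2, 3] with multiples [2, 2] -> output [4, 6]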
+  for (uint32_t ni = 0; ni < n; ++ni)
   {
-    const loco::DataType S32 = loco::DataType::S32;
-    auto x_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    if (x_shape.rank() == 0)
-    {
-      // This maybe for unknown shape. We use shape from the node itself.
-      return use_own(node);
-    }
-    auto const_axis = loco::must_cast<luci::CircleConst *>(node->axis());
-    LUCI_ASSERT(const_axis->dtype() == S32, "Only support int32 CircleConst for axis");
-    if (const_axis->rank() != 0 && const_axis->rank() != 1)
-    {
-      INTERNAL_EXN_V("Non-scalar axis in OP", node->opnum());
-    }
-    int32_t axis = const_axis->at<S32>(0);
-    LUCI_ASSERT((axis <= static_cast<int32_t>(x_shape.rank())) &&
-                    (axis >= -1 - static_cast<int32_t>(x_shape.rank())),
-                "Axis has to be between [-(D+1), D], where D is rank of input.");
-    size_t positive_axis = axis < 0 ? x_shape.rank() + axis + 1 : axis;
-    loco::TensorShape output_shape;
-    output_shape.rank(x_shape.rank() + 1);
-    size_t i = 0;
-    for (; i < positive_axis; i++)
-      output_shape.dim(i) = x_shape.dim(i);
-    output_shape.dim(i) = loco::Dimension(1);
-    for (; i < x_shape.rank(); i++)
-      output_shape.dim(i + 1) = x_shape.dim(i);
-    return loco::NodeShape{output_shape};
+    int32_t multiple = multiples->at<S32>(ni);
+    output_shape.dim(ni) = input_shape.dim(ni).value() * static_cast<uint32_t>(multiple);
   }
 
-  loco::NodeShape visit(const luci::CircleFill *node) final
-  {
-    loco::TensorShape shape;
-    {
-      LUCI_ASSERT(node->dims(), "dims input should not be nullptr");
+  return loco::NodeShape{output_shape};
+}
 
-      auto dims_node = dynamic_cast<luci::CircleConst *>(node->dims());
-      if (dims_node != nullptr)
-      {
-        // Only support node with S32
-        LUCI_ASSERT(dims_node->dtype() == loco::DataType::S32, "Only support int32 CircleConst");
+loco::NodeShape infer_transpose(const luci::CircleTranspose *node)
+{
+  auto input_shape = loco::shape_get(node->a()).as<loco::TensorShape>();
 
-        if (dims_node->rank() != 1)
-          INTERNAL_EXN_V("Only support rank 1 CircleConst", oops::to_uint32(dims_node->rank()));
+  auto perm_node = loco::must_cast<luci::CircleConst *>(node->perm());
 
-        shape.rank(dims_node->dim(0).value());
+  loco::TensorShape output_shape;
+  output_shape.rank(input_shape.rank());
 
-        for (uint32_t axis = 0; axis < shape.rank(); ++axis)
-        {
-          shape.dim(axis) = dims_node->at<loco::DataType::S32>(axis);
-        }
-      }
-      else
-      {
-        shape = own_shape(node);
-      }
-    }
+  assert(perm_node->dtype() == loco::DataType::S32);
+  assert(input_shape.rank() == perm_node->template size<loco::DataType::S32>());
 
-    return loco::NodeShape{shape};
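+  // Output dim i takes the input dim indexed by perm[i],
+  // e.g., input [2, 3, 4] with perm [2, 0, 1] -> output [4, 2, 3]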
+  for (uint32_t out_axis = 0; out_axis < output_shape.rank(); out_axis++)
+  {
+    auto in_axis = perm_node->template at<loco::DataType::S32>(out_axis);
+    output_shape.dim(out_axis) = input_shape.dim(in_axis);
   }
 
-  loco::NodeShape visit(const luci::CircleFloor *node) final { return use_x(node); }
+  return output_shape;
+}
 
-  loco::NodeShape visit(const luci::CircleFloorDiv *node) final { return broadcast_xy(node); }
+loco::NodeShape infer_transpose_conv(const luci::CircleTransposeConv *node)
+{
+  // TransposeConv's output shape is written in its 'inputSizes' argument
+  auto input_sizes_const = loco::must_cast<luci::CircleConst *>(node->inputSizes());
+  // TODO support non-const type
+  LUCI_ASSERT(input_sizes_const->dtype() == loco::DataType::S32, "Only support S32 dtype")
+  LUCI_ASSERT(input_sizes_const->rank() == 1 && input_sizes_const->dim(0).value() == 4,
+              "Only support rank 1 with 4 entries")
 
-  loco::NodeShape visit(const luci::CircleFloorMod *node) final { return broadcast_xy(node); }
+  loco::TensorShape shape;
 
-  loco::NodeShape visit(const luci::CircleFullyConnected *node) final
-  {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    auto weights_shape = loco::shape_get(node->weights()).as<loco::TensorShape>();
+  shape.rank(4);
+  for (uint32_t axis = 0; axis < 4; ++axis)
+    shape.dim(axis) = input_sizes_const->at<loco::DataType::S32>(axis);
 
-    // Checking shape capability for fully connected layer
-    // Input: a tensor of at least rank 2 [D1, D2, ... Dn]
-    // Weight: [# of units, K]
-    // Output: [D1 * D2 * ... * Dn / K, # of units]
-    if (input_shape.rank() < 2 || weights_shape.rank() != 2)
-    {
-      // Return node own shape if shape inference is not possible
-      return use_own(node);
-    }
+  return loco::NodeShape{shape};
+}
 
-    uint32_t input_size = 1;
-    for (uint32_t i = 0; i < input_shape.rank(); i++)
-    {
-      input_size = input_size * input_shape.dim(i).value();
-    }
-    const uint32_t batch_size = input_size / weights_shape.dim(1).value();
-    loco::TensorShape out_shape;
-    out_shape.rank(2);
-    out_shape.dim(0) = batch_size;
-    out_shape.dim(1) = weights_shape.dim(0);
+loco::NodeShape infer_unpack(const luci::CircleUnpack *node)
+{
+  // CircleUnpack provides a list (array) of Tensors, each having one less dimension than the input
+  // We'll set shape of CircleUnpack to shape of actual outputs
+  // TODO fix this if any problem arises
+  auto value_shape = loco::shape_get(node->value()).as<loco::TensorShape>();
 
-    return loco::NodeShape{out_shape};
-  }
+  auto axis = node->axis();
+  auto num = node->num();
+  auto rank = static_cast<int32_t>(value_shape.rank());
 
-  loco::NodeShape visit(const luci::CircleGather *node) final
+  if (rank == 0)
   {
-    loco::TensorShape output_shape;
+    // Unknown shape
+    return use_own(node);
+  }
 
-    const auto input_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
-    const auto positions_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
-    int32_t axis = node->axis();
+  LUCI_ASSERT(-rank <= axis && axis < rank, "Axis is out of range");
 
-    // If CircleGather input has a dynamic shape, it can't inference this shape. So, it returns the
-    // shape that node already has.
-    if (input_shape.rank() == 0 || positions_shape.rank() == 0)
-      return use_own(node);
+  if (axis < 0)
+    axis += rank;
 
-    if (axis < 0)
-      axis += input_shape.rank();
+  LUCI_ASSERT(num == static_cast<int32_t>(value_shape.dim(axis).value()),
+              "num, axis maybe incorrect");
 
-    output_shape.rank(input_shape.rank() - 1 + positions_shape.rank());
-    int32_t outdim_index = 0;
-    for (int32_t i = 0; i < axis; ++i)
-      output_shape.dim(outdim_index++) = input_shape.dim(i);
-    for (uint32_t i = 0; i < positions_shape.rank(); ++i)
-      output_shape.dim(outdim_index++) = positions_shape.dim(i);
-    for (uint32_t i = axis + 1; i < input_shape.rank(); ++i)
-      output_shape.dim(outdim_index++) = input_shape.dim(i);
+  loco::TensorShape output_shape;
+  output_shape.rank(rank - 1);
 
-    return loco::NodeShape{output_shape};
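+  // The unpacked axis is removed, e.g., value [3, 4, 5] with axis 1 (num == 4)
+  // -> each output [3, 5]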
+  for (int32_t i = 0, o = 0; i < rank; ++i)
+  {
+    if (i != axis)
+      output_shape.dim(o++) = value_shape.dim(i);
   }
 
-  loco::NodeShape visit(const luci::CircleGatherNd *node) final
-  {
-    loco::TensorShape output_shape;
+  return loco::NodeShape{output_shape};
+}
 
-    const auto params_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
-    const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+loco::NodeShape infer_unique(const luci::CircleUnique *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
 
-    const auto params_rank = params_shape.rank();
-    const auto indices_rank = indices_shape.rank();
+  assert(input_shape.rank() == 1);
 
-    // see https://www.tensorflow.org/api_docs/python/tf/gather_nd
-    // output.shape = indices.shape[:-1] + params.shape[indices.shape[-1]:]
-    // batch_dims isn't supported in tflite
+  loco::TensorShape shape_output;
+  shape_output = own_shape(node);
 
-    // TODO: replace exceptions with setting shape to unknown?
+  return loco::NodeShape{shape_output};
+}
 
-    if (!indices_shape.dim(indices_rank - 1).known())
-      INTERNAL_EXN("Last indices dimension is unknown");
+// Circle Only
+loco::NodeShape infer_bcq_fully_connected(const luci::CircleBCQFullyConnected *node)
+{
+  loco::TensorShape out_shape;
 
-    auto indices_last_dim = indices_shape.dim(indices_rank - 1).value();
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  auto weights_clusters = loco::must_cast<luci::CircleConst *>(node->weights_clusters());
 
-    if (indices_last_dim > params_rank)
-      INTERNAL_EXN("Last indices dimension should be <= params rank");
+  LUCI_ASSERT(input_shape.rank() == 2, "Input rank of BCQFullyConnected should be 2");
 
-    const uint32_t output_rank = indices_rank + params_rank - indices_last_dim - 1;
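+  // weights_clusters presumably holds [N, 2] (cluster, count) pairs; summing the
+  // second entry of each pair gives the number of output rows of the weight matrix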
+  int32_t qbits_sum = 0;
+  for (uint32_t i = 0; i < weights_clusters->dim(0).value(); ++i)
+  {
+    qbits_sum += weights_clusters->at<loco::DataType::S32>(i * 2 + 1);
+  }
 
-    output_shape.rank(output_rank);
+  out_shape.rank(2);
+  out_shape.dim(0) = qbits_sum;
+  out_shape.dim(1) = input_shape.dim(1);
 
-    uint32_t output_index = 0;
-    for (uint32_t i = 0; i < indices_rank - 1; ++i)
-    {
-      auto &dim = indices_shape.dim(i);
-      if (!dim.known())
-        INTERNAL_EXN("Unknown indices dimension is unsupported");
-      output_shape.dim(output_index++).set(dim.value());
-    }
+  return loco::NodeShape{out_shape};
+}
 
-    for (uint32_t i = indices_last_dim; i < params_rank; ++i)
-    {
-      auto &dim = params_shape.dim(i);
-      if (!dim.known())
-        INTERNAL_EXN("Unknown params dimension is unsupported");
-      output_shape.dim(output_index++).set(dim.value());
-    }
+loco::NodeShape infer_bcq_gather(const luci::CircleBCQGather *node)
+{
+  loco::TensorShape input_shape;
+  loco::TensorShape output_shape;
 
-    return loco::NodeShape{output_shape};
+  const auto input_binary_shape = loco::shape_get(node->input_binary()).as<loco::TensorShape>();
+  const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+  auto axis = node->axis();
+
+  auto input_clusters = loco::must_cast<luci::CircleConst *>(node->input_clusters());
+  auto qbits_sum = 0;
+  for (uint32_t i = 0; i < input_clusters->dim(0).value(); ++i)
+  {
+    qbits_sum += input_clusters->at<loco::DataType::S32>(i * 2 + 1);
   }
 
-  loco::NodeShape visit(const luci::CircleGreater *node) final { return broadcast_xy(node); }
+  input_shape.rank(2);
+  input_shape.dim(0) = qbits_sum;
+  input_shape.dim(1) = input_binary_shape.dim(1).value() * 32;
 
-  loco::NodeShape visit(const luci::CircleGreaterEqual *node) final { return broadcast_xy(node); }
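+  // As with Gather, the indexed axis is replaced by the shape of indices:
+  // output rank = input rank - 1 + indices rank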
+  output_shape.rank(input_shape.rank() - 1 + indices_shape.rank());
+  int32_t outdim_index = 0;
+  for (int32_t i = 0; i < axis; ++i)
+    output_shape.dim(outdim_index++) = input_shape.dim(i);
+  for (uint32_t i = 0; i < indices_shape.rank(); ++i)
+    output_shape.dim(outdim_index++) = indices_shape.dim(i);
+  for (uint32_t i = axis + 1; i < input_shape.rank(); ++i)
+    output_shape.dim(outdim_index++) = input_shape.dim(i);
 
-  loco::NodeShape visit(const luci::CircleIf *node) final
-  {
-    // Shape of CircleIf is not used. Just use input 0
-    assert(node->input_count() > 0);
-    const auto input_shape = loco::shape_get(node->input(0)).as<loco::TensorShape>();
-    return loco::NodeShape{input_shape};
-  }
+  return loco::NodeShape{output_shape};
+}
 
-  loco::NodeShape visit(const luci::CircleL2Normalize *node) final { return use_x(node); }
+// Virtual
+loco::NodeShape infer_input(const luci::CircleInput *node)
+{
+  loco::TensorShape shape;
 
-  loco::NodeShape visit(const luci::CircleL2Pool2D *node) final
-  {
-    return infer_pool_2d_shape(node);
-  }
+  shape.rank(node->rank());
+  for (uint32_t axis = 0; axis < node->rank(); axis++)
+    shape.dim(axis) = node->dim(axis);
 
-  loco::NodeShape visit(const luci::CircleLeakyRelu *node) final
-  {
-    const auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
-    return loco::NodeShape{input_shape};
-  }
+  return loco::NodeShape{shape};
+}
 
-  loco::NodeShape visit(const luci::CircleLess *node) final { return broadcast_xy(node); }
+loco::NodeShape infer_output(const luci::CircleOutput *node)
+{
+  auto graph_outputs = node->graph()->outputs();
+  auto graph_output = graph_outputs->at(node->index());
+  auto output_shape = graph_output->shape();
 
-  loco::NodeShape visit(const luci::CircleLessEqual *node) final { return broadcast_xy(node); }
+  return loco::NodeShape{*output_shape};
+}
 
-  loco::NodeShape visit(const luci::CircleLocalResponseNormalization *node) final
+loco::NodeShape infer_if_out(const luci::CircleIfOut *node)
+{
+  /**
+   * @note  IF operator's type and shape are those of the "then" and "else"
+   *        Graph Outputs.
+   */
+  auto circle_if = dynamic_cast<const luci::CircleIf *>(node->input());
+  if (circle_if == nullptr)
   {
-    const auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    return loco::NodeShape{input_shape};
+    INTERNAL_EXN("CircleIf IR is not configured correctly");
   }
 
-  loco::NodeShape visit(const luci::CircleLog *node) final { return use_x(node); }
+  auto index = node->index();
+  auto then_graph = circle_if->then_graph();
+  auto else_graph = circle_if->else_graph();
+  assert(then_graph != nullptr);
+  assert(else_graph != nullptr);
 
-  loco::NodeShape visit(const luci::CircleLogicalAnd *node) final { return use_x(node); }
+  // shape and type are assumed to be the same
+  // these are checked at post_import_graph() in Import
+  auto then_outputs = loco::output_nodes(then_graph);
+  auto else_outputs = loco::output_nodes(else_graph);
+  assert(then_outputs.size() == else_outputs.size());
+  assert(index < static_cast<int32_t>(then_outputs.size()));
 
-  loco::NodeShape visit(const luci::CircleLogicalNot *node) final { return use_x(node); }
+  auto then_out = loco::must_cast<luci::CircleOutput *>(then_outputs.at(index));
+  auto else_out = loco::must_cast<luci::CircleOutput *>(else_outputs.at(index));
 
-  loco::NodeShape visit(const luci::CircleLogicalOr *node) final { return use_x(node); }
+  auto then_graph_outputs = then_graph->outputs(); // loco::GraphOutput items
+  auto else_graph_outputs = else_graph->outputs();
+  assert(then_graph_outputs->size() == else_graph_outputs->size());
 
-  loco::NodeShape visit(const luci::CircleLogistic *node) final { return use_x(node); }
+  auto then_graph_output = then_graph_outputs->at(then_out->index());
+  auto else_graph_output = else_graph_outputs->at(else_out->index());
+  (void)else_graph_output; // make compiler happy for unused variable warnings
+  assert(*then_graph_output->shape() == *else_graph_output->shape());
 
-  loco::NodeShape visit(const luci::CircleMatrixSetDiag *node) final
-  {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
+  return loco::NodeShape{*then_graph_output->shape()};
+}
 
-    auto rank = diagonal_shape.rank();
+loco::NodeShape infer_non_max_suppression_v4_out(const luci::CircleNonMaxSuppressionV4Out *node)
+{
+  const loco::DataType S32 = loco::DataType::S32;
 
-    LUCI_ASSERT(rank == input_shape.rank() - 1, "diagonal rank = input rank - 1");
+  auto nmsv4 = dynamic_cast<const luci::CircleNonMaxSuppressionV4 *>(node->input());
+  if (nmsv4 == nullptr)
+    INTERNAL_EXN("CircleNonMaxSuppressionV4 IR is not configured correctly");
 
-    for (uint32_t i = 0; i < rank - 1; i++)
-    {
-      LUCI_ASSERT(diagonal_shape.dim(i) == input_shape.dim(i), "diagonal dims = input dims");
-    }
+  auto index = node->index();
+  if (index == 1)
+    return loco::TensorShape({0});
 
-    auto dim = std::min(input_shape.dim(rank - 1).value(), input_shape.dim(rank).value());
+  assert(index == 0);
 
-    LUCI_ASSERT(dim == diagonal_shape.dim(rank - 1), "Max diag len error");
+  auto unknown = loco::TensorShape{loco::Dimension()};
+  auto max_output_size = dynamic_cast<const luci::CircleConst *>(nmsv4->max_output_size());
+  if (max_output_size == nullptr)
+    return unknown; // we need CircleConst for max output size
 
-    return loco::NodeShape{input_shape};
-  }
+  LUCI_ASSERT(max_output_size->dtype() == S32, "Only support int32 for max_output_size");
 
-  loco::NodeShape visit(const luci::CircleLogSoftmax *node) final { return use_logits(node); }
+  if (max_output_size->size<S32>() < 1)
+    return unknown;
 
-  loco::NodeShape visit(const luci::CircleMatrixDiag *node) final
-  {
-    loco::TensorShape output_shape;
+  auto max_output_size_value = uint32_t(max_output_size->at<S32>(0));
+  return loco::TensorShape{max_output_size_value};
+}
 
-    auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
-    auto rank = diagonal_shape.rank();
+loco::NodeShape infer_non_max_suppression_v5_out(const luci::CircleNonMaxSuppressionV5Out *node)
+{
+  const loco::DataType S32 = loco::DataType::S32;
 
-    output_shape.rank(rank + 1);
+  auto nmsv5 = dynamic_cast<const luci::CircleNonMaxSuppressionV5 *>(node->input());
+  if (nmsv5 == nullptr)
+    INTERNAL_EXN("CircleNonMaxSuppressionV5 IR is not configured correctly");
 
-    for (uint32_t i = 0; i < rank; i++)
-    {
-      output_shape.dim(i) = diagonal_shape.dim(i);
-    }
+  auto index = node->index();
+  if (index == 2)
+    return loco::TensorShape({0});
 
-    output_shape.dim(rank) = diagonal_shape.dim(rank - 1);
+  assert(index == 0 || index == 1);
 
-    return loco::NodeShape{output_shape};
-  }
+  auto unknown = loco::TensorShape{loco::Dimension()};
+  auto max_output_size = dynamic_cast<const luci::CircleConst *>(nmsv5->max_output_size());
+  if (max_output_size == nullptr)
+    return unknown; // we need CircleConst for max output size
 
-  loco::NodeShape visit(const luci::CircleMaximum *node) final { return broadcast_xy(node); }
+  LUCI_ASSERT(max_output_size->dtype() == S32, "Only support int32 for max_output_size");
 
-  loco::NodeShape visit(const luci::CircleMaxPool2D *node) final
-  {
-    return infer_pool_2d_shape(node);
-  }
+  if (max_output_size->size<S32>() < 1)
+    return unknown;
 
-  loco::NodeShape visit(const luci::CircleMean *node) final
-  {
-    auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
-    return loco::NodeShape{output_shape};
-  }
+  auto max_output_size_value = uint32_t(max_output_size->at<S32>(0));
+  return loco::TensorShape{max_output_size_value};
+}
 
-  loco::NodeShape visit(const luci::CircleMinimum *node) final { return broadcast_xy(node); }
+loco::NodeShape infer_split_out(const luci::CircleSplitOut *node)
+{
+  const loco::DataType S32 = loco::DataType::S32;
 
-  loco::NodeShape visit(const luci::CircleMirrorPad *node) final
-  {
-    const loco::DataType S32 = loco::DataType::S32;
+  auto split = dynamic_cast<const luci::CircleSplit *>(node->input());
+  if (split == nullptr)
+    INTERNAL_EXN("CircleSplit IR is not configured correctly");
 
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+  loco::NodeShape unknown;
 
-    // TODO support non-const case
-    // TODO support other data type
-    LUCI_ASSERT(paddings->dtype() == S32, "Only support int 32 for now");
-    LUCI_ASSERT(paddings->rank() == 2, "paddings should be rank 2")
+  auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
 
-    int32_t n = paddings->dim(0).value();
-    int32_t v = paddings->dim(1).value();
+  auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim());
+  if (split_dim == nullptr)
+    return unknown; // we need CircleConst for split_dim
+  LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim");
 
-    LUCI_ASSERT(v == 2, "paddings should be [n, 2]");
-    LUCI_ASSERT(n == int32_t(input_shape.rank()),
-                "paddings [n, 2] should have same value of input rank");
+  assert(split_dim->size<S32>() == 1);
+  auto split_dim_axis = split_dim->at<S32>(0);
+  if (split_dim_axis < 0)
+    split_dim_axis += split_shape.rank();
 
-    loco::TensorShape output_shape;
+  auto split_dim_value = split_shape.dim(split_dim_axis).value();
+  assert(split_dim_value % split->num_split() == 0);
+  const int split_depth = split_dim_value / split->num_split();
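+  // e.g., input [4, 6] split into 3 along axis 1 -> each output [4, 2]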
 
-    output_shape.rank(input_shape.rank());
-    for (int32_t ni = 0; ni < n; ++ni)
-    {
-      int32_t idx = ni * 2;
-      int value = input_shape.dim(ni).value();
-      value += paddings->at<S32>(idx + 0); // left
-      value += paddings->at<S32>(idx + 1); // right
-      output_shape.dim(ni) = value;
-    }
+  loco::TensorShape output_shape = split_shape;
 
-    return loco::NodeShape{output_shape};
-  }
+  // All split outputs have the same shape
+  output_shape.dim(split_dim_axis) = loco::Dimension(split_depth);
 
-  loco::NodeShape visit(const luci::CircleMul *node) final { return broadcast_xy(node); }
+  return loco::NodeShape{output_shape};
+}
 
-  loco::NodeShape visit(const luci::CircleNeg *node) final { return use_x(node); }
+loco::NodeShape infer_split_v_out(const luci::CircleSplitVOut *node)
+{
+  const loco::DataType S32 = loco::DataType::S32;
 
-  loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4 *node) final
-  {
-    const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
-    return loco::NodeShape{boxes_shape};
-  }
+  auto split = dynamic_cast<const luci::CircleSplitV *>(node->input());
+  if (split == nullptr)
+    INTERNAL_EXN("CircleSplit IR is not configured correctly");
 
-  loco::NodeShape visit(const luci::CircleNotEqual *node) final { return broadcast_xy(node); }
+  loco::NodeShape unknown;
 
-  loco::NodeShape visit(const luci::CircleOneHot *node) final
-  {
-    const loco::DataType S32 = loco::DataType::S32;
-    auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
-    // Only support OneHot node's depth() is CircleConst with type S32
-    // TODO support depth with other types
-    auto depth = loco::must_cast<luci::CircleConst *>(node->depth());
-    LUCI_ASSERT(depth->dtype() == S32, "Only support int32 CircleConst");
-    if (depth->rank() != 0)
-      INTERNAL_EXN_V("Only support rank 0 CircleOneHot in Depth", oops::to_uint32(depth->rank()));
-    loco::TensorShape output_shape;
-    output_shape.rank(indices_shape.rank() + 1);
-    auto axis = node->axis();
-    if (axis < 0)
-      axis += indices_shape.rank() + 1;
-    LUCI_ASSERT(0 <= axis, "Axis is out of range");
-    LUCI_ASSERT(static_cast<uint32_t>(axis) <= indices_shape.rank(), "Axis is out of range");
-    uint32_t j = 0;
-    for (uint32_t i = 0; i < output_shape.rank(); i++)
-    {
-      if (i == static_cast<uint32_t>(axis))
-      {
-        output_shape.dim(i) = depth->at<S32>(0);
-      }
-      else
-      {
-        output_shape.dim(i) = indices_shape.dim(j++);
-      }
-    }
-    return loco::NodeShape{output_shape};
-  }
+  auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
 
-  loco::NodeShape visit(const luci::CirclePack *node) final
-  {
-    LUCI_ASSERT(node->values_count() > 0, "Only support one or more inputs");
+  auto size_splits = dynamic_cast<const luci::CircleConst *>(split->size_splits());
+  if (size_splits == nullptr)
+    return unknown; // we need CircleConst for size_splits
+  LUCI_ASSERT(size_splits->dtype() == S32, "Only support int32 for size_splits");
 
-    auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
-    // Make sure all inputs have the same shape.
-    for (uint32_t i = 1; i < node->values_count(); ++i)
-    {
-      auto in_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
-      LUCI_ASSERT(loco::NodeShape{first_shape} == loco::NodeShape{in_shape},
-                  "All inputs must have the same shape");
-    }
+  auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim());
+  if (split_dim == nullptr)
+    return unknown; // we need CircleConst for split_dim
+  LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim");
 
-    // Checking shape capability for pack layer
-    // Input: tensors [D1, D2, ... Dn]
-    // Axis: K
-    // Output: [D1, D2, ... , D_K-1, n, D_K+1, ... Dn]
-    auto axis = node->axis();
-    if (axis < 0)
-      axis += first_shape.rank() + 1;
+  // fetch axis
+  assert(split_dim->size<S32>() == 1);
+  auto split_dim_axis = split_dim->at<S32>(0);
+  if (split_dim_axis < 0)
+    split_dim_axis += split_shape.rank();
 
-    LUCI_ASSERT(0 <= axis, "Axis is out of range");
-    LUCI_ASSERT(static_cast<uint32_t>(axis) <= first_shape.rank(), "Axis is out of range");
+  // interpret size_splits values
+  int32_t size_splits_count = static_cast<int32_t>(size_splits->size<S32>());
+  assert(size_splits_count == split->num_split());
 
-    loco::TensorShape output_shape;
-    output_shape.rank(first_shape.rank() + 1);
+  int64_t minus_one_count = 0, size_splits_sum = 0;
+  for (int32_t idx = 0; idx < size_splits_count; ++idx)
+  {
+    auto size = size_splits->at<S32>(idx);
+    assert(size >= -1);
+    if (size == -1)
+      ++minus_one_count;
+    else
+      size_splits_sum += size;
+  }
+  if (minus_one_count > 1)
+    INTERNAL_EXN("CircleSplitV size_splits has more than two -1 values");
 
-    uint32_t j = 0;
-    for (uint32_t i = 0; i < output_shape.rank(); ++i)
-    {
-      if (i == static_cast<uint32_t>(axis))
-      {
-        output_shape.dim(i) = node->values_count();
-      }
-      else
-      {
-        output_shape.dim(i) = first_shape.dim(j++);
-      }
-    }
+  // calculate this SplitVOut shape
+  auto input_size = split_shape.dim(split_dim_axis).value();
+  assert(size_splits_sum <= input_size);
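+  // A single -1 entry takes the remainder, e.g., axis size 10 with
+  // size_splits [2, 3, -1] -> split depths 2, 3 and 5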
 
-    return loco::NodeShape{output_shape};
-  }
+  auto index_this = node->index();
+  assert(0 <= index_this && index_this < split->num_split());
+  auto split_depth = size_splits->at<S32>(index_this);
+  if (split_depth == -1)
+    split_depth = input_size - size_splits_sum;
 
-  loco::NodeShape visit(const luci::CirclePad *node) final
-  {
-    const loco::DataType S32 = loco::DataType::S32;
+  loco::TensorShape output_shape = split_shape;
 
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+  output_shape.dim(split_dim_axis) = loco::Dimension(split_depth);
 
-    // TODO support non-const case
-    // TODO support other data type
-    LUCI_ASSERT(paddings->dtype() == S32, "Only support int 32 for now");
-    LUCI_ASSERT(paddings->rank() == 2, "paddings should be rank 2")
+  return loco::NodeShape{output_shape};
+}
 
-    int32_t n = paddings->dim(0).value();
-    int32_t v = paddings->dim(1).value();
+loco::NodeShape infer_top_k_v2_out(const luci::CircleTopKV2Out *node)
+{
+  const loco::DataType S32 = loco::DataType::S32;
 
-    LUCI_ASSERT(v == 2, "paddings should be [n, 2]");
-    LUCI_ASSERT(n == int32_t(input_shape.rank()),
-                "paddings [n, 2] should have same value of input rank");
+  auto topkv2 = dynamic_cast<const luci::CircleTopKV2 *>(node->input());
+  if (topkv2 == nullptr)
+    INTERNAL_EXN("CircleSplit IR is not configured correctly");
 
-    loco::TensorShape output_shape;
+  // shape of topkv2 is the same as topkv2->input()
+  auto input_shape = loco::shape_get(topkv2).as<loco::TensorShape>();
 
-    output_shape.rank(input_shape.rank());
-    for (int32_t ni = 0; ni < n; ++ni)
-    {
-      int32_t idx = ni * 2;
-      int value = input_shape.dim(ni).value();
-      value += paddings->at<S32>(idx + 0); // left
-      value += paddings->at<S32>(idx + 1); // right
-      output_shape.dim(ni) = value;
-    }
+  auto node_k = loco::must_cast<const luci::CircleConst *>(topkv2->k());
+  LUCI_ASSERT(node_k->dtype() == S32, "Only support Int32");
+  assert(node_k->size<S32>() == 1);
 
-    return loco::NodeShape{output_shape};
+  loco::TensorShape output_shape;
+
+  output_shape.rank(input_shape.rank());
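+  // The last axis is replaced by k, e.g., input [8, 100] with k = 5 -> output [8, 5]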
+  for (uint32_t idx = 0; idx < input_shape.rank() - 1; ++idx)
+  {
+    output_shape.dim(idx) = input_shape.dim(idx);
   }
+  output_shape.dim(input_shape.rank() - 1) = node_k->at<S32>(0);
 
-  loco::NodeShape visit(const luci::CirclePow *node) final { return broadcast_xy(node); }
+  return loco::NodeShape{output_shape};
+}
 
-  loco::NodeShape visit(const luci::CirclePRelu *node) final
+loco::NodeShape infer_unique_out(const luci::CircleUniqueOut *node)
+{
+  if (node->index() == 0)
   {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    auto alpha_shape = loco::shape_get(node->alpha()).as<loco::TensorShape>();
+    auto unique_shape = own_shape(node);
+    return loco::NodeShape{unique_shape};
+  }
+  assert(node->index() == 1);
+  auto unique = loco::must_cast<luci::CircleUnique *>(node->input());
+  auto unique_shape = loco::shape_get(unique->input()).as<loco::TensorShape>();
 
-    auto output_shape = broadcast_shape(input_shape, alpha_shape);
+  assert(unique_shape.rank() == 1);
 
-    return loco::NodeShape{output_shape};
+  loco::TensorShape shape_output;
+  shape_output.rank(1);
+  shape_output.dim(0) = unique_shape.dim(0);
+  return loco::NodeShape{shape_output};
+}
+
+loco::NodeShape infer_unpack_out(const luci::CircleUnpackOut *node)
+{
+  auto unpack = dynamic_cast<const luci::CircleUnpack *>(node->input());
+  if (unpack == nullptr)
+  {
+    INTERNAL_EXN("CircleUnpack IR is not configured correctly");
   }
 
-  loco::NodeShape visit(const luci::CircleRange *node) final
+  auto unpack_shape = loco::shape_get(unpack).as<loco::TensorShape>();
+
+  return loco::NodeShape{unpack_shape};
+}
+
+loco::NodeShape infer_while_out(const luci::CircleWhileOut *node)
+{
+  /**
+   * @note  WHILE operator's shape is the same as the "cond"
+   *        Graph input.
+   */
+  auto circle_while = dynamic_cast<const luci::CircleWhile *>(node->input());
+  if (circle_while == nullptr)
   {
-    loco::TensorShape output_shape;
-    output_shape.rank(1);
+    INTERNAL_EXN("CircleWhile IR is not configured correctly");
+  }
 
-    auto start_node = dynamic_cast<luci::CircleConst *>(node->start());
-    auto limit_node = dynamic_cast<luci::CircleConst *>(node->limit());
-    auto delta_node = dynamic_cast<luci::CircleConst *>(node->delta());
+  auto index = node->index();
+  auto cond_graph = circle_while->cond_graph();
+  assert(cond_graph != nullptr);
 
-    if (start_node == nullptr || limit_node == nullptr || delta_node == nullptr)
-    {
-      return use_own(node);
-    }
+  // Assumption: the index of CircleWhileOut matches the index of input nodes returned by
+  // loco::input_nodes
+  auto cond_inputs = loco::input_nodes(cond_graph);
+  auto cond_in = loco::must_cast<luci::CircleInput *>(cond_inputs.at(index));
+
+  auto cond_graph_inputs = cond_graph->inputs();
+  auto cond_graph_input = cond_graph_inputs->at(cond_in->index());
+
+  auto cond_graph_input_shape = *cond_graph_input->shape();
+  auto this_shape = own_shape(node);
+
+  if (!(this_shape == cond_graph_input_shape))
+  {
+    LOGGER(l);
+    WARN(l) << "Warning: CircleWhileOut '" << node->name() << "' shape mispatch " << this_shape
+            << " vs " << cond_graph_input_shape;
+  }
 
-    double start = 0, limit = 0, delta = 0;
+  return loco::NodeShape{this_shape};
+}
 
-#define GET_RANGE_PARAM(DT)         \
-  start = start_node->scalar<DT>(); \
-  limit = limit_node->scalar<DT>(); \
-  delta = delta_node->scalar<DT>();
+/**
+ * @brief Class to infer the shape of CircleNode
+ *
+ * @note All CircleNode's inputs and outputs are always loco::Domain::Tensor
+ */
+class ShapeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::NodeShape>
+{
+public:
+  loco::NodeShape visit(const luci::CircleAbs *node) final { return use_x(node); }
 
-    switch (start_node->dtype())
-    {
-      case loco::DataType::FLOAT32:
-        GET_RANGE_PARAM(loco::DataType::FLOAT32)
-        break;
-      case loco::DataType::S32:
-        GET_RANGE_PARAM(loco::DataType::S32)
-        break;
-      default:
-        INTERNAL_EXN("Range data type not supported");
-    }
+  loco::NodeShape visit(const luci::CircleAdd *node) final { return broadcast_xy(node); }
 
-#undef GET_RANGE_PARAM
+  loco::NodeShape visit(const luci::CircleAddN *node) final { return infer_add_n(node); }
 
-    if (delta == 0)
-      INTERNAL_EXN("Delta can not be zero");
+  loco::NodeShape visit(const luci::CircleArgMax *node) final { return infer_arg_max(node); }
 
-    output_shape.dim(0) = ceil((limit - start) / delta);
+  loco::NodeShape visit(const luci::CircleArgMin *node) final { return infer_arg_min(node); }
 
-    return loco::NodeShape{output_shape};
+  loco::NodeShape visit(const luci::CircleAveragePool2D *node) final
+  {
+    return infer_pool_2d_shape(node);
   }
 
-  loco::NodeShape visit(const luci::CircleRank *) final
+  loco::NodeShape visit(const luci::CircleBatchMatMul *node) final
   {
-    loco::TensorShape shape_output;
-    shape_output.rank(0);
+    auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+    auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
 
-    return loco::NodeShape{shape_output};
+    return infer_batchmatmul_shape(x_shape, y_shape, node->adj_x(), node->adj_y());
   }
 
-  loco::NodeShape visit(const luci::CircleReduceAny *node) final
+  loco::NodeShape visit(const luci::CircleBatchToSpaceND *node) final
   {
-    auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
-    return loco::NodeShape{output_shape};
+    return infer_batch_to_space_nd(node);
   }
 
-  loco::NodeShape visit(const luci::CircleReduceMax *node) final
+  loco::NodeShape visit(const luci::CircleCast *node) final { return use_x(node); }
+
+  loco::NodeShape visit(const luci::CircleCeil *node) final { return use_x(node); }
+
+  loco::NodeShape visit(const luci::CircleConcatenation *node) final
   {
-    auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
-    return loco::NodeShape{output_shape};
+    return infer_concatenation(node);
   }
 
-  loco::NodeShape visit(const luci::CircleReduceMin *node) final
+  loco::NodeShape visit(const luci::CircleConst *node) final { return use_own(node); }
+
+  loco::NodeShape visit(const luci::CircleConv2D *node) final { return infer_conv2d(node); }
+
+  loco::NodeShape visit(const luci::CircleCos *node) final { return use_x(node); }
+
+  loco::NodeShape visit(const luci::CircleCustom *node) final { return use_own(node); }
+
+  loco::NodeShape visit(const luci::CircleDepthToSpace *node) final
   {
-    auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
-    return loco::NodeShape{output_shape};
+    return infer_depth_to_space(node);
   }
 
-  loco::NodeShape visit(const luci::CircleReduceProd *node) final
+  loco::NodeShape visit(const luci::CircleDepthwiseConv2D *node) final
   {
-    auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
-    return loco::NodeShape{output_shape};
+    return infer_depthwise_conv2d(node);
   }
 
-  loco::NodeShape visit(const luci::CircleRelu *node) final
+  loco::NodeShape visit(const luci::CircleDiv *node) final { return broadcast_xy(node); }
+
+  loco::NodeShape visit(const luci::CircleElu *node) final
   {
     auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
 
     return loco::NodeShape{input_shape};
   }
 
-  loco::NodeShape visit(const luci::CircleRelu6 *node) final
-  {
-    auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+  loco::NodeShape visit(const luci::CircleEqual *node) final { return broadcast_xy(node); }
 
-    return loco::NodeShape{input_shape};
-  }
+  loco::NodeShape visit(const luci::CircleExp *node) final { return use_x(node); }
 
-  loco::NodeShape visit(const luci::CircleReluN1To1 *node) final
+  loco::NodeShape visit(const luci::CircleExpandDims *node) final
   {
-    auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
-
-    return loco::NodeShape{input_shape};
+    return infer_expand_dims(node);
   }
 
-  /**
-   * @note  CircleReshape has new shape info in two places: 2nd input and attribute.
-   *        This shape inference uses shape from input 'shape' node when it's constant.
-   *        If not, shape will be from node itself. shape from attribute is not used.
-   *
-   * TODO Change this policy when not appropriate
-   */
-  loco::NodeShape visit(const luci::CircleReshape *node) final
-  {
-    LOGGER(l);
+  loco::NodeShape visit(const luci::CircleFill *node) final { return infer_fill(node); }
 
-    const loco::DataType S32 = loco::DataType::S32;
+  loco::NodeShape visit(const luci::CircleFloor *node) final { return use_x(node); }
 
-    loco::TensorShape shape_by_input;
-    {
-      LUCI_ASSERT(node->shape(), "2nd input shape() should not be nullptr");
+  loco::NodeShape visit(const luci::CircleFloorDiv *node) final { return broadcast_xy(node); }
 
-      // Only support node's shape() is CircleConst with S32
-      // TODO support other node with other types
-      auto const_shape_node = dynamic_cast<luci::CircleConst *>(node->shape());
-      if (const_shape_node != nullptr)
-      {
-        LUCI_ASSERT(const_shape_node->dtype() == S32, "Only support int32 CircleConst");
+  loco::NodeShape visit(const luci::CircleFloorMod *node) final { return broadcast_xy(node); }
 
-        shape_by_input.rank(const_shape_node->size<S32>());
+  loco::NodeShape visit(const luci::CircleFullyConnected *node) final
+  {
+    return infer_fully_connected(node);
+  }
 
-        for (uint32_t axis = 0; axis < shape_by_input.rank(); ++axis)
-        {
-          shape_by_input.dim(axis) = const_shape_node->at<S32>(axis);
-        }
-      }
-      else
-      {
-        // We use shape from the node itself
-        shape_by_input = own_shape(node);
-      }
-    }
+  loco::NodeShape visit(const luci::CircleGather *node) final { return infer_gather(node); }
 
-    loco::TensorShape shape_by_attr;
-    {
-      shape_by_attr.rank(node->newShape()->rank());
+  loco::NodeShape visit(const luci::CircleGatherNd *node) final { return infer_gather_nd(node); }
 
-      for (uint32_t axis = 0; axis < shape_by_attr.rank(); ++axis)
-      {
-        shape_by_attr.dim(axis) = node->newShape()->dim(axis);
-      }
-    }
+  loco::NodeShape visit(const luci::CircleGreater *node) final { return broadcast_xy(node); }
 
-    if (!(shape_by_input == shape_by_attr))
-    {
-      INFO(l) << "CircleReshape: Two new shape information mismatched : " << std::endl;
-      INFO(l) << "   shape_by_input : " << shape_by_input << std::endl;
-      INFO(l) << "   shape_by_attr : " << shape_by_attr << std::endl;
-    }
+  loco::NodeShape visit(const luci::CircleGreaterEqual *node) final { return broadcast_xy(node); }
 
-    loco::TensorShape output_shape = shape_by_input;
+  loco::NodeShape visit(const luci::CircleIf *node) final
+  {
+    // Shape of CircleIf is not used. Just use input 0
+    assert(node->input_count() > 0);
+    const auto input_shape = loco::shape_get(node->input(0)).as<loco::TensorShape>();
+    return loco::NodeShape{input_shape};
+  }
 
-    // One of the dimensions can have special value -1, meaning its actual value should be inferred.
-    const auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();
-    const uint32_t input_element_count = loco::element_count(&input_shape);
-    uint32_t output_element_count = 1;
-    uint32_t unknown_dim_index = UINT32_MAX;
-    for (uint32_t dim_index = 0; dim_index < output_shape.rank(); ++dim_index)
-    {
-      const uint32_t dim_value = output_shape.dim(dim_index).value();
-      if (static_cast<int>(dim_value) == -1)
-      {
-        LUCI_ASSERT(unknown_dim_index == UINT32_MAX, "More than one unknown dimension");
-        unknown_dim_index = dim_index;
-      }
-      else
-      {
-        output_element_count *= dim_value;
-      }
-    }
-    if (unknown_dim_index != UINT32_MAX)
-    {
-      output_shape.dim(unknown_dim_index) = input_element_count / output_element_count;
-    }
+  loco::NodeShape visit(const luci::CircleL2Normalize *node) final { return use_x(node); }
 
-    return loco::NodeShape{output_shape};
+  loco::NodeShape visit(const luci::CircleL2Pool2D *node) final
+  {
+    return infer_pool_2d_shape(node);
   }
 
-  loco::NodeShape visit(const luci::CircleResizeBilinear *node) final
+  loco::NodeShape visit(const luci::CircleLeakyRelu *node) final
   {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
-    if (input_shape.rank() != 4)
-      INTERNAL_EXN("Expected ResizeBilinear input to have rank 4");
-
-    auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
-
-    if (const_node->dtype() != loco::DataType::S32)
-      INTERNAL_EXN("Only S32 datatype is supported for ResizeBilinear size");
-
-    if (const_node->rank() != 1)
-      INTERNAL_EXN("Expected size tensor of rank 1");
-
-    if (const_node->dim(0).value() != 2)
-      INTERNAL_EXN("Expected size tensor with shape [2]");
+    const auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+    return loco::NodeShape{input_shape};
+  }
 
-    loco::TensorShape output_shape;
-    output_shape.rank(4);
-    output_shape.dim(0) = input_shape.dim(0);
-    output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
-    output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
-    output_shape.dim(3) = input_shape.dim(3);
+  loco::NodeShape visit(const luci::CircleLess *node) final { return broadcast_xy(node); }
 
-    return loco::NodeShape{output_shape};
-  }
+  loco::NodeShape visit(const luci::CircleLessEqual *node) final { return broadcast_xy(node); }
 
-  loco::NodeShape visit(const luci::CircleResizeNearestNeighbor *node) final
+  loco::NodeShape visit(const luci::CircleLocalResponseNormalization *node) final
   {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+    const auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+    return loco::NodeShape{input_shape};
+  }
 
-    if (input_shape.rank() != 4)
-      INTERNAL_EXN("Expected ResizeNearesNeighbor input to have rank 4");
+  loco::NodeShape visit(const luci::CircleLog *node) final { return use_x(node); }
 
-    auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
+  loco::NodeShape visit(const luci::CircleLogicalAnd *node) final { return use_x(node); }
 
-    if (const_node->dtype() != loco::DataType::S32)
-      INTERNAL_EXN("Only S32 datatype is supported for ResizeNearesNeighbor size");
+  loco::NodeShape visit(const luci::CircleLogicalNot *node) final { return use_x(node); }
 
-    if (const_node->rank() != 1)
-      INTERNAL_EXN("Expected size tensor of rank 1");
+  loco::NodeShape visit(const luci::CircleLogicalOr *node) final { return use_x(node); }
 
-    if (const_node->dim(0).value() != 2)
-      INTERNAL_EXN("Expected size tensor with shape [2]");
+  loco::NodeShape visit(const luci::CircleLogistic *node) final { return use_x(node); }
 
-    loco::TensorShape output_shape;
-    output_shape.rank(4);
-    output_shape.dim(0) = input_shape.dim(0);
-    output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
-    output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
-    output_shape.dim(3) = input_shape.dim(3);
+  loco::NodeShape visit(const luci::CircleLogSoftmax *node) final { return use_logits(node); }
 
-    return loco::NodeShape{output_shape};
+  loco::NodeShape visit(const luci::CircleMatrixDiag *node) final
+  {
+    return infer_matrix_diag(node);
   }
 
-  loco::NodeShape visit(const luci::CircleReverseSequence *node) final
+  loco::NodeShape visit(const luci::CircleMatrixSetDiag *node) final
   {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
-    return loco::NodeShape{input_shape};
+    return infer_matrix_set_diag(node);
   }
 
-  loco::NodeShape visit(const luci::CircleRound *node) final { return use_x(node); }
+  loco::NodeShape visit(const luci::CircleMaximum *node) final { return broadcast_xy(node); }
 
-  loco::NodeShape visit(const luci::CircleReverseV2 *node) final
+  loco::NodeShape visit(const luci::CircleMaxPool2D *node) final
   {
-    auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();
-
-    LUCI_ASSERT(loco::shape_get(node->axis()).as<loco::TensorShape>().rank() == 1,
-                "Tensor must be 1-D");
-
-    return loco::NodeShape{input_shape};
+    return infer_pool_2d_shape(node);
   }
 
-  loco::NodeShape visit(const luci::CircleRsqrt *node) final { return use_x(node); }
-
-  loco::NodeShape visit(const luci::CircleScatterNd *node) final
+  loco::NodeShape visit(const luci::CircleMean *node) final
   {
-    loco::TensorShape output_shape;
-
-    auto shape_node = loco::must_cast<luci::CircleConst *>(node->shape());
+    auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
+    return loco::NodeShape{output_shape};
+  }
 
-    const loco::DataType S32 = loco::DataType::S32;
-    const loco::DataType S64 = loco::DataType::S64;
+  loco::NodeShape visit(const luci::CircleMinimum *node) final { return broadcast_xy(node); }
 
-    std::vector<int64_t> vect_shape;
+  loco::NodeShape visit(const luci::CircleMirrorPad *node) final { return infer_mirror_pad(node); }
 
-    if (shape_node->dtype() == S32)
-      vect_shape = vector_from_constant<S32>(shape_node);
-    else if (shape_node->dtype() == S64)
-      vect_shape = vector_from_constant<S64>(shape_node);
-    else
-      LUCI_ASSERT(false, "Only support int32/int64 for shape()");
+  loco::NodeShape visit(const luci::CircleMul *node) final { return broadcast_xy(node); }
 
-    output_shape.rank(vect_shape.size());
-    for (uint32_t i = 0; i < vect_shape.size(); ++i)
-      output_shape.dim(i) = vect_shape[i];
+  loco::NodeShape visit(const luci::CircleNeg *node) final { return use_x(node); }
 
-    return loco::NodeShape{output_shape};
+  loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4 *node) final
+  {
+    const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
+    return loco::NodeShape{boxes_shape};
   }
 
-  loco::NodeShape visit(const luci::CircleSegmentSum *node) final
+  loco::NodeShape visit(const luci::CircleNonMaxSuppressionV5 *node) final
   {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    auto segment_shape = loco::shape_get(node->segment_ids()).as<loco::TensorShape>();
+    const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
+    return loco::NodeShape{boxes_shape};
+  }
 
-    LUCI_ASSERT(segment_shape.rank() == 1, "segment_ids must be 1-D tensor");
-    LUCI_ASSERT(segment_shape.dim(0).value() == input_shape.dim(0).value(),
-                "segment_ids size must be equal to the size of data's first dimension");
+  loco::NodeShape visit(const luci::CircleNotEqual *node) final { return broadcast_xy(node); }
 
-    auto ids_shape_value = loco::must_cast<luci::CircleConst *>(node->segment_ids());
+  loco::NodeShape visit(const luci::CircleOneHot *node) final { return infer_one_hot(node); }
 
-    std::vector<int64_t> vect_ids;
+  loco::NodeShape visit(const luci::CirclePack *node) final { return infer_pack(node); }
 
-    if (ids_shape_value->dtype() == loco::DataType::S32)
-      vect_ids = vector_from_constant<loco::DataType::S32>(ids_shape_value);
+  loco::NodeShape visit(const luci::CirclePad *node) final { return infer_pad(node); }
 
-    LUCI_ASSERT(std::is_sorted(vect_ids.begin(), vect_ids.end()),
-                "segment_ids values should be sorted")
+  loco::NodeShape visit(const luci::CirclePadV2 *node) final { return infer_pad_v2(node); }
 
-    loco::TensorShape output_shape;
+  loco::NodeShape visit(const luci::CirclePow *node) final { return broadcast_xy(node); }
 
-    output_shape.rank(input_shape.rank());
+  loco::NodeShape visit(const luci::CirclePRelu *node) final { return infer_p_relu(node); }
 
-    for (uint32_t i = 1; i < input_shape.rank(); ++i)
-      output_shape.dim(i) = input_shape.dim(i);
+  loco::NodeShape visit(const luci::CircleRange *node) final { return infer_range(node); }
 
-    output_shape.dim(0) = vect_ids.back() + 1;
+  loco::NodeShape visit(const luci::CircleRank *) final
+  {
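+    // Rank produces a scalar, i.e. a rank-0 tensor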
+    loco::TensorShape shape_output;
+    shape_output.rank(0);
 
-    return loco::NodeShape{output_shape};
+    return loco::NodeShape{shape_output};
   }
 
-  loco::NodeShape visit(const luci::CircleSelect *node) final
+  loco::NodeShape visit(const luci::CircleReduceAny *node) final
   {
-    auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
-    assert(t_shape == loco::shape_get(node->e()).as<loco::TensorShape>());
-
-    // condition shape validation
-    auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
-    if (c_shape.rank() != t_shape.rank())
-    {
-      if (c_shape.rank() != 0 && c_shape.rank() != 1)
-        INTERNAL_EXN_V("CircleSelect condition rank is not 0 nor 1: ", c_shape.rank());
-
-      if (c_shape.rank() == 1)
-      {
-        if (c_shape.dim(0).value() != t_shape.dim(0).value())
-          INTERNAL_EXN("CircleSelect condition dim(0) should match with t.dim(0)");
-      }
-    }
-
-    return loco::NodeShape{t_shape};
+    auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
+    return loco::NodeShape{output_shape};
   }
 
-  loco::NodeShape visit(const luci::CircleSelectV2 *node) final
+  loco::NodeShape visit(const luci::CircleReduceMax *node) final
   {
-    auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
-    auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
-    auto e_shape = loco::shape_get(node->e()).as<loco::TensorShape>();
-
-    // validate ability to broadcast shapes to each other
-    auto b_shape = broadcast_shape(broadcast_shape(c_shape, t_shape), e_shape);
-    return loco::NodeShape{b_shape};
+    auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
+    return loco::NodeShape{output_shape};
   }
 
-  loco::NodeShape visit(const luci::CircleShape *node) final
+  loco::NodeShape visit(const luci::CircleReduceMin *node) final
   {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
-    loco::TensorShape output_shape;
-
-    output_shape.rank(1);
-    output_shape.dim(0) = input_shape.rank();
-
+    auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
     return loco::NodeShape{output_shape};
   }
 
-  loco::NodeShape visit(const luci::CircleSin *node) final { return use_x(node); }
-
-  loco::NodeShape visit(const luci::CircleSlice *node) final
+  loco::NodeShape visit(const luci::CircleReduceProd *node) final
   {
-    const loco::DataType S32 = loco::DataType::S32;
-    const loco::DataType S64 = loco::DataType::S64;
+    auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
+    return loco::NodeShape{output_shape};
+  }
 
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  loco::NodeShape visit(const luci::CircleRelu *node) final
+  {
+    auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
 
-    auto const_begin = loco::must_cast<luci::CircleConst *>(node->begin());
-    auto const_size = loco::must_cast<luci::CircleConst *>(node->size());
+    return loco::NodeShape{input_shape};
+  }
 
-    loco::TensorShape output_shape;
-    std::vector<int64_t> vect_begin; // to hold both S32/S64, we use int64_t
-    std::vector<int64_t> vect_size;
+  loco::NodeShape visit(const luci::CircleRelu6 *node) final
+  {
+    auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
 
-    if (const_begin->dtype() == S32)
-      vect_begin = vector_from_constant<S32>(const_begin);
-    else if (const_begin->dtype() == S64)
-      vect_begin = vector_from_constant<S64>(const_begin);
-    else
-      LUCI_ASSERT(false, "Only support int32/int64 for begin()");
+    return loco::NodeShape{input_shape};
+  }
 
-    if (const_size->dtype() == S32)
-      vect_size = vector_from_constant<S32>(const_size);
-    else if (const_size->dtype() == S64)
-      vect_size = vector_from_constant<S64>(const_size);
-    else
-      LUCI_ASSERT(false, "Only support int32/int64 for size()");
+  loco::NodeShape visit(const luci::CircleReluN1To1 *node) final
+  {
+    auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
 
-    assert(input_shape.rank() == vect_begin.size());
-    assert(input_shape.rank() == vect_size.size());
+    return loco::NodeShape{input_shape};
+  }
 
-    output_shape.rank(vect_begin.size());
-    for (uint32_t idx = 0; idx < vect_begin.size(); ++idx)
-    {
-      auto size = vect_size.at(idx);
-      if (size == -1)
-      {
-        size = input_shape.dim(idx).value() - vect_begin.at(idx);
-      }
-      output_shape.dim(idx) = size;
-    }
+  /**
+   * @note  CircleReshape carries its new shape in two places: the 2nd input and an attribute.
+   *        This shape inference uses the shape from the input 'shape' node when it is constant.
+   *        Otherwise the shape comes from the node itself; the shape attribute is not used.
+   *
+   * TODO Change this policy if it turns out to be inappropriate
+   */
+  loco::NodeShape visit(const luci::CircleReshape *node) final { return infer_reshape(node); }
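+  // A minimal sketch of the policy above, assuming infer_reshape resolves the
+  // shape in this order (illustration only; the actual helper may handle more
+  // cases, e.g. the -1 wildcard):
+  //
+  //   if (auto *shape_const = dynamic_cast<luci::CircleConst *>(node->shape()))
+  //   {
+  //     loco::TensorShape s;
+  //     s.rank(shape_const->size<loco::DataType::S32>());
+  //     for (uint32_t i = 0; i < s.rank(); ++i)
+  //       s.dim(i) = shape_const->at<loco::DataType::S32>(i);
+  //     return loco::NodeShape{s};
+  //   }
+  //   return use_own(node); // non-const 'shape' input: fall back to own shape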
 
-    return loco::NodeShape{output_shape};
+  loco::NodeShape visit(const luci::CircleResizeBilinear *node) final
+  {
+    return infer_resize_bilinear(node);
   }
 
-  loco::NodeShape visit(const luci::CircleSoftmax *node) final { return use_logits(node); }
-
-  loco::NodeShape visit(const luci::CircleSpaceToBatchND *node) final
+  loco::NodeShape visit(const luci::CircleResizeNearestNeighbor *node) final
   {
-    const loco::DataType S32 = loco::DataType::S32;
+    return infer_resize_nearest_neighbor(node);
+  }
 
+  loco::NodeShape visit(const luci::CircleReverseSequence *node) final
+  {
     auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    // Support only input rank is 3 and 4
-    assert(input_shape.rank() == 3 || input_shape.rank() == 4);
-
-    // Only support block_shape() with S32 type CircleConst for now
-    auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape());
-    LUCI_ASSERT(const_block_shape->dtype() == S32, "Only support int32 block_shape");
-
-    // Only support paddings() with S32 type CircleConst for now
-    auto const_paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
-    LUCI_ASSERT(const_paddings->dtype() == S32, "Only support int32 paddings");
-
-    auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
-    auto const_paddings_shape = loco::shape_get(const_paddings).as<loco::TensorShape>();
-    assert(const_block_shape_shape.rank() == 1);
-    assert(const_paddings_shape.rank() == 2);
-
-    int32_t input_spatial_dim = input_shape.rank() - 2;
-    assert(const_block_shape_shape.dim(0) == input_spatial_dim);
-    assert(const_paddings_shape.dim(0) == input_spatial_dim);
-    assert(const_paddings_shape.dim(1) == 2);
-
-    // Check all values of block_shape >= 1
-    uint32_t ele_count = const_block_shape->size<S32>();
-    for (uint32_t e = 0; e < ele_count; ++e)
-    {
-      auto val = const_block_shape->at<S32>(e);
-      if (val < 1)
-      {
-        INTERNAL_EXN_V("All values of block_shape >= 1: ", e);
-      }
-    }
 
-    loco::TensorShape shape_output;
+    return loco::NodeShape{input_shape};
+  }
 
-    shape_output.rank(input_shape.rank());
+  loco::NodeShape visit(const luci::CircleRound *node) final { return use_x(node); }
 
-    int32_t output_batch_size = input_shape.dim(0).value();
-    for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
-    {
-      int dim_size = input_shape.dim(dim + 1).value();
-      dim_size += const_paddings->at<S32>(dim * 2);
-      dim_size += const_paddings->at<S32>(dim * 2 + 1);
-      shape_output.dim(dim + 1) = dim_size / const_block_shape->at<S32>(dim);
+  loco::NodeShape visit(const luci::CircleReverseV2 *node) final
+  {
+    auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();
 
-      assert(dim_size % const_block_shape->at<S32>(dim) == 0);
-      output_batch_size = output_batch_size * const_block_shape->at<S32>(dim);
-    }
-    shape_output.dim(0) = output_batch_size;
-    shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
+    LUCI_ASSERT(loco::shape_get(node->axis()).as<loco::TensorShape>().rank() == 1,
+                "Tensor must be 1-D");
 
-    return loco::NodeShape{shape_output};
+    return loco::NodeShape{input_shape};
   }
 
-  loco::NodeShape visit(const luci::CircleSpaceToDepth *node) final
+  loco::NodeShape visit(const luci::CircleRsqrt *node) final { return use_x(node); }
+
+  loco::NodeShape visit(const luci::CircleScatterNd *node) final { return infer_scatter_nd(node); }
+
+  loco::NodeShape visit(const luci::CircleSegmentSum *node) final
   {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
+    return infer_segment_sum(node);
+  }
 
-    // Only data format NHWC is supported
-    int32_t height = input_shape.dim(1).value();
-    int32_t width = input_shape.dim(2).value();
-    int32_t depth = input_shape.dim(3).value();
+  loco::NodeShape visit(const luci::CircleSelect *node) final { return infer_select(node); }
 
-    int block_size = node->block_size();
+  loco::NodeShape visit(const luci::CircleSelectV2 *node) final { return infer_select_v2(node); }
 
-    if (block_size < 2)
-      INTERNAL_EXN("Block size must be >= 2");
+  loco::NodeShape visit(const luci::CircleShape *node) final { return infer_shape(node); }
 
-    if ((height % block_size) || (width % block_size))
-    {
-      INTERNAL_EXN("The input tensor's height and width must be divisible by block_size");
-    }
+  loco::NodeShape visit(const luci::CircleSin *node) final { return use_x(node); }
 
-    loco::TensorShape output_shape;
-    output_shape.rank(4);
+  loco::NodeShape visit(const luci::CircleSlice *node) final { return infer_slice(node); }
 
-    output_shape.dim(0) = input_shape.dim(0).value();
-    output_shape.dim(1) = height / block_size;
-    output_shape.dim(2) = width / block_size;
-    output_shape.dim(3) = block_size * block_size * depth;
+  loco::NodeShape visit(const luci::CircleSoftmax *node) final { return use_logits(node); }
 
-    return loco::NodeShape{output_shape};
+  loco::NodeShape visit(const luci::CircleSpaceToBatchND *node) final
+  {
+    return infer_space_to_batch_nd(node);
   }
 
-  loco::NodeShape visit(const luci::CircleSparseToDense *node) final
+  loco::NodeShape visit(const luci::CircleSpaceToDepth *node) final
   {
-    loco::TensorShape shape;
-    {
-      LUCI_ASSERT(node->output_shape(), "dims input should not be nullptr");
-
-      auto output_shape_node = dynamic_cast<luci::CircleConst *>(node->output_shape());
-      if (output_shape_node != nullptr)
-      {
-        // Only support node with S32
-        LUCI_ASSERT(output_shape_node->dtype() == loco::DataType::S32,
-                    "Only support int32 CircleConst");
-
-        if (output_shape_node->rank() != 1)
-          INTERNAL_EXN_V("Only support rank 1 CircleConst",
-                         oops::to_uint32(output_shape_node->rank()));
-
-        shape.rank(output_shape_node->dim(0).value());
-
-        for (uint32_t axis = 0; axis < shape.rank(); ++axis)
-        {
-          shape.dim(axis) = output_shape_node->at<loco::DataType::S32>(axis);
-        }
-      }
-      else
-      {
-        shape = own_shape(node);
-      }
-    }
+    return infer_space_to_depth(node);
+  }
 
-    return loco::NodeShape{shape};
+  loco::NodeShape visit(const luci::CircleSparseToDense *node) final
+  {
+    return infer_sparse_to_dense(node);
   }
 
   loco::NodeShape visit(const luci::CircleSplit *node) final
@@ -1692,71 +2340,10 @@ public:
 
   loco::NodeShape visit(const luci::CircleStridedSlice *node) final
   {
-    auto begin_node = dynamic_cast<luci::CircleConst *>(node->begin());
-    auto end_node = dynamic_cast<luci::CircleConst *>(node->end());
-    auto strides_node = dynamic_cast<luci::CircleConst *>(node->strides());
-
-    if (begin_node == nullptr || end_node == nullptr || strides_node == nullptr)
-    {
-      return use_own(node);
-    }
-
-    loco::TensorShape shape = infer_output_shape(node);
-    return loco::NodeShape{shape};
+    return infer_strided_slice(node);
   }
 
-  loco::NodeShape visit(const luci::CircleSqueeze *node) final
-  {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
-    // TODO input shape may be unknown before runtime
-    std::vector<bool> do_squeeze(input_shape.rank(), false);
-    uint32_t num_squeezed = 0;
-
-    if (!node->squeeze_dims().empty())
-    {
-      // SqueezeDims not empty, squeeze only dims specified
-      for (int32_t raw_dim : node->squeeze_dims())
-      {
-        int32_t dim = raw_dim < 0 ? raw_dim + input_shape.rank() : raw_dim;
-
-        if (dim < 0 || static_cast<uint32_t>(dim) >= input_shape.rank() ||
-            input_shape.dim(dim).value() != 1)
-        {
-          INTERNAL_EXN("invalid dimention specified to Squeeze");
-        }
-
-        if (!do_squeeze[dim])
-          ++num_squeezed;
-        do_squeeze[dim] = true;
-      }
-    }
-    else
-    {
-      // SqueezeDims empty, squeeze any dims with size == 1
-      for (uint32_t dim = 0; dim < input_shape.rank(); ++dim)
-      {
-        if (input_shape.dim(dim) == 1)
-        {
-          do_squeeze[dim] = true;
-          ++num_squeezed;
-        }
-      }
-    }
-
-    loco::TensorShape output_shape;
-    output_shape.rank(input_shape.rank() - num_squeezed);
-
-    for (uint32_t in_dim = 0, out_dim = 0; in_dim < input_shape.rank(); ++in_dim)
-    {
-      if (!do_squeeze[in_dim])
-      {
-        output_shape.dim(out_dim++) = input_shape.dim(in_dim);
-      }
-    }
-
-    return loco::NodeShape{output_shape};
-  }
+  loco::NodeShape visit(const luci::CircleSqueeze *node) final { return infer_squeeze(node); }
 
   loco::NodeShape visit(const luci::CircleSub *node) final { return broadcast_xy(node); }
 
@@ -1768,33 +2355,7 @@ public:
 
   loco::NodeShape visit(const luci::CircleTanh *node) final { return use_x(node); }
 
-  loco::NodeShape visit(const luci::CircleTile *node) final
-  {
-    const loco::DataType S32 = loco::DataType::S32;
-
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    auto multiples = loco::must_cast<luci::CircleConst *>(node->multiples());
-
-    // TODO support non-const case
-    // TODO support S64 type
-    LUCI_ASSERT(multiples->dtype() == S32, "Only support int32 multiples");
-    LUCI_ASSERT(multiples->rank() == 1, "multiples should be rank 1")
-
-    uint32_t n = multiples->dim(0).value();
-
-    LUCI_ASSERT(n == input_shape.rank(), "length of multiples should be the same with input rank");
-
-    loco::TensorShape output_shape;
-
-    output_shape.rank(input_shape.rank());
-    for (uint32_t ni = 0; ni < n; ++ni)
-    {
-      int32_t multiple = multiples->at<S32>(ni);
-      output_shape.dim(ni) = input_shape.dim(ni).value() * static_cast<uint32_t>(multiple);
-    }
-
-    return loco::NodeShape{output_shape};
-  }
+  loco::NodeShape visit(const luci::CircleTile *node) final { return infer_tile(node); }
 
   loco::NodeShape visit(const luci::CircleTopKV2 *node) final
   {
@@ -1803,93 +2364,16 @@ public:
     return loco::NodeShape{input_shape};
   }
 
-  loco::NodeShape visit(const luci::CircleTranspose *node) final
-  {
-    auto input_shape = loco::shape_get(node->a()).as<loco::TensorShape>();
-
-    auto perm_node = loco::must_cast<luci::CircleConst *>(node->perm());
-
-    loco::TensorShape output_shape;
-    output_shape.rank(input_shape.rank());
-
-    assert(perm_node->dtype() == loco::DataType::S32);
-    assert(input_shape.rank() == perm_node->template size<loco::DataType::S32>());
-
-    for (uint32_t out_axis = 0; out_axis < output_shape.rank(); out_axis++)
-    {
-      auto in_axis = perm_node->template at<loco::DataType::S32>(out_axis);
-      output_shape.dim(out_axis) = input_shape.dim(in_axis);
-    }
-
-    return output_shape;
-  }
-
-  loco::NodeShape visit(const luci::CircleUnique *node) final
-  {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
-    assert(input_shape.rank() == 1);
-
-    loco::TensorShape shape_output;
-    shape_output = own_shape(node);
-
-    return loco::NodeShape{shape_output};
-  }
+  loco::NodeShape visit(const luci::CircleTranspose *node) final { return infer_transpose(node); }
 
   loco::NodeShape visit(const luci::CircleTransposeConv *node) final
   {
-    // TransposeConv's output shape is written in its 'inputSizes' argument
-    auto input_sizes_const = loco::must_cast<luci::CircleConst *>(node->inputSizes());
-    // TODO support non-const type
-    LUCI_ASSERT(input_sizes_const->dtype() == loco::DataType::S32, "Only support S32 dtype")
-    LUCI_ASSERT(input_sizes_const->rank() == 1 && input_sizes_const->dim(0).value() == 4,
-                "Only support rank 1 with 4 entries")
-
-    loco::TensorShape shape;
-
-    shape.rank(4);
-    for (uint32_t axis = 0; axis < 4; ++axis)
-      shape.dim(axis) = input_sizes_const->at<loco::DataType::S32>(axis);
-
-    return loco::NodeShape{shape};
+    return infer_transpose_conv(node);
   }
 
-  loco::NodeShape visit(const luci::CircleUnpack *node) final
-  {
-    // CircleUnpack provides list(array) of Tensors which has one less dimension of the input
-    // We'll set shape of CircleUnpack to shape of actual outputs
-    // TODO fix this if any problem rises
-    auto value_shape = loco::shape_get(node->value()).as<loco::TensorShape>();
-
-    auto axis = node->axis();
-    auto num = node->num();
-    auto rank = static_cast<int32_t>(value_shape.rank());
-
-    if (rank == 0)
-    {
-      // Unknown shape
-      return use_own(node);
-    }
-
-    LUCI_ASSERT(-rank <= axis && axis < rank, "Axis is out of range");
-
-    if (axis < 0)
-      axis += rank;
-
-    LUCI_ASSERT(num == static_cast<int32_t>(value_shape.dim(axis).value()),
-                "num, axis maybe incorrect");
-
-    loco::TensorShape output_shape;
-    output_shape.rank(rank - 1);
-
-    for (int32_t i = 0, o = 0; i < rank; ++i)
-    {
-      if (i != axis)
-        output_shape.dim(o++) = value_shape.dim(i);
-    }
+  loco::NodeShape visit(const luci::CircleUnpack *node) final { return infer_unpack(node); }
 
-    return loco::NodeShape{output_shape};
-  }
+  loco::NodeShape visit(const luci::CircleUnique *node) final { return infer_unique(node); }
 
   loco::NodeShape visit(const luci::CircleWhere *node) final { return use_own(node); }
 
@@ -1911,57 +2395,10 @@ public:
   // Circle Only
   loco::NodeShape visit(const luci::CircleBCQFullyConnected *node) final
   {
-    loco::TensorShape out_shape;
-
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    auto weights_clusters = loco::must_cast<luci::CircleConst *>(node->weights_clusters());
-
-    LUCI_ASSERT(input_shape.rank() == 2, "Input rank of BCQFullyConnected should be 2");
-
-    int32_t qbits_sum = 0;
-    for (uint32_t i = 0; i < weights_clusters->dim(0).value(); ++i)
-    {
-      qbits_sum += weights_clusters->at<loco::DataType::S32>(i * 2 + 1);
-    }
-
-    out_shape.rank(2);
-    out_shape.dim(0) = qbits_sum;
-    out_shape.dim(1) = input_shape.dim(1);
-
-    return loco::NodeShape{out_shape};
+    return infer_bcq_fully_connected(node);
   }
 
-  loco::NodeShape visit(const luci::CircleBCQGather *node) final
-  {
-    loco::TensorShape input_shape;
-    loco::TensorShape output_shape;
-
-    const auto input_binary_shape = loco::shape_get(node->input_binary()).as<loco::TensorShape>();
-    const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
-    auto axis = node->axis();
-
-    auto input_clusters = loco::must_cast<luci::CircleConst *>(node->input_clusters());
-    auto qbits_sum = 0;
-    for (uint32_t i = 0; i < input_clusters->dim(0).value(); ++i)
-    {
-      qbits_sum += input_clusters->at<loco::DataType::S32>(i * 2 + 1);
-    }
-
-    input_shape.rank(2);
-    input_shape.dim(0) = qbits_sum;
-    input_shape.dim(1) = input_binary_shape.dim(1).value() * 32;
-
-    output_shape.rank(input_shape.rank() - 1 + indices_shape.rank());
-    int32_t outdim_index = 0;
-    for (int32_t i = 0; i < axis; ++i)
-      output_shape.dim(outdim_index++) = input_shape.dim(i);
-    for (uint32_t i = 0; i < indices_shape.rank(); ++i)
-      output_shape.dim(outdim_index++) = indices_shape.dim(i);
-    for (uint32_t i = axis + 1; i < input_shape.rank(); ++i)
-      output_shape.dim(outdim_index++) = input_shape.dim(i);
-
-    return loco::NodeShape{output_shape};
-  }
+  loco::NodeShape visit(const luci::CircleBCQGather *node) final { return infer_bcq_gather(node); }
 
   loco::NodeShape visit(const luci::CircleInstanceNorm *node) final
   {
@@ -1971,25 +2408,9 @@ public:
   }
 
   // Virtual
-  loco::NodeShape visit(const luci::CircleInput *node) final
-  {
-    loco::TensorShape shape;
-
-    shape.rank(node->rank());
-    for (uint32_t axis = 0; axis < node->rank(); axis++)
-      shape.dim(axis) = node->dim(axis);
-
-    return loco::NodeShape{shape};
-  }
+  loco::NodeShape visit(const luci::CircleInput *node) final { return infer_input(node); }
 
-  loco::NodeShape visit(const luci::CircleOutput *node) final
-  {
-    auto graph_outputs = node->graph()->outputs();
-    auto graph_output = graph_outputs->at(node->index());
-    auto output_shape = graph_output->shape();
-
-    return loco::NodeShape{*output_shape};
-  }
+  loco::NodeShape visit(const luci::CircleOutput *node) final { return infer_output(node); }
 
   loco::NodeShape visit(const luci::CircleOutputDummy *node) final { return use_own(node); }
 
@@ -1997,259 +2418,32 @@ public:
 
   loco::NodeShape visit(const luci::CircleCustomOut *node) final { return use_own(node); }
 
-  loco::NodeShape visit(const luci::CircleIfOut *node) final
-  {
-    /**
-     * @note  IF operator type and shape are that of the "then" and "else"
-     *        Graph Outputs.
-     */
-    auto circle_if = dynamic_cast<const luci::CircleIf *>(node->input());
-    if (circle_if == nullptr)
-    {
-      INTERNAL_EXN("CircleIf IR is not configured correctly");
-    }
-
-    auto index = node->index();
-    auto then_graph = circle_if->then_graph();
-    auto else_graph = circle_if->else_graph();
-    assert(then_graph != nullptr);
-    assert(else_graph != nullptr);
-
-    // shape and type are assumed to be same
-    // these are checked at post_import_graph() in Import
-    auto then_outputs = loco::output_nodes(then_graph);
-    auto else_outputs = loco::output_nodes(else_graph);
-    assert(then_outputs.size() == else_outputs.size());
-    assert(index < static_cast<int32_t>(then_outputs.size()));
-
-    auto then_out = loco::must_cast<luci::CircleOutput *>(then_outputs.at(index));
-    auto else_out = loco::must_cast<luci::CircleOutput *>(else_outputs.at(index));
-
-    auto then_graph_outputs = then_graph->outputs(); // loco::GraphOutput items
-    auto else_graph_outputs = else_graph->outputs();
-    assert(then_graph_outputs->size() == else_graph_outputs->size());
-
-    auto then_graph_output = then_graph_outputs->at(then_out->index());
-    auto else_graph_output = else_graph_outputs->at(else_out->index());
-    (void)else_graph_output; // make compiler happy for unused variable warnings
-    assert(*then_graph_output->shape() == *else_graph_output->shape());
-
-    return loco::NodeShape{*then_graph_output->shape()};
-  }
+  loco::NodeShape visit(const luci::CircleIfOut *node) final { return infer_if_out(node); }
 
   loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4Out *node) final
   {
-    const loco::DataType S32 = loco::DataType::S32;
-
-    auto nmsv4 = dynamic_cast<const luci::CircleNonMaxSuppressionV4 *>(node->input());
-    if (nmsv4 == nullptr)
-      INTERNAL_EXN("CircleNonMaxSuppressionV4 IR is not configured correctly");
-
-    auto index = node->index();
-    if (index == 1)
-      return loco::TensorShape({0});
-
-    assert(index == 0);
-
-    auto unknown = loco::TensorShape{loco::Dimension()};
-    auto max_output_size = dynamic_cast<const luci::CircleConst *>(nmsv4->max_output_size());
-    if (max_output_size == nullptr)
-      return unknown; // we need CircleConst for max output size
-
-    LUCI_ASSERT(max_output_size->dtype() == S32, "Only support int32 for max_output_size");
-
-    if (max_output_size->size<S32>() < 1)
-      return unknown;
-
-    auto max_output_size_value = uint32_t(max_output_size->at<S32>(0));
-    return loco::TensorShape{max_output_size_value};
+    return infer_non_max_suppression_v4_out(node);
   }
 
-  loco::NodeShape visit(const luci::CircleSplitOut *node) final
+  loco::NodeShape visit(const luci::CircleNonMaxSuppressionV5Out *node) final
   {
-    const loco::DataType S32 = loco::DataType::S32;
-
-    auto split = dynamic_cast<const luci::CircleSplit *>(node->input());
-    if (split == nullptr)
-      INTERNAL_EXN("CircleSplit IR is not configured correctly");
-
-    loco::NodeShape unknown;
-
-    auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
-
-    auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim());
-    if (split_dim == nullptr)
-      return unknown; // we need CircleConst for split_dim
-    LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim");
-
-    assert(split_dim->size<S32>() == 1);
-    auto split_dim_axis = split_dim->at<S32>(0);
-    if (split_dim_axis < 0)
-      split_dim_axis += split_shape.rank();
-
-    auto split_dim_value = split_shape.dim(split_dim_axis).value();
-    assert(split_dim_value % split->num_split() == 0);
-    const int split_depth = split_dim_value / split->num_split();
-
-    loco::TensorShape output_shape = split_shape;
-
-    // All shapes are equally same
-    output_shape.dim(split_dim_axis) = loco::Dimension(split_depth);
-
-    return loco::NodeShape{output_shape};
+    return infer_non_max_suppression_v5_out(node);
   }
 
-  loco::NodeShape visit(const luci::CircleSplitVOut *node) final
-  {
-    const loco::DataType S32 = loco::DataType::S32;
-
-    auto split = dynamic_cast<const luci::CircleSplitV *>(node->input());
-    if (split == nullptr)
-      INTERNAL_EXN("CircleSplit IR is not configured correctly");
-
-    loco::NodeShape unknown;
-
-    auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
-
-    auto size_splits = dynamic_cast<const luci::CircleConst *>(split->size_splits());
-    if (size_splits == nullptr)
-      return unknown; // we need CircleConst for size_splits
-    LUCI_ASSERT(size_splits->dtype() == S32, "Only support int32 for size_splits");
-
-    auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim());
-    if (split_dim == nullptr)
-      return unknown; // we need CircleConst for split_dim
-    LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim");
+  loco::NodeShape visit(const luci::CircleSplitOut *node) final { return infer_split_out(node); }
 
-    // fetch axis
-    assert(split_dim->size<S32>() == 1);
-    auto split_dim_axis = split_dim->at<S32>(0);
-    if (split_dim_axis < 0)
-      split_dim_axis += split_shape.rank();
-
-    // interpret size_splits values
-    int32_t size_splits_count = static_cast<int32_t>(size_splits->size<S32>());
-    assert(size_splits_count == split->num_split());
-
-    int64_t minus_one_count = 0, size_splits_sum = 0;
-    for (int32_t idx = 0; idx < size_splits_count; ++idx)
-    {
-      auto size = size_splits->at<S32>(idx);
-      assert(size >= -1);
-      if (size == -1)
-        ++minus_one_count;
-      else
-        size_splits_sum += size;
-    }
-    if (minus_one_count > 1)
-      INTERNAL_EXN("CircleSplitV size_splits has more than two -1 values");
-
-    // calcuate this SplitVOut shape
-    auto input_size = split_shape.dim(split_dim_axis).value();
-    assert(size_splits_sum <= input_size);
-
-    auto index_this = node->index();
-    assert(0 <= index_this && index_this < split->num_split());
-    auto split_depth = size_splits->at<S32>(index_this);
-    if (split_depth == -1)
-      split_depth = input_size - size_splits_sum;
-
-    loco::TensorShape output_shape = split_shape;
-
-    output_shape.dim(split_dim_axis) = loco::Dimension(split_depth);
-
-    return loco::NodeShape{output_shape};
-  }
+  loco::NodeShape visit(const luci::CircleSplitVOut *node) final { return infer_split_v_out(node); }
 
   loco::NodeShape visit(const luci::CircleTopKV2Out *node) final
   {
-    const loco::DataType S32 = loco::DataType::S32;
-
-    auto topkv2 = dynamic_cast<const luci::CircleTopKV2 *>(node->input());
-    if (topkv2 == nullptr)
-      INTERNAL_EXN("CircleSplit IR is not configured correctly");
-
-    // shape of topkv2 is same as topkv2->input()
-    auto input_shape = loco::shape_get(topkv2).as<loco::TensorShape>();
-
-    auto node_k = loco::must_cast<const luci::CircleConst *>(topkv2->k());
-    LUCI_ASSERT(node_k->dtype() == S32, "Only support Int32");
-    assert(node_k->size<S32>() == 1);
-
-    loco::TensorShape output_shape;
-
-    output_shape.rank(input_shape.rank());
-    for (uint32_t idx = 0; idx < input_shape.rank() - 1; ++idx)
-    {
-      output_shape.dim(idx) = input_shape.dim(idx);
-    }
-    output_shape.dim(input_shape.rank() - 1) = node_k->at<S32>(0);
-
-    return loco::NodeShape{output_shape};
-  }
-
-  loco::NodeShape visit(const luci::CircleUniqueOut *node) final
-  {
-    auto unique = dynamic_cast<const luci::CircleUnique *>(node->input());
-    if (unique == nullptr)
-    {
-      INTERNAL_EXN("CircleUnique IR is not configured correctly");
-    }
-
-    auto unique_shape = loco::shape_get(unique).as<loco::TensorShape>();
-
-    return loco::NodeShape{unique_shape};
-  }
-
-  loco::NodeShape visit(const luci::CircleUnpackOut *node) final
-  {
-    auto unpack = dynamic_cast<const luci::CircleUnpack *>(node->input());
-    if (unpack == nullptr)
-    {
-      INTERNAL_EXN("CircleUnpack IR is not configured correctly");
-    }
-
-    auto unpack_shape = loco::shape_get(unpack).as<loco::TensorShape>();
-
-    return loco::NodeShape{unpack_shape};
+    return infer_top_k_v2_out(node);
   }
 
-  loco::NodeShape visit(const luci::CircleWhileOut *node) final
-  {
-    /**
-     * @note  WHILE operator's shape is the same with the "cond"
-     *        Graph input.
-     */
-    auto circle_while = dynamic_cast<const luci::CircleWhile *>(node->input());
-    if (circle_while == nullptr)
-    {
-      INTERNAL_EXN("CircleWhile IR is not configured correctly");
-    }
-
-    auto index = node->index();
-    auto cond_graph = circle_while->cond_graph();
-    assert(cond_graph != nullptr);
-
-    // Assumption: the index of CircleWhileOut matches with the index of input nodes returned by
-    // loco::input_nodes
-    auto cond_inputs = loco::input_nodes(cond_graph);
-    auto cond_in = loco::must_cast<luci::CircleInput *>(cond_inputs.at(index));
-
-    auto cond_graph_inputs = cond_graph->inputs();
-    auto cond_graph_input = cond_graph_inputs->at(cond_in->index());
+  loco::NodeShape visit(const luci::CircleUniqueOut *node) final { return infer_unique_out(node); }
 
-    auto cond_graph_input_shape = *cond_graph_input->shape();
-    auto this_shape = own_shape(node);
+  loco::NodeShape visit(const luci::CircleUnpackOut *node) final { return infer_unpack_out(node); }
 
-    if (!(this_shape == cond_graph_input_shape))
-    {
-      LOGGER(l);
-      WARN(l) << "Warning: CircleWhileOut '" << node->name() << "' shape mispatch " << this_shape
-              << " vs " << cond_graph_input_shape;
-    }
-
-    return loco::NodeShape{this_shape};
-  }
+  loco::NodeShape visit(const luci::CircleWhileOut *node) final { return infer_while_out(node); }
 };
 
 } // namespace
index e7910bf..d28d8ac 100644 (file)
@@ -257,6 +257,11 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
     return loco::dtype_get(node->boxes());
   }
 
+  loco::DataType visit(const luci::CircleNonMaxSuppressionV5 *node) final
+  {
+    return loco::dtype_get(node->boxes());
+  }
+
   loco::DataType visit(const luci::CircleNotEqual *) final { return loco::DataType::BOOL; }
 
   loco::DataType visit(const luci::CirclePack *node) final
@@ -273,6 +278,11 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
 
   loco::DataType visit(const luci::CirclePad *node) final { return loco::dtype_get(node->input()); }
 
+  loco::DataType visit(const luci::CirclePadV2 *node) final
+  {
+    return loco::dtype_get(node->input());
+  }
+
   loco::DataType visit(const luci::CirclePow *node) final
   {
     // TODO make sure types cannot differ
@@ -589,6 +599,17 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
     return loco::DataType::S32;
   }
 
+  loco::DataType visit(const luci::CircleNonMaxSuppressionV5Out *node) final
+  {
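+    // NonMaxSuppressionV5 outputs: 0 selected_indices (S32),
+    // 1 selected_scores (FLOAT32), 2 valid_outputs (S32)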
+    if (node->index() == 0 || node->index() == 2)
+    {
+      return loco::DataType::S32;
+    }
+    assert(node->index() == 1);
+    return loco::DataType::FLOAT32;
+  }
+
   loco::DataType visit(const luci::CircleSplitOut *node) final
   {
     return loco::dtype_get(node->input());
index 9fd42ed..12dd7ff 100644 (file)
@@ -96,6 +96,10 @@ addread(MirrorPad_000)
 addread(Mul_000)
 addread(Mul_U8_000)
 addread(Neg_000)
+addread(NonMaxSuppressionV4_000)
+addread(NonMaxSuppressionV4_001)
+addread(NonMaxSuppressionV5_000)
+addread(NonMaxSuppressionV5_001)
 addread(NotEqual_000)
 addread(OneHot_000)
 addread(OneHot_001)
@@ -105,6 +109,7 @@ addread(Pack_000)
 addread(Pack_U8_000)
 addread(Pad_000)
 addread(Pad_U8_000)
+addread(PadV2_000)
 addread(Pow_000)
 addread(PRelu_000)
 addread(Range_000)
@@ -128,6 +133,7 @@ addread(Reshape_002)
 addread(Reshape_003)
 addread(Reshape_U8_000)
 addread(ResizeBilinear_000)
+addread(ResizeBilinear_U8_000)
 addread(ResizeNearestNeighbor_000)
 addread(ReverseSequence_000)
 addread(ReverseV2_000)
@@ -151,6 +157,7 @@ addread(SpaceToBatchND_001)
 addread(SpaceToBatchND_002)
 addread(SpaceToBatchND_003)
 addread(SpaceToDepth_000)
+addread(SpaceToDepth_U8_000)
 addread(SparseToDense_000)
 addread(Split_000)
 addread(SplitV_000)
@@ -166,12 +173,19 @@ addread(Sub_U8_000)
 addread(Sum_000)
 addread(Sum_001)
 addread(Tanh_000)
+addread(Tanh_U8_000)
 addread(Tile_000)
 addread(Tile_U8_000)
 addread(TopKV2_000)
 addread(TopKV2_001)
 addread(Transpose_000)
 addread(TransposeConv_000)
+addread(Unique_000)
+addread(Unique_001)
+addread(Unique_002)
+addread(Unique_003)
+addread(Unique_U8_000)
+addread(Unique_U8_001)
 addread(Unpack_000)
 addread(Unpack_001)
 addread(Unpack_002)
@@ -296,6 +310,10 @@ addwrite(MirrorPad_000)
 addwrite(Mul_000)
 addwrite(Mul_U8_000)
 addwrite(Neg_000)
+addwrite(NonMaxSuppressionV4_000)
+addwrite(NonMaxSuppressionV4_001)
+addwrite(NonMaxSuppressionV5_000)
+addwrite(NonMaxSuppressionV5_001)
 addwrite(NotEqual_000)
 addwrite(OneHot_000)
 addwrite(OneHot_001)
@@ -304,6 +322,7 @@ addwrite(OneHot_003)
 addwrite(Pack_000)
 addwrite(Pack_U8_000)
 addwrite(Pad_000)
+addwrite(PadV2_000)
 addwrite(Pow_000)
 addwrite(PRelu_000)
 addwrite(Range_000)
@@ -327,6 +346,7 @@ addwrite(Reshape_002)
 addwrite(Reshape_003)
 addwrite(Reshape_U8_000)
 addwrite(ResizeBilinear_000)
+addwrite(ResizeBilinear_U8_000)
 addwrite(ResizeNearestNeighbor_000)
 addwrite(ReverseSequence_000)
 addwrite(ReverseV2_000)
@@ -350,6 +370,7 @@ addwrite(SpaceToBatchND_001)
 addwrite(SpaceToBatchND_002)
 addwrite(SpaceToBatchND_003)
 addwrite(SpaceToDepth_000)
+addwrite(SpaceToDepth_U8_000)
 addwrite(SparseToDense_000)
 addwrite(Split_000)
 addwrite(SplitV_000)
@@ -365,12 +386,19 @@ addwrite(Sub_U8_000)
 addwrite(Sum_000)
 addwrite(Sum_001)
 addwrite(Tanh_000)
+addwrite(Tanh_U8_000)
 addwrite(Tile_000)
 addwrite(Tile_U8_000)
 addwrite(TopKV2_000)
 addwrite(TopKV2_001)
 addwrite(Transpose_000)
 addwrite(TransposeConv_000)
+addwrite(Unique_000)
+addwrite(Unique_001)
+addwrite(Unique_002)
+addwrite(Unique_003)
+addwrite(Unique_U8_000)
+addwrite(Unique_U8_001)
 addwrite(Unpack_000)
 addwrite(Unpack_001)
 addwrite(Unpack_002)
index d59e1c5..58c6868 100644 (file)
@@ -83,6 +83,10 @@ while [ "$#" -ne 0 ]; do
   esac
 done
 
+if [ -n "${INPUT_SHAPES}" ] && [ "${TF_INTERFACE}" = "--v2" ]; then
+  echo "Warning: --input_shapes is ignored when the --v2 option is used"
+fi
+
 if [ -z ${INPUT_PATH} ] || [ ! -e ${INPUT_PATH} ]; then
   echo "Error: input model not found"
   echo ""
@@ -117,16 +121,18 @@ show_err_onexit()
 trap show_err_onexit ERR
 
 # generate temporary tflite file
-echo "python" "${DRIVER_PATH}/tf2tfliteV2.py" ${TF_INTERFACE} --input_path ${INPUT_PATH} \
---input_arrays ${INPUT_ARRAYS} --input_shapes ${INPUT_SHAPES} \
---output_path "${TMPDIR}/${MODEL_NAME}.tflite" \
---output_arrays ${OUTPUT_ARRAYS} > "${OUTPUT_PATH}.log"
-echo " " >> "${OUTPUT_PATH}.log"
+CONVERT_SCRIPT="python ${DRIVER_PATH}/tf2tfliteV2.py ${TF_INTERFACE} "
+CONVERT_SCRIPT+="--input_path ${INPUT_PATH} "
+CONVERT_SCRIPT+="--input_arrays ${INPUT_ARRAYS} "
+CONVERT_SCRIPT+="--output_path ${TMPDIR}/${MODEL_NAME}.tflite "
+CONVERT_SCRIPT+="--output_arrays ${OUTPUT_ARRAYS} "
+if [ -n "${INPUT_SHAPES}" ]; then
+  CONVERT_SCRIPT+="--input_shapes ${INPUT_SHAPES} "
+fi
 
-python "${DRIVER_PATH}/tf2tfliteV2.py" ${TF_INTERFACE} --input_path ${INPUT_PATH} \
---input_arrays ${INPUT_ARRAYS} --input_shapes ${INPUT_SHAPES} \
---output_path "${TMPDIR}/${MODEL_NAME}.tflite" \
---output_arrays ${OUTPUT_ARRAYS} >> "${OUTPUT_PATH}.log" 2>&1
+echo ${CONVERT_SCRIPT} > "${OUTPUT_PATH}.log"
+echo "" >> "${OUTPUT_PATH}.log"
+$CONVERT_SCRIPT >> "${OUTPUT_PATH}.log" 2>&1
 
 # convert .tflite to .circle
 echo " " >> "${OUTPUT_PATH}.log"
index 0a53bd3..0b11e7f 100644 (file)
@@ -46,7 +46,9 @@ python3 -m venv "${DRIVER_PATH}/venv"
 # Install tensorflow
 source "${VENV_ACTIVATE}"
 
+# TODO remove the pinned versions 'pip==20.2.1 setuptools==49.3.0'
+# NOTE the pins are a temporary hotfix for the setuptools 50.x.y releases
 python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhosted.org \
-  install -U pip setuptools
+  install -U pip==20.2.1 setuptools==49.3.0
 python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhosted.org \
   install tensorflow-cpu==2.3.0
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/fake_quantization/ker.json
new file mode 100644 (file)
index 0000000..6460e54
--- /dev/null
@@ -0,0 +1,48 @@
+{
+  "weights": [
+    [
+      [
+        [
+          1.0,
+          2.0
+        ],
+        [
+          -3.0,
+          -4.0
+        ]
+      ],
+      [
+        [
+          -5.0,
+          6.0
+        ],
+        [
+          -7.0,
+          8.0
+        ]
+      ]
+    ],
+    [
+      [
+        [
+          4.0,
+          -2.0
+        ],
+        [
+          3.0,
+          -1.0
+        ]
+      ],
+      [
+        [
+          -8.0,
+          -6.0
+        ],
+        [
+          7.0,
+          5.0
+        ]
+      ]
+    ]
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/bias.json
new file mode 100644 (file)
index 0000000..a55af0b
--- /dev/null
@@ -0,0 +1,10 @@
+{
+  "weights": [
+    4374,
+    8747
+  ],
+  "scale": [
+    0.0002286423499283808,
+    0.0002286423499283808
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ifm.json
new file mode 100644 (file)
index 0000000..0e481bb
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.0038869199343025684,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ker.json
new file mode 100644 (file)
index 0000000..4e12a55
--- /dev/null
@@ -0,0 +1,64 @@
+{
+  "weights": [
+    [
+      [
+        [
+          136,
+          153
+        ],
+        [
+          68,
+          51
+        ]
+      ],
+      [
+        [
+          34,
+          221
+        ],
+        [
+          0,
+          255
+        ]
+      ]
+    ],
+    [
+      [
+        [
+          204,
+          102
+        ],
+        [
+          187,
+          119
+        ]
+      ],
+      [
+        [
+          0,
+          34
+        ],
+        [
+          255,
+          221
+        ]
+      ]
+    ]
+  ],
+  "scale": [
+    0.058823529411764705,
+    0.058823529411764705
+  ],
+  "zero_point": [
+    119.0,
+    136.0
+  ],
+  "min": [
+    -7.0,
+    -8.0
+  ],
+  "max": [
+    8.0,
+    7.0
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ofm.json
new file mode 100644 (file)
index 0000000..7d23cba
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.05829785391688347,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ifm.json
new file mode 100644 (file)
index 0000000..af8dc16
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "min": 0.022708916887640953,
+  "max": 0.9911645770072937
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ofm.json
new file mode 100644 (file)
index 0000000..5f7bd99
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "min": 0.0,
+  "max": 14.86595230102539
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/fake_quantization/ker.json
new file mode 100644 (file)
index 0000000..675eadc
--- /dev/null
@@ -0,0 +1,34 @@
+{
+  "weights": [
+    [
+      [
+        [
+          1.0352935791015625,
+          1.976470947265625,
+          2.9568634033203125,
+          3.95294189453125
+        ],
+        [
+          -8.972549438476562,
+          9.976470947265625,
+          -11.011764526367188,
+          11.9686279296875
+        ]
+      ],
+      [
+        [
+          5.0039215087890625,
+          6.023530960083008,
+          7.035295486450195,
+          8.01568603515625
+        ],
+        [
+          13.027450561523438,
+          -14.023529052734375,
+          14.988235473632812,
+          -16.0313720703125
+        ]
+      ]
+    ]
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/bias.json
new file mode 100644 (file)
index 0000000..3cda452
--- /dev/null
@@ -0,0 +1,14 @@
+{
+  "weights": [
+    2985,
+    5473,
+    7578,
+    9382
+  ],
+  "scale": [
+    0.0003349798455903035,
+    0.0003654325561959198,
+    0.00039588526680153606,
+    0.00042633797740715233
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ifm.json
new file mode 100644 (file)
index 0000000..97931cc
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.003882720833644271,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ker.json
new file mode 100644 (file)
index 0000000..add4d0f
--- /dev/null
@@ -0,0 +1,58 @@
+{
+  "weights": [
+    [
+      [
+        [
+          116,
+          170,
+          137,
+          182
+        ],
+        [
+          0,
+          255,
+          0,
+          255
+        ]
+      ],
+      [
+        [
+          162,
+          213,
+          177,
+          219
+        ],
+        [
+          255,
+          0,
+          255,
+          0
+        ]
+      ]
+    ]
+  ],
+  "scale": [
+    0.08627450980392157,
+    0.09411764705882353,
+    0.10196078431372549,
+    0.10980392156862745
+  ],
+  "zero_point": [
+    104.0,
+    149.0,
+    108.0,
+    146.0
+  ],
+  "min": [
+    -8.972549019607843,
+    -14.023529411764706,
+    -11.011764705882353,
+    -16.031372549019608
+  ],
+  "max": [
+    13.027450980392157,
+    9.976470588235294,
+    14.988235294117647,
+    11.968627450980392
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ofm.json
new file mode 100644 (file)
index 0000000..f587aac
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.07756166160106659,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ifm.json
new file mode 100644 (file)
index 0000000..fa8fffc
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "min": 0.003264044094830751,
+  "max": 0.9900938200950622
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ofm.json
new file mode 100644 (file)
index 0000000..612c0b4
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "min": 0.0,
+  "max": 19.778222274780273
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/fake_quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/fake_quantization/weight.json
new file mode 100644 (file)
index 0000000..4661cb3
--- /dev/null
@@ -0,0 +1,76 @@
+{
+  "weights": [
+    [
+      1.0039215087890625,
+      2.007843017578125,
+      -3.0117650032043457,
+      -4.015686511993408,
+      -5.019608020782471,
+      6.023530006408691,
+      -7.027451515197754,
+      7.968626976013184,
+      4.015686988830566,
+      -2.007843017578125,
+      3.0117645263671875,
+      -1.0039215087890625,
+      -7.9686279296875,
+      -6.023530006408691,
+      7.027451515197754,
+      5.019608497619629
+    ],
+    [
+      1.0039215087890625,
+      2.007843017578125,
+      -3.0117650032043457,
+      -4.015686511993408,
+      -5.019608020782471,
+      6.023530006408691,
+      -7.027451515197754,
+      7.968626976013184,
+      4.015686988830566,
+      -2.007843017578125,
+      3.0117645263671875,
+      -1.0039215087890625,
+      -7.9686279296875,
+      -6.023530006408691,
+      7.027451515197754,
+      5.019608497619629
+    ],
+    [
+      1.0039215087890625,
+      2.007843017578125,
+      -3.0117650032043457,
+      -4.015686511993408,
+      -5.019608020782471,
+      6.023530006408691,
+      -7.027451515197754,
+      7.968626976013184,
+      4.015686988830566,
+      -2.007843017578125,
+      3.0117645263671875,
+      -1.0039215087890625,
+      -7.9686279296875,
+      -6.023530006408691,
+      7.027451515197754,
+      5.019608497619629
+    ],
+    [
+      1.0039215087890625,
+      2.007843017578125,
+      -3.0117650032043457,
+      -4.015686511993408,
+      -5.019608020782471,
+      6.023530006408691,
+      -7.027451515197754,
+      7.968626976013184,
+      4.015686988830566,
+      -2.007843017578125,
+      3.0117645263671875,
+      -1.0039215087890625,
+      -7.9686279296875,
+      -6.023530006408691,
+      7.027451515197754,
+      5.019608497619629
+    ]
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/bias.json
new file mode 100644 (file)
index 0000000..4333c0f
--- /dev/null
@@ -0,0 +1,14 @@
+{
+  "weights": [
+    4099,
+    -8199,
+    -12298,
+    16398
+  ],
+  "scale": [
+    0.00024393631821001058,
+    0.00024393631821001058,
+    0.00024393631821001058,
+    0.00024393631821001058
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/in.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/in.json
new file mode 100644 (file)
index 0000000..8edac1b
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.003887734841555357,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/out.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/out.json
new file mode 100644 (file)
index 0000000..1b94f16
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.061938945204019547,
+  "zero_point": 171.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/weight.json
new file mode 100644 (file)
index 0000000..5ee46c8
--- /dev/null
@@ -0,0 +1,100 @@
+{
+  "weights": [
+    [
+      144,
+      160,
+      80,
+      64,
+      48,
+      224,
+      16,
+      255,
+      192,
+      96,
+      176,
+      112,
+      1,
+      32,
+      240,
+      208
+    ],
+    [
+      144,
+      160,
+      80,
+      64,
+      48,
+      224,
+      16,
+      255,
+      192,
+      96,
+      176,
+      112,
+      1,
+      32,
+      240,
+      208
+    ],
+    [
+      144,
+      160,
+      80,
+      64,
+      48,
+      224,
+      16,
+      255,
+      192,
+      96,
+      176,
+      112,
+      1,
+      32,
+      240,
+      208
+    ],
+    [
+      144,
+      160,
+      80,
+      64,
+      48,
+      224,
+      16,
+      255,
+      192,
+      96,
+      176,
+      112,
+      1,
+      32,
+      240,
+      208
+    ]
+  ],
+  "scale": [
+    0.06274509803921569,
+    0.06274509803921569,
+    0.06274509803921569,
+    0.06274509803921569
+  ],
+  "zero_point": [
+    128.0,
+    128.0,
+    128.0,
+    128.0
+  ],
+  "min": [
+    -8.031372549019608,
+    -8.031372549019608,
+    -8.031372549019608,
+    -8.031372549019608
+  ],
+  "max": [
+    7.968627450980392,
+    7.968627450980392,
+    7.968627450980392,
+    7.968627450980392
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/in.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/in.json
new file mode 100644 (file)
index 0000000..48e4645
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "min": 0.010438590832054616,
+  "max": 0.9913724160194397
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/out.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/out.json
new file mode 100644 (file)
index 0000000..ec83b94
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "min": -10.584291763305664,
+  "max": 5.210139312744141
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/fake_quantization/ker.json
new file mode 100644 (file)
index 0000000..76a0440
--- /dev/null
@@ -0,0 +1,48 @@
+{
+  "weights": [
+    [
+      [
+        [
+          0.960784912109375,
+          2.0588245391845703
+        ],
+        [
+          -3.0196075439453125,
+          -3.980391502380371
+        ],
+        [
+          4.9411773681640625,
+          -6.039215087890625
+        ]
+      ],
+      [
+        [
+          7.0,
+          7.960784912109375
+        ],
+        [
+          -9.058823585510254,
+          -10.019607543945312
+        ],
+        [
+          10.980392456054688,
+          -11.941176414489746
+        ]
+      ],
+      [
+        [
+          13.039216995239258,
+          14.000001907348633
+        ],
+        [
+          -14.960784912109375,
+          -16.05882453918457
+        ],
+        [
+          17.019607543945312,
+          -17.980392456054688
+        ]
+      ]
+    ]
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ifm.json
new file mode 100644 (file)
index 0000000..4c3669f
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.0038701011799275875,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ker.json
new file mode 100644 (file)
index 0000000..04e0648
--- /dev/null
@@ -0,0 +1,60 @@
+{
+  "weights": [
+    [
+      [
+        [
+          138,
+          146
+        ],
+        [
+          109,
+          102
+        ],
+        [
+          167,
+          87
+        ]
+      ],
+      [
+        [
+          182,
+          189
+        ],
+        [
+          65,
+          58
+        ],
+        [
+          211,
+          44
+        ]
+      ],
+      [
+        [
+          226,
+          233
+        ],
+        [
+          22,
+          14
+        ],
+        [
+          255,
+          0
+        ]
+      ]
+    ]
+  ],
+  "scale": [
+    0.13725490196078433
+  ],
+  "zero_point": [
+    131.0
+  ],
+  "min": [
+    -17.980392156862745
+  ],
+  "max": [
+    17.019607843137255
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ofm.json
new file mode 100644 (file)
index 0000000..2e17905
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.25486624240875244,
+  "zero_point": 178.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ifm.json
new file mode 100644 (file)
index 0000000..d46844b
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "min": 0.006121497452259064,
+  "max": 0.9868757891654968
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ofm.json
new file mode 100644 (file)
index 0000000..4441f18
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "min": -45.46586318969727,
+  "max": 19.525028419494628
+}
index 9eb3489..d9fd917 100644 (file)
@@ -1,4 +1,8 @@
+addTest(Conv2D_004 channel uint8)
 addTest(Conv2D_004 layer uint8)
+addTest(DepthwiseConv2D_002 channel uint8)
 addTest(DepthwiseConv2D_002 layer uint8)
+addTest(FullyConnected_003 channel uint8)
 addTest(FullyConnected_003 layer uint8)
+addTest(TransposeConv_001 channel uint8)
 addTest(TransposeConv_001 layer uint8)
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/0.txt
new file mode 100644 (file)
index 0000000..98e895c
--- /dev/null
@@ -0,0 +1 @@
+0.19242816,0.44059092,0.06788187,0.04543579,0.14106855,0.6858487 ,0.6214997 ,0.31582046,0.859484  ,0.3664256 ,0.86936104,0.871024  ,0.68752515,0.5296719 ,0.99137205,0.02956272,0.14838405,0.69830126,0.22359788,0.9060323 ,0.7141239 ,0.5573066 ,0.96645916,0.11426282
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/1.txt
new file mode 100644 (file)
index 0000000..f480f80
--- /dev/null
@@ -0,0 +1 @@
+0.57016104,0.2788207 ,0.8045938 ,0.7589986 ,0.81506515,0.8411593 ,0.4162234 ,0.1664247 ,0.5584996 ,0.7799966 ,0.4213713 ,0.97587234,0.79440975,0.5089373 ,0.90030503,0.78015554,0.10080549,0.5115089 ,0.77238286,0.9580212 ,0.8758745 ,0.14367636,0.4304664 ,0.55175275
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/2.txt
new file mode 100644 (file)
index 0000000..683ea39
--- /dev/null
@@ -0,0 +1 @@
+0.6224246 ,0.30448085,0.29629433,0.44483584,0.30473125,0.6186932 ,0.45563242,0.5394331 ,0.22901213,0.4313142 ,0.4019574 ,0.02263176,0.3806077 ,0.27828163,0.23962335,0.26323524,0.6125012 ,0.5459546 ,0.6340052 ,0.19074932,0.2216875 ,0.77709603,0.03312786,0.02945002
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/3.txt
new file mode 100644 (file)
index 0000000..56c8c25
--- /dev/null
@@ -0,0 +1 @@
+0.7524557 ,0.5408983 ,0.07039106,0.5143847 ,0.04857475,0.7305833 ,0.36986747,0.42291477,0.90452653,0.43744263,0.24857366,0.7537328 ,0.04559262,0.65276045,0.3851062 ,0.49503985,0.37213495,0.10627239,0.7085863 ,0.1913133 ,0.08057284,0.31767172,0.9685745 ,0.5942544 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/4.txt
new file mode 100644 (file)
index 0000000..ecb221e
--- /dev/null
@@ -0,0 +1 @@
+0.16251074,0.5574537 ,0.5857036 ,0.877607  ,0.29711136,0.02456062,0.8250261 ,0.21300122,0.5064036 ,0.5882086 ,0.7736793 ,0.09394809,0.98618525,0.6611699 ,0.5001983 ,0.06507304,0.88984424,0.57143325,0.07953393,0.02649987,0.9283147 ,0.65522593,0.18371649,0.12332761
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/0.txt
new file mode 100644 (file)
index 0000000..f4fb503
--- /dev/null
@@ -0,0 +1 @@
+0.4383064 ,0.8700848 ,0.86010957,0.08396256,0.7963264 ,0.4156023 ,0.28146362,0.82196397,0.9921972 ,0.09969576,0.23987265,0.6734369 ,0.5469574 ,0.20805728,0.32639247,0.76773816
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/1.txt
new file mode 100644 (file)
index 0000000..af4b015
--- /dev/null
@@ -0,0 +1 @@
+0.4565062 ,0.92036587,0.47286046,0.18118097,0.5347498 ,0.91550153,0.300375  ,0.00581101,0.38686675,0.91085213,0.07278002,0.35556316,0.13014294,0.7274307 ,0.13867259,0.27517235
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/2.txt
new file mode 100644 (file)
index 0000000..5771603
--- /dev/null
@@ -0,0 +1 @@
+0.6900174 ,0.28745306,0.30255774,0.5095008 ,0.6689176 ,0.4914624 ,0.92629427,0.504829  ,0.33514255,0.49005315,0.08569656,0.60965323,0.82193315,0.12380831,0.06971261,0.8822662 
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/3.txt
new file mode 100644 (file)
index 0000000..1e03d83
--- /dev/null
@@ -0,0 +1 @@
+0.4240734 ,0.5430392 ,0.7536325 ,0.46065134,0.00315792,0.02719985,0.7080977 ,0.24389206,0.8114604 ,0.13292362,0.346597  ,0.70247084,0.55753845,0.01969242,0.82950485,0.66249627
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/4.txt
new file mode 100644 (file)
index 0000000..89ee30a
--- /dev/null
@@ -0,0 +1 @@
+0.31586212,0.19079527,0.9161567 ,0.8614566 ,0.9018915 ,0.34651542,0.62554437,0.05542602,0.8268219 ,0.38112178,0.9396123 ,0.49426383,0.8034765 ,0.72456217,0.5404088 ,0.8512237 
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/0.txt
new file mode 100644 (file)
index 0000000..9b19de5
--- /dev/null
@@ -0,0 +1 @@
+0.12934422,0.01033248,0.85648465,0.77248603,0.5128501 ,0.2453174 ,0.05065866,0.6601359 ,0.984665  ,0.57697976,0.58360994,0.79360527,0.90097004,0.26150337,0.1575109 ,0.9711614 
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/1.txt
new file mode 100644 (file)
index 0000000..4524779
--- /dev/null
@@ -0,0 +1 @@
+0.23895125,0.30275205,0.9916519 ,0.52355504,0.2577219 ,0.03600567,0.75446343,0.8064663 ,0.07550113,0.919774  ,0.84333146,0.48820078,0.31365713,0.97172034,0.7472666 ,0.66353893
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/2.txt
new file mode 100644 (file)
index 0000000..851e72c
--- /dev/null
@@ -0,0 +1 @@
+0.6186688 ,0.4357826 ,0.63239735,0.64489084,0.17722449,0.7146202 ,0.5182415 ,0.45549247,0.21316396,0.9769707 ,0.18412311,0.05855984,0.6755795 ,0.8516815 ,0.20649713,0.32990783
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/3.txt
new file mode 100644 (file)
index 0000000..7ff3c75
--- /dev/null
@@ -0,0 +1 @@
+0.15501449,0.67026544,0.2957976 ,0.95577955,0.6215903 ,0.2029572 ,0.6069057 ,0.60434276,0.01298514,0.66787016,0.02053251,0.34120578,0.63562113,0.9166186 ,0.7134427 ,0.95491254
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/4.txt
new file mode 100644 (file)
index 0000000..fe60dbd
--- /dev/null
@@ -0,0 +1 @@
+0.46877268,0.36748132,0.09441566,0.4476946 ,0.08834982,0.5387882 ,0.8359256 ,0.4374628 ,0.3835091 ,0.3577151 ,0.49470654,0.6017202 ,0.3546875 ,0.64218026,0.69008195,0.37631917
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/0.txt
new file mode 100644 (file)
index 0000000..fb728bb
--- /dev/null
@@ -0,0 +1 @@
+0.5177879 ,0.10991199,0.19134527,0.25834408,0.16297385,0.5499753 ,0.8782323 ,0.74750453,0.16825114,0.72425395,0.68458   ,0.9399099 ,0.81214494,0.73325175,0.6407931 ,0.02865177,0.04341139,0.44781777,0.59848577,0.72099334,0.654926  ,0.93810713,0.5193446 ,0.8657371 ,0.50826824,0.10122011,0.6946167 ,0.5009533 ,0.27305812,0.7708204 ,0.14410722,0.7092205 
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/1.txt
new file mode 100644 (file)
index 0000000..8c72dc7
--- /dev/null
@@ -0,0 +1 @@
+0.57410187,0.5534829 ,0.434663  ,0.55580896,0.9040647 ,0.16827786,0.82538676,0.25387943,0.7611494 ,0.49195638,0.00602222,0.20389748,0.541152  ,0.962896  ,0.37785006,0.9330408 ,0.9868882 ,0.57428783,0.830525  ,0.67987496,0.5576374 ,0.4303    ,0.8442439 ,0.21868347,0.45653513,0.7913927 ,0.31475154,0.6723579 ,0.5749264 ,0.07061622,0.6450232 ,0.52825755
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/2.txt
new file mode 100644 (file)
index 0000000..04ff6ae
--- /dev/null
@@ -0,0 +1 @@
+0.49751657,0.3004485 ,0.11624487,0.17704253,0.9022095 ,0.24667789,0.9204152 ,0.09801941,0.9194739 ,0.35418576,0.36659864,0.4962548 ,0.83799136,0.58057517,0.2948883 ,0.28411615,0.14429809,0.8460358 ,0.7026028 ,0.25956342,0.5251088 ,0.06569998,0.01754393,0.45209908,0.95638806,0.6044543 ,0.17229715,0.6828144 ,0.8684328 ,0.5829665 ,0.1456113 ,0.3334334 
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/3.txt
new file mode 100644 (file)
index 0000000..1342dac
--- /dev/null
@@ -0,0 +1 @@
+0.00850414,0.5746211 ,0.7659193 ,0.8643168 ,0.36803156,0.08386383,0.76002747,0.19255683,0.05220222,0.18169314,0.88597506,0.6793377 ,0.45955214,0.16984127,0.5275391 ,0.910098  ,0.64607793,0.3997594 ,0.38601097,0.40899974,0.10289235,0.896202  ,0.22364503,0.30232555,0.11873382,0.07853477,0.20674925,0.35148785,0.02880615,0.09937044,0.4382221 ,0.53562754
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/4.txt
new file mode 100644 (file)
index 0000000..e3e8539
--- /dev/null
@@ -0,0 +1 @@
+0.8097857 ,0.4602844 ,0.01609277,0.7885611 ,0.9090256 ,0.75475484,0.98657864,0.5927874 ,0.73494065,0.374227  ,0.23557834,0.6020654 ,0.0122237 ,0.37126908,0.38277507,0.67635936,0.4139088 ,0.8625733 ,0.37775922,0.15304309,0.6196326 ,0.4827059 ,0.76868814,0.5530773 ,0.3336473 ,0.11217184,0.5877591 ,0.5325879 ,0.48493427,0.6317438 ,0.9385114 ,0.02825027
index 17c6aa6..0ef7ccc 100644 (file)
 
 #include "RecordMinMax.h"
 #include "RecordFunction.h"
-#include "CircleExpContract.h"
 #include "MinMaxObserver.h"
 #include "HDF5Importer.h"
 
 #include <luci/Importer.h>
 #include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
 #include <luci/IR/CircleQuantParam.h>
 
 #include <algorithm>
@@ -83,6 +83,15 @@ void RecordMinMax::initialize(const std::string &input_model_path)
   }
   std::vector<char> model_data((std::istreambuf_iterator<char>(fs)),
                                std::istreambuf_iterator<char>());
+
+  // Verify flatbuffers
+  flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model_data.data()),
+                                 model_data.size()};
+  if (!circle::VerifyModelBuffer(verifier))
+  {
+    throw std::runtime_error("ERROR: Failed to verify circle '" + input_model_path + "'");
+  }
+
   _module = luci::Importer().importModule(circle::GetModel(model_data.data()));
 
   if (_module == nullptr)
@@ -185,7 +194,8 @@ void RecordMinMax::saveModel(const std::string &output_model_path)
 {
   // Export to output Circle file
   luci::CircleExporter exporter;
-  CircleExpContract contract(_module.get(), output_model_path);
+
+  luci::CircleFileExpContract contract(_module.get(), output_model_path);
 
   if (!exporter.invoke(&contract))
   {
index 5a307be..ca7eddc 100644 (file)
@@ -1,5 +1,13 @@
+nnas_find_package(Protobuf QUIET)
+
+if(NOT Protobuf_FOUND)
+  message(STATUS "Build souschef: FAILED (missing Protobuf")
+  return()
+endif(NOT Protobuf_FOUND)
+
 file(GLOB_RECURSE SOURCES "src/*.cpp")
 
 add_library(souschef STATIC ${SOURCES})
 set_target_properties(souschef PROPERTIES POSITION_INDEPENDENT_CODE ON)
 target_include_directories(souschef PUBLIC include)
+target_link_libraries(souschef PUBLIC libprotobuf)
index 46a12e4..ef67a73 100644 (file)
@@ -19,6 +19,8 @@
 
 #include <vector>
 
+#include <google/protobuf/repeated_field.h>
+
 namespace souschef
 {
 
@@ -57,6 +59,21 @@ private:
   std::vector<T> _vec;
 };
 
+template <typename T> std::vector<T> as_vector(const ::google::protobuf::RepeatedPtrField<T> &field)
+{
+  std::vector<T> res;
+  for (const auto &elem : field)
+  {
+    res.emplace_back(elem);
+  }
+  return res;
+}
+
+template <typename T> Dataset<T> as_dataset(const ::google::protobuf::RepeatedPtrField<T> &field)
+{
+  return Dataset<T>(as_vector<T>(field));
+}
+
 } // namespace souschef
 
 #endif // __SOUSCHEF_DATASET_H__
diff --git a/compiler/souschef/include/souschef/Dims.h b/compiler/souschef/include/souschef/Dims.h
new file mode 100644 (file)
index 0000000..52c64dd
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SOUSCHEF_DIMS_H__
+#define __SOUSCHEF_DIMS_H__
+
+#include <functional>
+#include <numeric>
+#include <vector>
+
+namespace souschef
+{
+
+template <typename T> using Dims = std::vector<T>;
+
+template <typename SHAPETYPE> Dims<int32_t> as_dims(const SHAPETYPE &shape)
+{
+  std::vector<int32_t> res;
+
+  for (auto &dim : shape.dim())
+  {
+    res.emplace_back(static_cast<int32_t>(dim));
+  }
+
+  return res;
+}
+
+inline int32_t element_count(const Dims<int32_t> &dims)
+{
+  return std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<int32_t>());
+}
+
+} // namespace souschef
+
+#endif // __SOUSCHEF_DIMS_H__
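
For illustration, element_count simply folds the dimension vector with multiplication; the computation is restated inline below so the sketch stays self-contained (this is not souschef code):

#include <cassert>
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

int main()
{
  std::vector<int32_t> dims{2, 3, 4}; // souschef::Dims<int32_t> is an alias for this type
  const int32_t count = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<int32_t>());
  assert(count == 24); // a 2x3x4 tensor holds 24 elements
  return 0;
}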
diff --git a/compiler/souschef/include/souschef/TensorFiller.h b/compiler/souschef/include/souschef/TensorFiller.h
new file mode 100644 (file)
index 0000000..1d87f13
--- /dev/null
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SOUSCHEF_TENSOR_FILLER_H__
+#define __SOUSCHEF_TENSOR_FILLER_H__
+
+#include <map>
+#include <vector>
+
+namespace souschef
+{
+
+class TensorFiller
+{
+public:
+  virtual ~TensorFiller() = default;
+
+  /**
+   * @brief This will record, by index, a tensor that needs a filler option,
+   *        such as a kernel or bias.
+   */
+  void set_tensor_filler(uint32_t tensor_index) { _tensor_filler[tensor_index] = true; }
+
+  /**
+   * @brief This will store int32 filler values such as reshape information for the tensor
+   */
+  void set_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
+  {
+    _tensor_filler_vint32[tensor_index] = expvalues;
+  }
+
+  void set_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
+  {
+    _tensor_filler_vfloat[tensor_index] = expvalues;
+  }
+
+  /**
+   * @brief This will return true if the tensor at the given index needs a filler option.
+   */
+  bool get_tensor_filler(uint32_t tensor_index)
+  {
+    auto it = _tensor_filler.find(tensor_index);
+    if (it != _tensor_filler.end())
+    {
+      return it->second;
+    }
+    return false;
+  }
+
+  /**
+   * @brief This will return true if the tensor at the given index needs an int array filler option.
+   */
+  bool get_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
+  {
+    auto it = _tensor_filler_vint32.find(tensor_index);
+    if (it != _tensor_filler_vint32.end())
+    {
+      expvalues = it->second;
+      return true;
+    }
+    return false;
+  }
+
+  bool get_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
+  {
+    auto it = _tensor_filler_vfloat.find(tensor_index);
+    if (it != _tensor_filler_vfloat.end())
+    {
+      expvalues = it->second;
+      return true;
+    }
+    return false;
+  }
+
+private:
+  std::map<uint32_t, bool> _tensor_filler{};
+  std::map<uint32_t, std::vector<int32_t>> _tensor_filler_vint32{};
+  std::map<uint32_t, std::vector<float>> _tensor_filler_vfloat{};
+};
+
+} // namespace souschef
+
+#endif // __SOUSCHEF_TENSOR_FILLER_H__
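
In this patch the consumer of TensorFiller is TFliteImport, which now inherits it (see below); a hypothetical standalone use, assuming the souschef include directory is on the path, looks like:

#include <souschef/TensorFiller.h>

#include <cassert>
#include <cstdint>
#include <vector>

int main()
{
  souschef::TensorFiller filler;

  filler.set_tensor_filler(0); // tensor 0 needs a filler option (e.g. a kernel or bias)
  std::vector<int32_t> shape_values{1, 2, 2, 1};
  filler.set_tensor_filler(1, shape_values); // tensor 1 carries explicit int32 values

  assert(filler.get_tensor_filler(0));

  std::vector<int32_t> recovered;
  assert(filler.get_tensor_filler(1, recovered));
  assert(recovered == shape_values);
  return 0;
}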
similarity index 63%
rename from compiler/luci-value-test/tester/src/CircleExpContract.cpp
rename to compiler/souschef/src/Dims.cpp
index b56b7ee..fba4813 100644 (file)
  * limitations under the License.
  */
 
-#include "CircleExpContract.h"
+#include "souschef/Dims.h"
 
-#include <oops/InternalExn.h>
-
-#include <fstream>
-#include <iostream>
-
-bool CircleExpContract::store(const char *ptr, const size_t size) const
-{
-  if (!ptr)
-    INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
-
-  std::ofstream fs(_filepath.c_str(), std::ofstream::binary);
-  fs.write(ptr, size);
-
-  return fs.good();
-}
+// NOTE Do NOT delete this file; this file checks the completeness of 'Dims.h'
index 692ce48..a4b435d 100644 (file)
@@ -26,6 +26,7 @@
 #include "OpChefs.h"
 
 #include <souschef/Dataset.h>
+#include <souschef/Dims.h>
 
 #include "Log.h"
 
 #include <sstream>
 #include <stdexcept>
 
-namespace
-{
-
 using namespace souschef;
 
-template <typename T> std::vector<T> as_vector(const ::google::protobuf::RepeatedPtrField<T> &field)
-{
-  std::vector<T> res;
-  for (const auto &elem : field)
-  {
-    res.emplace_back(elem);
-  }
-  return res;
-}
-
-template <typename T> Dataset<T> as_dataset(const ::google::protobuf::RepeatedPtrField<T> &field)
-{
-  return Dataset<T>(as_vector<T>(field));
-}
-
-} // namespace
-
-namespace
-{
-
-template <typename T> using Dims = std::vector<T>;
-
-Dims<int32_t> as_dims(const tflchef::TensorShape &shape)
-{
-  std::vector<int32_t> res;
-
-  for (auto &dim : shape.dim())
-  {
-    res.emplace_back(static_cast<int32_t>(dim));
-  }
-
-  return res;
-}
-
-int32_t element_count(const Dims<int32_t> &dims)
-{
-  return std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<int32_t>());
-}
-
-} // namespace
-
 namespace
 {
 
  * limitations under the License.
  */
 
-#include "CircleExpContract.h"
+#include "NonMaxSuppressionV5.h"
 
-#include <oops/InternalExn.h>
-
-#include <fstream>
-#include <iostream>
-
-namespace record_minmax
-{
-
-bool CircleExpContract::store(const char *ptr, const size_t size) const
+flatbuffers::Offset<void> NonMaxSuppressionV5Chef::value(flatbuffers::FlatBufferBuilder &fbb) const
 {
-  if (!ptr)
-    INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
+  tflite::NonMaxSuppressionV5OptionsBuilder options_builder{fbb};
 
-  std::ofstream fs(_filepath, std::ofstream::binary);
-  fs.write(ptr, size);
-
-  return fs.good();
+  return options_builder.Finish().Union();
 }
 
-} // namespace record_minmax
+std::unique_ptr<OpChef>
+NonMaxSuppressionV5ChefFactory::create(const tflchef::Operation *operation) const
+{
+  return std::unique_ptr<OpChef>{new NonMaxSuppressionV5Chef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/NonMaxSuppressionV5.h b/compiler/tflchef/core/src/Op/NonMaxSuppressionV5.h
new file mode 100644 (file)
index 0000000..a3c8b60
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_NON_MAX_SUPPRESSION_V5_H__
+#define __OP_NON_MAX_SUPPRESSION_V5_H__
+
+#include "OpChef.h"
+
+class NonMaxSuppressionV5Chef final : public OpChef
+{
+public:
+  explicit NonMaxSuppressionV5Chef(const tflchef::Operation *operation) : _operation{operation}
+  {
+    // DO NOTHING
+  }
+
+public:
+  tflite::BuiltinOperator code(void) const override
+  {
+    return tflite::BuiltinOperator_NON_MAX_SUPPRESSION_V5;
+  }
+
+  tflite::BuiltinOptions type(void) const override
+  {
+    return tflite::BuiltinOptions_NonMaxSuppressionV5Options;
+  }
+
+  flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+  const tflchef::Operation *_operation;
+};
+
+struct NonMaxSuppressionV5ChefFactory final : public OpChefFactory
+{
+  std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_NON_MAX_SUPPRESSION_V5_H__
index 2441862..6b242e8 100644 (file)
@@ -56,6 +56,7 @@ OP_CHEF(MirrorPad, MirrorPadChefFactory)
 OP_CHEF(Mul, MulChefFactory)
 OP_CHEF(Neg, NegChefFactory)
 OP_CHEF(NonMaxSuppressionV4, NonMaxSuppressionV4ChefFactory)
+OP_CHEF(NonMaxSuppressionV5, NonMaxSuppressionV5ChefFactory)
 OP_CHEF(NotEqual, NotEqualChefFactory)
 OP_CHEF(OneHot, OneHotChefFactory)
 OP_CHEF(Pack, PackChefFactory)
index 5b2e89b..7637b1c 100644 (file)
@@ -69,6 +69,7 @@
 #include "Op/Mul.h"
 #include "Op/Neg.h"
 #include "Op/NonMaxSuppressionV4.h"
+#include "Op/NonMaxSuppressionV5.h"
 #include "Op/NotEqual.h"
 #include "Op/OneHot.h"
 #include "Op/Pack.h"
index 70b966e..9909d51 100644 (file)
@@ -371,6 +371,10 @@ message NonMaxSuppressionV4Options {
   // None
 }
 
+message NonMaxSuppressionV5Options {
+  // None
+}
+
 message NotEqualOptions {
   // None
 }
@@ -544,7 +548,7 @@ message Operation {
   // HardSwishOptions 196
   optional DepthToSpaceOptions depth_to_space_options = 197;
   optional NonMaxSuppressionV4Options non_max_suppression_v4_options = 198;
-  // NonMaxSuppressionV5Options 199
+  optional NonMaxSuppressionV5Options non_max_suppression_v5_options = 199;
   optional ScatterNdOptions scatter_nd_options = 200;
   optional NotEqualOptions notequal_options = 201;
   optional ExpandDimsOptions expand_dims_options = 202;
index 645c161..83127cb 100644 (file)
@@ -7,3 +7,4 @@ target_link_libraries(tflchef_tflite tflchef_proto)
 target_link_libraries(tflchef_tflite mio_tflite)
 target_link_libraries(tflchef_tflite stdex)
 target_link_libraries(tflchef_tflite cwrap)
+target_link_libraries(tflchef_tflite souschef)
diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp
new file mode 100644 (file)
index 0000000..db7f4c9
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NonMaxSuppressionV5.h"
+
+#include "Convert.h"
+#include "FillerHelper.h"
+
+namespace tflchef
+{
+
+void TFliteOpNonMaxSuppressionV5::filler(const tflite::Operator *op, TFliteImport *import,
+                                         tflchef::ModelRecipe *model_recipe) const
+{
+  const auto &inputs = *op->inputs();
+
+  const tflite::Tensor *max_output_size_tensor = import->tensors()->Get(inputs[2]);
+  assert(max_output_size_tensor->type() == tflite::TensorType::TensorType_INT32);
+
+  const tflite::Tensor *iou_threshold_tensor = import->tensors()->Get(inputs[3]);
+  assert(iou_threshold_tensor->type() == tflite::TensorType::TensorType_FLOAT32);
+
+  const tflite::Tensor *score_threshold_tensor = import->tensors()->Get(inputs[4]);
+  assert(score_threshold_tensor->type() == tflite::TensorType::TensorType_FLOAT32);
+
+  const tflite::Tensor *soft_nms_sigma_tensor = import->tensors()->Get(inputs[5]);
+  assert(soft_nms_sigma_tensor->type() == tflite::TensorType::TensorType_FLOAT32);
+
+  for (int32_t index = 2; index < 6; ++index)
+  {
+    fill_tensor_to_import(index, import);
+  }
+}
+
+tflchef::Operation *TFliteOpNonMaxSuppressionV5::build(const tflite::Operator *op,
+                                                       TFliteImport *import,
+                                                       tflchef::ModelRecipe *model_recipe) const
+{
+  auto operation = model_recipe->add_operation();
+
+  operation->set_type("NonMaxSuppressionV5");
+
+  return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h
new file mode 100644 (file)
index 0000000..c948043
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_NON_MAX_SUPPRESSION_V5_H__
+#define __TFLITE_OP_NON_MAX_SUPPRESSION_V5_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for NON_MAX_SUPPRESSION_V5
+ */
+class TFliteOpNonMaxSuppressionV5 : public TFliteOpChef
+{
+public:
+  void filler(const tflite::Operator *op, TFliteImport *import,
+              tflchef::ModelRecipe *model_recipe) const override;
+  tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+                            tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_NON_MAX_SUPPRESSION_V5_H__
index 5b46f45..9d0a642 100644 (file)
@@ -19,6 +19,8 @@
 
 #include <mio/tflite/schema_generated.h>
 
+#include <souschef/TensorFiller.h>
+
 #include <tflchef.pb.h>
 
 #include <map>
@@ -40,7 +42,7 @@ bool is_custom(const tflite::OperatorCode *opcode);
 /**
  * @brief Loads TF lite file and provides helpers to access attributes
  */
-class TFliteImport
+class TFliteImport : public souschef::TensorFiller
 {
 public:
   TFliteImport(const tflite::Model *model);
@@ -63,63 +65,6 @@ public:
   std::string opcode_name(const tflite::Operator *op) const;
   size_t buffer_info(const tflite::Tensor *tensor, const uint8_t **buff_data);
 
-  /**
-   * @brief This will record the tensor by index, if it needs filler option,
-   *        such as kernel, bias.
-   */
-  void set_tensor_filler(uint32_t tensor_index) { _tensor_filler[tensor_index] = true; }
-
-  /**
-   * @brief This will store int32 filler values such as reshape information for the tensor
-   */
-  void set_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
-  {
-    _tensor_filler_vint32[tensor_index] = expvalues;
-  }
-
-  void set_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
-  {
-    _tensor_filler_vfloat[tensor_index] = expvalues;
-  }
-
-  /**
-   * @brief This will return true if the tensor by index, needs a filler option.
-   */
-  bool get_tensor_filler(uint32_t tensor_index)
-  {
-    auto it = _tensor_filler.find(tensor_index);
-    if (it != _tensor_filler.end())
-    {
-      return it->second;
-    }
-    return false;
-  }
-
-  /**
-   * @brief This will return true if the tensor by index, needs a int array filler option.
-   */
-  bool get_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
-  {
-    auto it = _tensor_filler_vint32.find(tensor_index);
-    if (it != _tensor_filler_vint32.end())
-    {
-      expvalues = it->second;
-      return true;
-    }
-    return false;
-  }
-
-  bool get_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
-  {
-    auto it = _tensor_filler_vfloat.find(tensor_index);
-    if (it != _tensor_filler_vfloat.end())
-    {
-      expvalues = it->second;
-      return true;
-    }
-    return false;
-  }
-
 private:
   const TFliteSubGraphs_t *_subgraphs{nullptr};
   const TFliteBuffers_t *_buffers{nullptr};
@@ -129,10 +74,6 @@ private:
   std::vector<const tflite::OperatorCode *> _op_codes{};
   std::vector<int32_t> _inputs{};
   std::vector<int32_t> _outputs{};
-
-  std::map<uint32_t, bool> _tensor_filler{};
-  std::map<uint32_t, std::vector<int32_t>> _tensor_filler_vint32{};
-  std::map<uint32_t, std::vector<float>> _tensor_filler_vfloat{};
 };
 
 } // namespace tflchef
index de14e37..36a0109 100644 (file)
@@ -69,6 +69,7 @@
 #include "Op/Mul.h"
 #include "Op/Neg.h"
 #include "Op/NonMaxSuppressionV4.h"
+#include "Op/NonMaxSuppressionV5.h"
 #include "Op/NotEqual.h"
 #include "Op/OneHot.h"
 #include "Op/Pack.h"
index 8d33007..a454e98 100644 (file)
@@ -106,6 +106,7 @@ private:
     REG_TFL_OP(MUL, TFliteOpMul);
     REG_TFL_OP(NEG, TFliteOpNeg);
     REG_TFL_OP(NON_MAX_SUPPRESSION_V4, TFliteOpNonMaxSuppressionV4);
+    REG_TFL_OP(NON_MAX_SUPPRESSION_V5, TFliteOpNonMaxSuppressionV5);
     REG_TFL_OP(NOT_EQUAL, TFliteOpNotEqual);
     REG_TFL_OP(ONE_HOT, TFliteOpOneHot);
     REG_TFL_OP(PACK, TFliteOpPack);
index df027c3..24b9264 100644 (file)
@@ -677,9 +677,11 @@ OpPrinterRegistry::OpPrinterRegistry()
   _op_map[tflite::BuiltinOperator_MIRROR_PAD] = make_unique<MirrorPadPrinter>();
   _op_map[tflite::BuiltinOperator_MUL] = make_unique<MulPrinter>();
   // There is no Option for NON_MAX_SUPPRESSION_V4
+  // There is no Option for NON_MAX_SUPPRESSION_V5
   _op_map[tflite::BuiltinOperator_ONE_HOT] = make_unique<OneHotPrinter>();
   _op_map[tflite::BuiltinOperator_PACK] = make_unique<PackPrinter>();
   // There is no Option for PAD
+  // There is no Option for PADV2
   // There is no Option for PRELU
   // There is no Option for RELU
   // There is no Option for RELU6
index 00b3de9..6801186 100644 (file)
@@ -63,6 +63,7 @@
 #include "BuildBuiltinOptions/MulOptions.h"
 #include "BuildBuiltinOptions/NegOptions.h"
 #include "BuildBuiltinOptions/NonMaxSuppressionV4Options.h"
+#include "BuildBuiltinOptions/NonMaxSuppressionV5Options.h"
 #include "BuildBuiltinOptions/NotEqualOptions.h"
 #include "BuildBuiltinOptions/OneHotOptions.h"
 #include "BuildBuiltinOptions/PackOptions.h"
  * limitations under the License.
  */
 
-#include "CircleExpContract.h"
+#include "NonMaxSuppressionV5Options.h"
 
-#include <oops/InternalExn.h>
-
-#include <fstream>
-#include <iostream>
-
-bool CircleExpContract::store(const char *ptr, const size_t size) const
+namespace tflite2circle
 {
-  if (!ptr)
-    INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
-
-  std::ofstream fs(_filepath.c_str(), std::ofstream::binary);
-  fs.write(ptr, size);
 
-  return fs.good();
+flatbuffers::Offset<circle::NonMaxSuppressionV5Options>
+build_circle_NonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &fb,
+                                        const tflite::Operator *)
+{
+  circle::NonMaxSuppressionV5OptionsBuilder builtin_options_builder{fb};
+  return builtin_options_builder.Finish();
 }
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.h b/compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.h
new file mode 100644 (file)
index 0000000..faf989a
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_NON_MAX_SUPPRESSION_V5_OPTIONS_H__
+#define __BBO_NON_MAX_SUPPRESSION_V5_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::NonMaxSuppressionV5Options>
+build_circle_NonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &fb,
+                                        const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_NON_MAX_SUPPRESSION_V5_OPTIONS_H__
index cb4437a..14c44cb 100644 (file)
@@ -119,6 +119,75 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla
       // is_variable
       bool is_variable = it->is_variable();
 
+      flatbuffers::Offset<circle::SparsityParameters> sparsity;
+      // sparsity
+      if (it->sparsity())
+      {
+        flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order;
+        flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map;
+        flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
+            dim_metadata;
+
+        // traversal_order
+        if (it->sparsity()->traversal_order())
+        {
+          auto traversal_order_vec = std::vector<int32_t>{
+              it->sparsity()->traversal_order()->begin(), it->sparsity()->traversal_order()->end()};
+          traversal_order = fb->CreateVector(traversal_order_vec);
+        }
+
+        // block_map
+        if (it->sparsity()->block_map())
+        {
+          auto block_map_vec = std::vector<int32_t>{it->sparsity()->block_map()->begin(),
+                                                    it->sparsity()->block_map()->end()};
+          block_map = fb->CreateVector(block_map_vec);
+        }
+
+        // dim_metadata
+        std::vector<flatbuffers::Offset<circle::DimensionMetadata>> dim_metadata_vec;
+        auto tflite_dim_metadata = it->sparsity()->dim_metadata();
+        for (auto it : *tflite_dim_metadata)
+        {
+          // array_segments
+          auto tflite_array_segments_type = it->array_segments_type();
+          auto circle_array_segments =
+              get_circle_sparse_index_vector(*fb, it, tflite_array_segments_type);
+          auto circle_array_segments_type =
+              get_circle_sparse_index_vector_type(tflite_array_segments_type);
+
+          // array_indices
+          auto tflite_array_indices_type = it->array_indices_type();
+          auto circle_array_indices =
+              get_circle_sparse_index_vector(*fb, it, tflite_array_indices_type);
+          auto circle_array_indices_type =
+              get_circle_sparse_index_vector_type(tflite_array_indices_type);
+
+          auto circle_dim_metadata_builder = circle::DimensionMetadataBuilder{*fb};
+
+          circle_dim_metadata_builder.add_format(get_circle_dimension_type(it->format()));
+          circle_dim_metadata_builder.add_dense_size(it->dense_size());
+          circle_dim_metadata_builder.add_array_segments(circle_array_segments);
+          circle_dim_metadata_builder.add_array_segments_type(circle_array_segments_type);
+          circle_dim_metadata_builder.add_array_indices(circle_array_indices);
+          circle_dim_metadata_builder.add_array_indices_type(circle_array_indices_type);
+          auto dim_metadata = circle_dim_metadata_builder.Finish();
+          dim_metadata_vec.emplace_back(dim_metadata);
+        }
+        dim_metadata = fb->CreateVector(dim_metadata_vec);
+
+        sparsity = circle::CreateSparsityParameters(*fb, traversal_order, block_map, dim_metadata);
+      }
+
+      // shape signature
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature;
+      if (it->shape_signature())
+      {
+        auto shape_signature_vec =
+            std::vector<int32_t>({it->shape_signature()->begin(), it->shape_signature()->end()});
+        shape_signature = fb->CreateVector(shape_signature_vec);
+      }
+
       circle::TensorBuilder tensor_builder{*fb};
       tensor_builder.add_shape(shape);
       tensor_builder.add_type(get_circle_tensortype(it->type()));
@@ -126,6 +195,8 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla
       tensor_builder.add_name(name);
       tensor_builder.add_quantization(quantization);
       tensor_builder.add_is_variable(is_variable);
+      tensor_builder.add_sparsity(sparsity);
+      tensor_builder.add_shape_signature(shape_signature);
       auto tensor = tensor_builder.Finish();
       tensor_vec.emplace_back(tensor);
     }
@@ -226,6 +297,14 @@ CircleModel::CircleModel(FlatBufBuilder &fb, TFLModel &model)
     : _version{0}, _description{fb->CreateString("nnpackage")}, _fb{fb}
 {
   const tflite::Model *tfl_model = model.load_model();
+  // verify flatbuffers
+  flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model._data.data()),
+                                 model._data.size()};
+  if (!tflite::VerifyModelBuffer(verifier))
+  {
+    throw std::runtime_error("ERROR: Failed to verify tflite");
+  }
+
   _operator_codes_offset =
       std::make_unique<Offset<OperatorCodeLink>>(fb, tfl_model->operator_codes());
   _subGraphs_offset = std::make_unique<Offset<SubGraphLink>>(fb, tfl_model->subgraphs());
index b0d35d1..75504b0 100644 (file)
@@ -123,4 +123,79 @@ circle::MirrorPadMode get_circle_mirrorpad_mode(tflite::MirrorPadMode tfl_mode)
   }
 }
 
+circle::DimensionType get_circle_dimension_type(tflite::DimensionType tfl_dim_type)
+{
+  switch (tfl_dim_type)
+  {
+    case tflite::DimensionType_DENSE:
+      return circle::DimensionType_DENSE;
+    case tflite::DimensionType_SPARSE_CSR:
+      return circle::DimensionType_SPARSE_CSR;
+    default:
+      throw std::runtime_error("tflite2circle: wrong dimension type.");
+  }
+}
+
+flatbuffers::Offset<void>
+get_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb,
+                               const tflite::DimensionMetadata *dm,
+                               const tflite::SparseIndexVector &tfl_sparse_index_vector_type)
+{
+  switch (tfl_sparse_index_vector_type)
+  {
+    case tflite::SparseIndexVector_NONE:
+      return flatbuffers::Offset<void>();
+    case tflite::SparseIndexVector_Int32Vector:
+    {
+      auto values_vec_int32 =
+          std::vector<int32_t>{dm->array_segments_as_Int32Vector()->values()->begin(),
+                               dm->array_segments_as_Int32Vector()->values()->end()};
+      auto values_int32 = fb.CreateVector(values_vec_int32);
+      circle::Int32VectorBuilder int32_vector_builder{fb};
+      int32_vector_builder.add_values(values_int32);
+      return int32_vector_builder.Finish().Union();
+    }
+    case tflite::SparseIndexVector_Uint16Vector:
+    {
+      auto values_vec_uint16 =
+          std::vector<uint16_t>{dm->array_segments_as_Uint16Vector()->values()->begin(),
+                                dm->array_segments_as_Uint16Vector()->values()->end()};
+      auto values_uint16 = fb.CreateVector(values_vec_uint16);
+      circle::Uint16VectorBuilder uint16_vector_builder{fb};
+      uint16_vector_builder.add_values(values_uint16);
+      return uint16_vector_builder.Finish().Union();
+    }
+    case tflite::SparseIndexVector_Uint8Vector:
+    {
+      auto values_vec_uint8 =
+          std::vector<uint8_t>{dm->array_segments_as_Uint8Vector()->values()->begin(),
+                               dm->array_segments_as_Uint8Vector()->values()->end()};
+      auto values_uint8 = fb.CreateVector(values_vec_uint8);
+      circle::Uint8VectorBuilder uint8_vector_builder{fb};
+      uint8_vector_builder.add_values(values_uint8);
+      return uint8_vector_builder.Finish().Union();
+    }
+    default:
+      throw std::runtime_error("tflite2circle: wrong SparseIndexVector type.");
+  }
+}
+
+circle::SparseIndexVector
+get_circle_sparse_index_vector_type(const tflite::SparseIndexVector &tfl_sparse_index_vector_type)
+{
+  switch (tfl_sparse_index_vector_type)
+  {
+    case tflite::SparseIndexVector_NONE:
+      return circle::SparseIndexVector_NONE;
+    case tflite::SparseIndexVector_Int32Vector:
+      return circle::SparseIndexVector_Int32Vector;
+    case tflite::SparseIndexVector_Uint16Vector:
+      return circle::SparseIndexVector_Uint16Vector;
+    case tflite::SparseIndexVector_Uint8Vector:
+      return circle::SparseIndexVector_Uint8Vector;
+    default:
+      throw std::runtime_error("tflite2circle: wrong SparseIndexVector type.");
+  }
+}
+
 } // namespace tflite2circle
index 7ea01b9..26ad746 100644 (file)
@@ -76,6 +76,25 @@ circle::BuiltinOptions get_circle_builtin_options_type(const tflite::Operator *o
 */
 circle::MirrorPadMode get_circle_mirrorpad_mode(tflite::MirrorPadMode tfl_mode);
 
+/**
+ * @brief Returns circle DimensionType according to tflite.
+*/
+circle::DimensionType get_circle_dimension_type(tflite::DimensionType tfl_dim_type);
+
+/**
+ * @brief Returns circle SparseIndexVector according to tflite.
+*/
+flatbuffers::Offset<void>
+get_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb,
+                               const tflite::DimensionMetadata *dm,
+                               const tflite::SparseIndexVector &tfl_sparse_index_vector_type);
+
+/**
+ * @brief Returns circle SparseIndexVector type according to tflite.
+*/
+circle::SparseIndexVector
+get_circle_sparse_index_vector_type(const tflite::SparseIndexVector &tfl_sparse_index_vector_type);
+
 } // namespace tflite2circle
 
 #endif // __DATA_LOOKUP_H__
index a2a1453..22b5986 100644 (file)
@@ -101,7 +101,7 @@ TFL_BUILTIN_OPTIONS(IfOptions)
 TFL_BUILTIN_OPTIONS(WhileOptions)
 TFL_BUILTIN_OPTIONS(DepthToSpaceOptions)
 TFL_BUILTIN_OPTIONS(NonMaxSuppressionV4Options)
-//TFL_BUILTIN_OPTIONS(NonMaxSuppressionV5Options)
+TFL_BUILTIN_OPTIONS(NonMaxSuppressionV5Options)
 TFL_BUILTIN_OPTIONS(RankOptions)
 TFL_BUILTIN_OPTIONS(ScatterNdOptions)
 TFL_BUILTIN_OPTIONS(SegmentSumOptions)
index b8cb793..be43989 100644 (file)
@@ -1,5 +1,5 @@
 if (NOT VCONONE_VERSION)
-  set(VCONONE_VERSION 0x0000000000080001)
+  set(VCONONE_VERSION 0x0000000000090001)
   # NOTE order is [build patch minor major]
   # if VCONONE_VERSION is set with -D option, it will be cached
   # you may have to remove cache file if you remove -D option
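
Reading the constant left to right in the [build patch minor major] order noted above, 16 bits per field, 0x0000000000090001 decodes to version 1.9.0 (the previous 0x0000000000080001 was 1.8.0). A decoding sketch, with the field layout inferred from that comment rather than from the vconone sources:

#include <cstdint>
#include <cstdio>

int main()
{
  const uint64_t v = 0x0000000000090001ULL;
  const unsigned major = static_cast<unsigned>(v & 0xFFFF);         // low word  -> 1
  const unsigned minor = static_cast<unsigned>((v >> 16) & 0xFFFF); //           -> 9
  const unsigned patch = static_cast<unsigned>((v >> 32) & 0xFFFF); //           -> 0
  const unsigned build = static_cast<unsigned>((v >> 48) & 0xFFFF); // high word -> 0
  std::printf("%u.%u.%u build %u\n", major, minor, patch, build);   // 1.9.0 build 0
  return 0;
}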
index a41e6db..3840110 100644 (file)
@@ -59,6 +59,8 @@ Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo *
   const size_t num_of_kernels = axis.size();
   const size_t num_of_interm_tensors = num_of_kernels - (keep_dims ? 1 : 0);
 
+  ARM_COMPUTE_RETURN_ERROR_ON(num_of_kernels < 1);
+
   // Create temporary tensor infos
   auto interm_tensors = support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
 
index 09f1780..aa165cc 100644 (file)
@@ -1,4 +1,20 @@
 /*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
  * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
index 886ce5e..c0c9313 100644 (file)
@@ -106,6 +106,9 @@ struct SoftmaxParams
   int32_t reverse_scaling_divisor;
   int32_t reverse_scaling_right_shift;
   int diff_min;
+  int32_t zero_point;
+  float scale;
+  float *table;
 };
 
 struct PackParams
index a27871e..dc3e255 100644 (file)
@@ -1,17 +1,19 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #ifndef __NNFW_CKER_EIGEN_EIGEN_CONVOLUTION_HELPERS_H__
 #define __NNFW_CKER_EIGEN_EIGEN_CONVOLUTION_HELPERS_H__
index 5af2e48..c6f1e2e 100644 (file)
@@ -1,17 +1,19 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #ifndef __NNFW_CKER_EGIEN_EIGEN_SPATIAL_CONVOLUTIONS_H__
 #define __NNFW_CKER_EGIEN_EIGEN_SPATIAL_CONVOLUTIONS_H__
index 119d827..116f01b 100644 (file)
@@ -1,17 +1,20 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
 #ifndef __NNFW_CKER_NEON_CHECK_H__
 #define __NNFW_CKER_NEON_CHECK_H__
 
index de43ba3..6149caf 100644 (file)
@@ -32,7 +32,16 @@ namespace cker
 {
 
 // TODO Change to apply neon for this function if it is faster
-inline void AveragePool(const PoolParams &params, const Shape &input_shape, const float *input_data,
+template <typename T>
+void AveragePool(const PoolParams &, const Shape &, const T *, const Shape &, T *)
+{
+  static_assert(std::is_integral<T>::value || std::is_floating_point<T>::value,
+                "cker::MaxPool : This function supports only integer or floating point");
+  throw std::runtime_error("cker::AveragePool : Unsupported data type");
+}
+
+template <>
+void AveragePool<float>(const PoolParams &params, const Shape &input_shape, const float *input_data,
                         const Shape &output_shape, float *output_data)
 {
   assert(input_shape.DimensionsCount() == 4);
@@ -371,8 +380,10 @@ inline void AveragePool32(const PoolParams &params, const Shape &input_shape,
   }
 }
 
-inline void AveragePool(const PoolParams &params, const Shape &input_shape,
-                        const uint8_t *input_data, const Shape &output_shape, uint8_t *output_data)
+template <>
+void AveragePool<uint8_t>(const PoolParams &params, const Shape &input_shape,
+                          const uint8_t *input_data, const Shape &output_shape,
+                          uint8_t *output_data)
 {
   if (params.filter_height * params.filter_width > 16 * 16)
   {
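
This hunk (and the matching one in MaxPool.h further down) replaces the float/uint8 overloads with one function template whose primary definition rejects unsupported element types, while explicit specializations carry the real kernels. A reduced, self-contained sketch of the pattern (names are illustrative, not cker's):

#include <stdexcept>
#include <type_traits>

// Primary template: compile-time error for non-arithmetic T, runtime error for
// arithmetic T without a dedicated kernel (e.g. int16_t).
template <typename T> void Pool(const T *, T *)
{
  static_assert(std::is_integral<T>::value || std::is_floating_point<T>::value,
                "Pool supports only integer or floating point types");
  throw std::runtime_error("Pool: unsupported data type");
}

// Explicit specialization: the actual kernel for float.
template <> void Pool<float>(const float *in, float *out) { *out = *in; }

int main()
{
  float x = 1.0f, y = 0.0f;
  Pool<float>(&x, &y); // resolves to the float specialization; no throw
  return 0;
}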
index 1bf191b..214f2e6 100644 (file)
@@ -23,6 +23,7 @@
 #include "cker/Utils.h"
 #include "cker/operation/reference/Conv.h"
 #include "cker/operation/optimized/Conv.h"
+#include <iostream>
 #include <vector>
 
 namespace nnfw
@@ -54,18 +55,15 @@ inline void TransposeFloatTensor(const float *input_data, const nnfw::cker::Shap
 class Conv
 {
 public:
-  Conv()
-      : _modified_filter_data(), _im2col_data(), _im2col_shape(4), _need_im2col(false),
-        _prepared(false)
-  {
-  }
+  Conv() : _modified_filter_data(), _im2col_shape(4), _need_im2col(false), _prepared(false) {}
 
   void prepare(const Shape &filter_shape, const float *filter_data, PaddingType padding_type,
-               bool &is_replaced_weights)
+               bool &is_replaced_weights, uint32_t dilationWidthFactor,
+               uint32_t dilationHeightFactor)
   {
     if (!_prepared)
     {
-      if (usableMultiThreaded(padding_type))
+      if (usableMultiThreaded(padding_type, dilationWidthFactor, dilationHeightFactor))
       {
         transposeFilter(filter_shape, filter_data, is_replaced_weights);
       }
@@ -87,7 +85,8 @@ public:
                   const Shape &filter_shape, const float *filter_data, const Shape &bias_shape,
                   const float *bias_data, const Shape &output_shape, float *output_data)
   {
-    if (usableMultiThreaded(params.padding_type))
+    if (usableMultiThreaded(params.padding_type, params.dilation_width_factor,
+                            params.dilation_height_factor))
     {
       bool transposed_in_execution = false;
       if (!_prepared)
@@ -119,15 +118,29 @@ public:
                        params.stride_height);
     }
 
-    uint8_t *im2col_raw_data = _im2col_data.data();
-    optimized::Conv(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
-                    bias_data, output_shape, output_data, _im2col_shape, im2col_raw_data);
+    int im2col_size = _need_im2col ? _im2col_shape.FlatSize() : 1;
+
+    // Use heap if size is larger than 8MB
+    if (im2col_size > 8 * 1024 * 1024)
+    {
+      std::unique_ptr<uint8_t[]> im2col_data = std::make_unique<uint8_t[]>(im2col_size);
+      optimized::Conv(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
+                      bias_data, output_shape, output_data, _im2col_shape, im2col_data.get());
+    }
+    else
+    {
+      uint8_t im2col_data[im2col_size];
+      optimized::Conv(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
+                      bias_data, output_shape, output_data, _im2col_shape, im2col_data);
+    }
   }
 
 private:
-  bool usableMultiThreaded(PaddingType padding_type)
+  bool usableMultiThreaded(PaddingType padding_type, uint32_t dilation_width_factor,
+                           uint32_t dilation_height_factor)
   {
-    return padding_type != PaddingType::kNone && std::thread::hardware_concurrency() > 1;
+    return padding_type != PaddingType::kNone && std::thread::hardware_concurrency() > 1 &&
+           dilation_width_factor == 1 && dilation_height_factor == 1;
   }
 
   void transposeFilter(const Shape &filter_shape, const float *filter_data,
@@ -151,13 +164,11 @@ private:
       _im2col_shape.SetDim(1, output_shape.Dims(1));
       _im2col_shape.SetDim(2, output_shape.Dims(2));
       _im2col_shape.SetDim(3, input_shape.Dims(3) * kernel_shape.Dims(1) * kernel_shape.Dims(2));
-      _im2col_data.resize(_im2col_shape.FlatSize());
     }
   }
 
 private:
   std::vector<float> _modified_filter_data;
-  std::vector<uint8_t> _im2col_data;
   Shape _im2col_shape;
   bool _need_im2col;
   bool _prepared;
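
The Conv change above drops the cached _im2col_data member and sizes a scratch buffer per call, moving to the heap past 8 MB so large im2col shapes cannot overflow the stack; the small-size branch relies on a variable-length array, a GCC/Clang extension rather than standard C++. A portable sketch of the same policy, with a fixed small buffer standing in for the VLA:

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <memory>

void fill_scratch(uint8_t *buf, int64_t n) { std::memset(buf, 0, static_cast<size_t>(n)); }

void with_scratch(int64_t size)
{
  constexpr int64_t kStackLimit = 8 * 1024 * 1024; // same 8 MB cutoff as above
  if (size > kStackLimit)
  {
    auto heap = std::make_unique<uint8_t[]>(static_cast<size_t>(size)); // large: heap
    fill_scratch(heap.get(), size);
  }
  else
  {
    uint8_t small_buf[8 * 1024]; // small: stack (fixed size here for portability)
    fill_scratch(small_buf, std::min<int64_t>(size, sizeof(small_buf)));
  }
}

int main()
{
  with_scratch(16LL * 1024 * 1024); // exercises the heap branch
  return 0;
}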
similarity index 57%
rename from runtime/onert/core/src/ir/operation/Sin.cc
rename to compute/cker/include/cker/operation/Erf.h
index 631505f..a9be365 100644 (file)
  * limitations under the License.
  */
 
-#include "ir/operation/Sin.h"
+#ifndef __NNFW_CKER_ERF_H__
+#define __NNFW_CKER_ERF_H__
 
-#include <cassert>
+#include "cker/Shape.h"
 
-#include "ir/OperationVisitor.h"
+#include <cmath>
 
-namespace onert
+namespace nnfw
 {
-namespace ir
+namespace cker
 {
-namespace operation
-{
-
-void Sin::accept(OperationVisitor &v) const { v.visit(*this); }
 
-Sin::Sin(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+inline void Erf(const Shape &input_shape, const float *input_data, const Shape &output_shape,
+                float *output_data)
 {
+  const int size = MatchingFlatSize(input_shape, output_shape);
+  for (int i = 0; i < size; i++)
+  {
+    output_data[i] = std::erf(input_data[i]);
+  }
 }
 
-} // namespace operation
-} // namespace ir
-} // namespace onert
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_ERF_H__
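
A hypothetical call of the new Erf kernel, assuming the cker headers are on the include path and that nnfw::cker::Shape accepts an initializer list (as it does elsewhere in cker):

#include <cker/operation/Erf.h>

#include <cassert>
#include <cmath>

int main()
{
  const nnfw::cker::Shape shape{1, 1, 1, 3}; // four dims, three elements total
  const float in[3] = {-1.0f, 0.0f, 1.0f};
  float out[3] = {0.0f, 0.0f, 0.0f};

  nnfw::cker::Erf(shape, in, shape, out);

  assert(out[1] == 0.0f);                     // erf(0) = 0
  assert(std::abs(out[2] - 0.8427f) < 1e-3f); // erf(1) ~= 0.84270
  assert(std::abs(out[0] + 0.8427f) < 1e-3f); // erf is odd
  return 0;
}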
index 326a3ee..326a44f 100644 (file)
@@ -77,6 +77,62 @@ inline void LogSoftmax(const SoftmaxParams &params, const Shape &input_shape,
   }
 }
 
+inline void LogSoftmax(const SoftmaxParams &params, float input_scale, const Shape &input_shape,
+                       const uint8_t *input_data, const Shape &output_shape, uint8_t *output_data)
+{
+  const int rank = input_shape.DimensionsCount();
+  const int axis = (params.axis < 0) ? params.axis + rank : params.axis;
+  const double beta = params.beta;
+  const int depth = MatchingDim(input_shape, axis, output_shape, axis);
+
+  const int32_t clamp_max = std::numeric_limits<uint8_t>::max();
+  const int32_t clamp_min = std::numeric_limits<uint8_t>::min();
+
+  int outer_size = 1;
+  for (int i = 0; i < axis; ++i)
+  {
+    outer_size *= input_shape.Dims(i);
+  }
+
+  int inner_size = 1;
+  for (int i = axis + 1; i < rank; ++i)
+  {
+    inner_size *= input_shape.Dims(i);
+  }
+
+  for (int i = 0; i < outer_size; ++i)
+  {
+    for (int j = 0; j < inner_size; ++j)
+    {
+      uint8_t max_val = std::numeric_limits<uint8_t>::min();
+      for (int c = 0; c < depth; ++c)
+      {
+        max_val = std::max(max_val, input_data[(i * depth + c) * inner_size + j]);
+      }
+
+      float sum_exp = 0.0f;
+      const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+      const float *table_offset = &params.table[max_uint8 - max_val];
+      for (int c = 0; c < depth; ++c)
+      {
+        sum_exp += table_offset[input_data[(i * depth + c) * inner_size + j]];
+      }
+      const float log_sum_exp = std::log(sum_exp);
+
+      const float scale = input_scale / params.scale;
+      const float precomputed = (input_scale * max_val * beta + log_sum_exp) / params.scale;
+      for (int c = 0; c < depth; ++c)
+      {
+        const float log_prob =
+            scale * input_data[(i * depth + c) * inner_size + j] * beta - precomputed;
+        const int32_t prob_quantized = std::rint(log_prob) + params.zero_point;
+        output_data[(i * depth + c) * inner_size + j] =
+            static_cast<uint8_t>(std::max(std::min(clamp_max, prob_quantized), clamp_min));
+      }
+    }
+  }
+}
+
 } // namespace cker
 } // namespace nnfw
 
index 3395472..ea3fcac 100644 (file)
@@ -31,7 +31,15 @@ namespace nnfw
 namespace cker
 {
 
-inline void MaxPool(const PoolParams &params, const Shape &input_shape, const float *input_data,
+template <typename T> void MaxPool(const PoolParams &, const Shape &, const T *, const Shape &, T *)
+{
+  static_assert(std::is_integral<T>::value || std::is_floating_point<T>::value,
+                "cker::MaxPool : This function supports only integer or floating point");
+  throw std::runtime_error("cker::MaxPool : Unsupported data type");
+}
+
+template <>
+void MaxPool<float>(const PoolParams &params, const Shape &input_shape, const float *input_data,
                     const Shape &output_shape, float *output_data)
 {
   assert(input_shape.DimensionsCount() == 4);
@@ -86,8 +94,9 @@ inline void MaxPool(const PoolParams &params, const Shape &input_shape, const fl
   }
 }
 
-inline void MaxPool(const PoolParams &params, const Shape &input_shape, const uint8_t *input_data,
-                    const Shape &output_shape, uint8_t *output_data)
+template <>
+void MaxPool<uint8_t>(const PoolParams &params, const Shape &input_shape, const uint8_t *input_data,
+                      const Shape &output_shape, uint8_t *output_data)
 {
 
   // Here, and in other pooling ops, in order to maintain locality of reference,
index bb39461..13e50b8 100644 (file)
@@ -32,6 +32,44 @@ namespace nnfw
 namespace cker
 {
 
+// Performs softmax along the input of size (input_size * batch_size).
+inline void Softmax(const float *in, const int input_size, const int batch_size, const float beta,
+                    float *out)
+{
+  assert(input_size > 0);
+
+  // For each batch
+  for (int b = 0; b < batch_size; b++)
+  {
+    // Find the max coeff.
+    float max_coeff = in[0];
+    for (int i = 1; i < input_size; i++)
+    {
+      if (in[i] > max_coeff)
+        max_coeff = in[i];
+    }
+
+    // Compute the normalized sum of exps.
+    float exp_sum = 0.0;
+    for (int i = 0; i < input_size; i++)
+    {
+      out[i] = std::exp((in[i] - max_coeff) * beta);
+      exp_sum += out[i];
+    }
+
+    // Divide by the sum of exps.
+    float reciprocal_sum_exp = 1.f / exp_sum;
+    for (int i = 0; i < input_size; i++)
+    {
+      out[i] *= reciprocal_sum_exp;
+    }
+
+    // Advance in and out pointers for the next batch.
+    in += input_size;
+    out += input_size;
+  }
+}
+
 inline void Softmax(const SoftmaxParams &params, const Shape &input_shape, const float *input_data,
                     const Shape &output_shape, float *output_data)
 {
index 2b2ea8f..ac50699 100644 (file)
@@ -148,9 +148,73 @@ inline void AddElementwiseQuant8(int size, const BinaryArithmeticOpParam &params
                                  uint8_t *output_data)
 {
   int i = 0;
+
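+  // Quantized add pipeline (both paths below): offset each input, left-shift for
+  // headroom, rescale by the per-input multiplier/shift, add, rescale to the
+  // output scale, then apply the output offset and clamp to the activation range.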
+#ifdef USE_NEON
+  const uint8x8_t output_activation_min_vector = vdup_n_u8(params.quantized_activation_min);
+  const uint8x8_t output_activation_max_vector = vdup_n_u8(params.quantized_activation_max);
+  for (; i <= size - 8; i += 8)
+  {
+    const uint8x8_t input1_val_original = vld1_u8(input1_data + i);
+    const uint8x8_t input2_val_original = vld1_u8(input2_data + i);
+    const int16x8_t input1_val_s16 = vreinterpretq_s16_u16(vmovl_u8(input1_val_original));
+    const int16x8_t input2_val_s16 = vreinterpretq_s16_u16(vmovl_u8(input2_val_original));
+    const int16x8_t input1_val = vaddq_s16(input1_val_s16, vdupq_n_s16(params.input1_offset));
+    const int16x8_t input2_val = vaddq_s16(input2_val_s16, vdupq_n_s16(params.input2_offset));
+    const int16x4_t input1_val_high = vget_high_s16(input1_val);
+    const int16x4_t input1_val_low = vget_low_s16(input1_val);
+    const int16x4_t input2_val_high = vget_high_s16(input2_val);
+    const int16x4_t input2_val_low = vget_low_s16(input2_val);
+    int32x4_t x11 = vmovl_s16(input1_val_low);
+    int32x4_t x12 = vmovl_s16(input1_val_high);
+    int32x4_t x21 = vmovl_s16(input2_val_low);
+    int32x4_t x22 = vmovl_s16(input2_val_high);
+    const int32x4_t left_shift_dup = vdupq_n_s32(params.left_shift);
+    x11 = vshlq_s32(x11, left_shift_dup);
+    x12 = vshlq_s32(x12, left_shift_dup);
+    x21 = vshlq_s32(x21, left_shift_dup);
+    x22 = vshlq_s32(x22, left_shift_dup);
+    x11 = vqrdmulhq_n_s32(x11, params.input1_multiplier);
+    x12 = vqrdmulhq_n_s32(x12, params.input1_multiplier);
+    x21 = vqrdmulhq_n_s32(x21, params.input2_multiplier);
+    x22 = vqrdmulhq_n_s32(x22, params.input2_multiplier);
+    const int32x4_t input1_shift_dup = vdupq_n_s32(params.input1_shift);
+    const int32x4_t input2_shift_dup = vdupq_n_s32(params.input2_shift);
+    x11 = vshlq_s32(x11, input1_shift_dup);
+    x12 = vshlq_s32(x12, input1_shift_dup);
+    x21 = vshlq_s32(x21, input2_shift_dup);
+    x22 = vshlq_s32(x22, input2_shift_dup);
+    int32x4_t s1 = vaddq_s32(x11, x21);
+    int32x4_t s2 = vaddq_s32(x12, x22);
+    s1 = vqrdmulhq_n_s32(s1, params.output_multiplier);
+    s2 = vqrdmulhq_n_s32(s2, params.output_multiplier);
+    using gemmlowp::RoundingDivideByPOT;
+    s1 = RoundingDivideByPOT(s1, -params.output_shift);
+    s2 = RoundingDivideByPOT(s2, -params.output_shift);
+    const int16x4_t s1_narrowed = vmovn_s32(s1);
+    const int16x4_t s2_narrowed = vmovn_s32(s2);
+    const int16x8_t s =
+        vaddq_s16(vcombine_s16(s1_narrowed, s2_narrowed), vdupq_n_s16(params.output_offset));
+    const uint8x8_t clamped = vmax_u8(output_activation_min_vector,
+                                      vmin_u8(output_activation_max_vector, vqmovun_s16(s)));
+    vst1_u8(output_data + i, clamped);
+  }
+#endif // USE_NEON
   for (; i < size; ++i)
   {
-    int32_t clamped_output = quant8_sum(params, input1_data[i], input2_data[i]);
+    const int32_t input1_val = params.input1_offset + input1_data[i];
+    const int32_t input2_val = params.input2_offset + input2_data[i];
+    const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
+    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
+    const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+        shifted_input1_val, params.input1_multiplier, params.input1_shift);
+    const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+        shifted_input2_val, params.input2_multiplier, params.input2_shift);
+    const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+    const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+                                   raw_sum, params.output_multiplier, params.output_shift) +
+                               params.output_offset;
+    const int32_t clamped_output = std::min(params.quantized_activation_max,
+                                            std::max(params.quantized_activation_min, raw_output));
     output_data[i] = static_cast<uint8_t>(clamped_output);
   }
 }
@@ -392,10 +456,62 @@ inline void MulElementwiseQuant8(int size, const BinaryArithmeticOpParam &params
                                  uint8_t *output_data)
 {
   int i = 0;
-  int32_t clamped_output;
-  for (; i < size; i++)
+
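+  // Quantized mul pipeline (both paths below): offset each input, multiply,
+  // rescale by the output multiplier/shift, then apply the output offset and
+  // clamp to the activation range.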
+#ifdef USE_NEON
+  const auto input1_offset_vector = vdupq_n_s16(params.input1_offset);
+  const auto input2_offset_vector = vdupq_n_s16(params.input2_offset);
+  const auto output_offset_vector = vdupq_n_s16(params.output_offset);
+  const auto output_activation_min_vector = vdup_n_u8(params.quantized_activation_min);
+  const auto output_activation_max_vector = vdup_n_u8(params.quantized_activation_max);
+  const int left_shift = std::max(0, params.output_shift);
+  const int right_shift = std::max(0, -params.output_shift);
+  const int32x4_t left_shift_vec = vdupq_n_s32(left_shift);
+  for (; i <= size - 8; i += 8)
+  {
+    // We load / store 8 at a time, multiplying as two sets of 4 int32s.
+    const auto input1_val_original = vld1_u8(input1_data + i);
+    const auto input2_val_original = vld1_u8(input2_data + i);
+    const auto input1_val_s16 = vreinterpretq_s16_u16(vmovl_u8(input1_val_original));
+    const auto input2_val_s16 = vreinterpretq_s16_u16(vmovl_u8(input2_val_original));
+    const auto input1_val = vaddq_s16(input1_val_s16, input1_offset_vector);
+    const auto input2_val = vaddq_s16(input2_val_s16, input2_offset_vector);
+
+    const auto input1_val_low = vget_low_s16(input1_val);
+    const auto input1_val_high = vget_high_s16(input1_val);
+    const auto input2_val_low = vget_low_s16(input2_val);
+    const auto input2_val_high = vget_high_s16(input2_val);
+
+    auto p1 = vmull_s16(input2_val_low, input1_val_low);
+    auto p2 = vmull_s16(input2_val_high, input1_val_high);
+
+    p1 = vshlq_s32(p1, left_shift_vec);
+    p2 = vshlq_s32(p2, left_shift_vec);
+    p1 = vqrdmulhq_n_s32(p1, params.output_multiplier);
+    p2 = vqrdmulhq_n_s32(p2, params.output_multiplier);
+    using gemmlowp::RoundingDivideByPOT;
+    p1 = RoundingDivideByPOT(p1, right_shift);
+    p2 = RoundingDivideByPOT(p2, right_shift);
+
+    const auto p1_narrowed = vqmovn_s32(p1);
+    const auto p2_narrowed = vqmovn_s32(p2);
+    const auto p = vaddq_s16(vcombine_s16(p1_narrowed, p2_narrowed), output_offset_vector);
+    const auto clamped = vmax_u8(output_activation_min_vector,
+                                 vmin_u8(output_activation_max_vector, vqmovun_s16(p)));
+    vst1_u8(output_data + i, clamped);
+  }
+#endif // USE_NEON
+
+  for (; i < size; ++i)
   {
-    clamped_output = quant8_mul(params, input1_data[i], input2_data[i]);
+    const int32_t input1_val = params.input1_offset + input1_data[i];
+    const int32_t input2_val = params.input2_offset + input2_data[i];
+    const int32_t unclamped_result =
+        params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val,
+                                                             params.output_multiplier,
+                                                             params.output_shift);
+    const int32_t clamped_output =
+        std::min(params.quantized_activation_max,
+                 std::max(params.quantized_activation_min, unclamped_result));
     output_data[i] = static_cast<uint8_t>(clamped_output);
   }
 }
index 3f4ff8a..ae1f9e7 100644 (file)
@@ -116,19 +116,106 @@ inline void ExtractPatchIntoBufferColumn(const Shape &input_shape, int w, int h,
   }
 }
 
+// Supports per-batch zero_byte for per-batch asymmetric quantized inputs.
+template <typename T>
+void DilatedIm2col(const ConvParams &params, const Shape &input_shape, const T *input_data,
+                   const Shape &filter_shape, const Shape &output_shape, T *im2col_data,
+                   const int32_t *zero_bytes, const int zero_bytes_len)
+{
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  assert(input_shape.DimensionsCount() == 4);
+  assert(filter_shape.DimensionsCount() == 4);
+  assert(output_shape.DimensionsCount() == 4);
+
+  // For dilated convolution, the input pixels are not contiguous therefore we
+  // can't use the same optimizations as Im2Col(). Note that this code would also
+  // work for the non-dilated case (though likely a bit slower).
+  assert(dilation_width_factor != 1 || dilation_height_factor != 1);
+  assert(im2col_data);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  MatchingDim(output_shape, 3, filter_shape, 0);
+
+  // Construct the MxN sized im2col matrix.
+  // The rows M, are sub-ordered B x H x W
+  const Shape row_shape({1, batches, output_height, output_width});
+  // The columns, N, are sub-ordered Kh x Kw x Din
+  const Shape col_shape({1, filter_height, filter_width, input_depth});
+  // Use dimensions M and N to construct dims for indexing directly into im2col
+  const Shape im2col_shape({1, 1, row_shape.FlatSize(), col_shape.FlatSize()});
+
+  // Loop through the output rows (B x H x W)
+  for (int batch = 0; batch < batches; ++batch)
+  {
+    const T zero_byte =
+        zero_bytes_len > 1 ? static_cast<T>(zero_bytes[batch]) : static_cast<T>(zero_bytes[0]);
+    for (int out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (int out_x = 0; out_x < output_width; ++out_x)
+      {
+        // Each im2col row is an output pixel. Arrange the input data in this
+        // row in an order we can conveniently multiply with the filter data.
+        int row_offset = Offset(row_shape, 0, batch, out_y, out_x);
+        const int in_x_origin = (out_x * stride_width) - pad_width;
+        const int in_y_origin = (out_y * stride_height) - pad_height;
+        // Loop through all the pixels of the filter (Kh x Kw)
+        for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+        {
+          const int in_y = in_y_origin + dilation_height_factor * filter_y;
+          if ((in_y >= 0) && (in_y < input_height))
+          {
+            // Filter row is within the input data.
+            // Loop through all the filter pixels in this row.
+            for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+            {
+              const int in_x = in_x_origin + dilation_width_factor * filter_x;
+              int col_offset = Offset(col_shape, 0, filter_y, filter_x, 0);
+              T *dst = im2col_data + Offset(im2col_shape, 0, 0, row_offset, col_offset);
+              if ((in_x >= 0) && (in_x < input_width))
+              {
+                // Filter pixel is within the input, copy the input data.
+                T const *src = input_data + Offset(input_shape, batch, in_y, in_x, 0);
+                memcpy(dst, src, input_depth * sizeof(T));
+              }
+              else
+              {
+                // Filter pixel is outside the input, zero it out.
+                memset(dst, zero_byte, input_depth * sizeof(T));
+              }
+            }
+          }
+          else
+          {
+            // Filter row is outside the input, zero out the entire filter row.
+            int col_offset = Offset(col_shape, 0, filter_y, 0, 0);
+            T *dst = im2col_data + Offset(im2col_shape, 0, 0, row_offset, col_offset);
+            memset(dst, zero_byte, filter_width * input_depth * sizeof(T));
+          }
+        }
+      }
+    }
+  }
+}
+
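+// Backward-compatible overload: broadcasts a single zero_byte to every batch.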
 template <typename T>
 void DilatedIm2col(const ConvParams &params, uint8_t zero_byte, const Shape &input_shape,
                    const T *input_data, const Shape &filter_shape, const Shape &output_shape,
                    T *im2col_data)
 {
-  (void)params;
-  (void)zero_byte;
-  (void)input_shape;
-  (void)input_data;
-  (void)filter_shape;
-  (void)output_shape;
-  (void)im2col_data;
-  throw std::runtime_error{"NYI: cker DilatedIm2col"};
+  const int32_t zero_point = static_cast<int32_t>(zero_byte);
+  DilatedIm2col<T>(params, input_shape, input_data, filter_shape, output_shape, im2col_data,
+                   &zero_point, 1);
 }
 
 template <typename T>
index 7a2b896..f7e3924 100644 (file)
@@ -37,7 +37,7 @@ inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shap
                                const T *input2_data, const Shape &output_shape, T *output_data,
                                const std::function<T(const T &, const T &)> &fn)
 {
-  const int32_t flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+  const int32_t flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
   for (int i = 0; i < flat_size; ++i)
   {
     output_data[i] = ActivationFunctionWithMinMax(fn(input1_data[i], input2_data[i]),
@@ -53,7 +53,7 @@ inline void BinaryArithmeticOp(const BinaryArithmeticOpParam &params, const Shap
                                float *output_data,
                                const std::function<float(const float &, const float &)> &fn)
 {
-  const int size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+  const int size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
   for (int i = 0; i < size; i++)
   {
     output_data[i] =
index c016d2b..92aac3e 100644 (file)
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
 set(TEST_COMPUTE test_compute)
 
 file(GLOB_RECURSE TESTS "*.cc")
index 649b677..9b87009 100644 (file)
@@ -21,7 +21,7 @@ copyright = '2020, Samsung Research & contributors'
 author = 'Samsung Research & contributors'
 
 # The full version, including alpha/beta/rc tags
-release = '1.8.0'
+release = '1.9.0'
 
 # -- General configuration ---------------------------------------------------
 
index 4e295ba..ab449c4 100644 (file)
@@ -176,6 +176,63 @@ void Dumper::visit(const Select &node)
 }
 ```
 
+5. Add code for shape inference
+- ONE runtime tries to calculate shapes and allocate memory at compilation time. For output shapes that cannot be calculated at compilation time, ONE runtime calculates shapes and allocates memory at execution time.
+- Shape calculation at compilation time is called _static shape inference_ and shape calculation at execution time is called _dynamic shape inference_. A sketch of the underlying shape-inference helper follows the examples below.
+- [`StaticShapeInference.h`](/runtime/onert/compiler/StaticShapeInference.h)
+
+```CPP
+  void visit(const ir::operation::Select &op) override;
+```
+- [`StaticShapeInference.cc`](/runtime/onert/core/src/compiler/StaticShapeInference.cc)
+```CPP
+void StaticShapeInferer::visit(const ir::operation::Select &op)
+{
+  const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::CONDITION)};
+  const auto &input_cond = _operands.at(input_cond_idx);
+
+  const auto &input_true = ...
+  const auto &input_false = ...
+  ir::Operand &output = ...
+
+  // Select output shpae
+  ir::Shape new_shape = shape_inference::inferSelectShape(
+      input_cond.info().shape(), input_true.info().shape(), input_false.info().shape());
+  output.info().shape(new_shape);
+}
+```
+- [`DynamicShapeInference.h`](/runtime/onert/core/include/exec/DynamicShapeInference.h)
+```CPP
+  void visit(const ir::operation::Select &op) override;
+```
+- [`DynamicShapeInference.cc`](/runtime/onert/core/src/exec/DynamicShapeInference.cc)
+```CPP
+void DynamicShapeInferer::visit(const ir::operation::Select &op)
+{
+  const auto input_cond_idx = op.getInputs().at(ir::operation::Select::Input::CONDITION);
+  const auto &input_cond = _tensor_registry->getITensor(input_cond_idx);
+
+  const auto &input_true = ...
+  const auto &input_false = ...
+  auto output = ...
+
+  if ((!input_cond->is_dynamic()) && (!input_true->is_dynamic()) && (!input_false->is_dynamic()))
+  {
+    return;
+  }
+
+  auto input_cond_shape = input_cond->getShape();
+  auto input_true_shape = input_true->getShape();
+  auto input_false_shape = input_false->getShape();
+
+  // Select output shape
+  ir::Shape new_shape =
+      shape_inference::inferSelectShape(input_cond_shape, input_true_shape, input_false_shape);
+
+  dynamicTensorManagerOf(output)->applyShape(output_ind, new_shape);
+}
+```
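+
+Both inferers above delegate the actual shape computation to a shared helper. A minimal sketch of what `inferSelectShape` computes, assuming same-rank inputs (the `broadcast` lambda is illustrative, not the actual API; the real helper also aligns ranks):
+```CPP
+// Illustrative only: Select's output shape is the broadcast of its three inputs.
+ir::Shape inferSelectShape(const ir::Shape &cond, const ir::Shape &on_true,
+                           const ir::Shape &on_false)
+{
+  auto broadcast = [](const ir::Shape &a, const ir::Shape &b) {
+    assert(a.rank() == b.rank()); // simplification for this sketch
+    ir::Shape out(a.rank());
+    for (int i = 0; i < a.rank(); ++i)
+    {
+      // Dimensions must match, or one of them must be 1 (broadcastable).
+      assert(a.dim(i) == b.dim(i) || a.dim(i) == 1 || b.dim(i) == 1);
+      out.dim(i) = std::max(a.dim(i), b.dim(i));
+    }
+    return out;
+  };
+  return broadcast(broadcast(cond, on_true), on_false);
+}
+```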
+
 ## Frontend
 
 This module generates IR from a model. There are two kinds of frontend: Loader and NNAPI. Loader loads a model file and generates IR from it. NNAPI generates IR from a model set via the [Android Neural Networks API](https://developer.android.com/ndk/guides/neuralnetworks).
diff --git a/docs/release/1.9/release-note-1.9.0.md b/docs/release/1.9/release-note-1.9.0.md
new file mode 100644 (file)
index 0000000..5ac434b
--- /dev/null
@@ -0,0 +1,38 @@
+# Release Note 1.9.0
+
+## ONE Compiler
+
+### Compiler supports more operations
+
+- NonMaxSuppressionV4, NonMaxSuppressionV5, PadV2, Unique
+
+### Changes
+
+- Quantization enhancements: channel-wise UINT8 quantization (Conv2D, DepthwiseConv, TransposeConv, FullyConnected)
+- Experimental requantization from INT8 to UINT8
+- Adding more operator value tests
+- tf2tfliteV2 supports conversion from Keras models and saved models
+- Refactoring long class code with visitor patterns for better maintainability
+- Introducing an optimization pass that fuses batch normalization into Transposed Convolution
+
+
+## ONE Runtime
+
+### Runtime backend operation support
+
+- CPU backend: RANK
+- CPU backend qasymm uint8: LOG_SOFTMAX
+- ACL-CL backend: LEAKY_RELU, RESIZE_NEAREST_NEIGHBOR
+
+
+### Optimization
+
+- Copy Elimination between compatible backends
+
+### Operation Implementation
+
+- Operations with the same parameters are unified
+
+### Change
+
+- CPU backend qasymm uint8 performance enhancement: arithmetic operations
index 3768cf0..857a589 100644 (file)
@@ -1 +1,13 @@
 # Compute
+
+The `compute` directory contains libraries that perform the actual computation of neural network operations. These libraries are used by backends. Currently there are two libraries.
+
+## ARMComputeEx
+
+It is an extension of ARM [ComputeLibrary](https://github.com/ARM-software/ComputeLibrary) that supports operations not yet available in ComputeLibrary. It is used by the `acl_cl` and `acl_neon` backends.
+
+The code structure mirrors ComputeLibrary's. Some code may be copied from a newer version of ComputeLibrary to quickly support operations that the currently used version does not yet include.
+
+## cker
+
+"cker" stands for Cpu KERnel. It is a port of Tensorflow lite's operation kernels and possibly there are some own code. It is used by `cpu` backend.
index 1fff1b4..76b000a 100644 (file)
@@ -1 +1 @@
-https://bitbucket.org/eigen/eigen/get/fd6845384b86.tar.gz
+https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/fd6845384b86.tar.gz
index ea2a6ca..52cda7c 100644 (file)
@@ -9,7 +9,7 @@ function(_BoostSource_import)
 
   # EXTERNAL_DOWNLOAD_SERVER will be overwritten by CI server to use mirror server.
   envoption(EXTERNAL_DOWNLOAD_SERVER "http://sourceforge.net")
-  set(BOOST_URL ${EXTERNAL_DOWNLOAD_SERVER}/projects/boost/files/boost/1.58.0/boost_1_58_0.tar.gz)
+  envoption(BOOST_URL ${EXTERNAL_DOWNLOAD_SERVER}/projects/boost/files/boost/1.58.0/boost_1_58_0.tar.gz)
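+  # With envoption, BOOST_URL itself may now also be overridden from the environment (e.g. to point at a mirror).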
   ExternalSource_Download(BOOST ${BOOST_URL})
 
   set(BoostSource_DIR ${BOOST_SOURCE_DIR} PARENT_SCOPE)
index a0cbf82..4aaeb3d 100644 (file)
@@ -10,7 +10,7 @@ function(_EigenSource_import)
   # NOTE TensorFlow 1.13.1 uses https://bitbucket.org/eigen/eigen/get/9f48e814419e.tar.gz
   #      but it has a issue https://eigen.tuxfamily.org/bz/show_bug.cgi?id=1643
   #      The following URL resolves above issue
-  envoption(EXTERNAL_DOWNLOAD_SERVER "https://bitbucket.org")
+  envoption(EXTERNAL_DOWNLOAD_SERVER "https://mirror.bazel.build/bitbucket.org")
   envoption(EIGEN_1_13_1_URL ${EXTERNAL_DOWNLOAD_SERVER}/eigen/eigen/get/88fc23324517.tar.gz)
 
   ExternalSource_Download(EIGEN
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake
new file mode 100644 (file)
index 0000000..bc13d62
--- /dev/null
@@ -0,0 +1,20 @@
+function(_TensorFlowGEMMLowpSource_import)
+  if(NOT DOWNLOAD_GEMMLOWP)
+    set(TensorFlowGEMMLowpSource_FOUND FALSE PARENT_SCOPE)
+    return()
+  endif(NOT DOWNLOAD_GEMMLOWP)
+
+  nnas_include(ExternalSourceTools)
+  nnas_include(OptionTools)
+
+  # Exact version used by TensorFlow v2.3.0.
+  # See tensorflow/tensorflow/workspace.bzl.
+  envoption(TENSORFLOW_2_3_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
+
+  ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.3.0-GEMMLOWP ${TENSORFLOW_2_3_0_GEMMLOWP_URL})
+
+  set(TensorFlowGEMMLowpSource_DIR ${GEMMLOWP_SOURCE_DIR} PARENT_SCOPE)
+  set(TensorFlowGEMMLowpSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowGEMMLowpSource_import)
+
+_TensorFlowGEMMLowpSource_import()
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfigVersion.cmake
new file mode 100644 (file)
index 0000000..04df5eb
--- /dev/null
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.3.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+  set(PACKAGE_VERSION_EXACT TRUE)
+  set(PACKAGE_VERSION_COMPATIBLE TRUE)
+  set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake b/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake
new file mode 100644 (file)
index 0000000..3dbf05e
--- /dev/null
@@ -0,0 +1,20 @@
+function(_TensorFlowRuySource_import)
+  if(NOT DOWNLOAD_RUY)
+    set(TensorFlowRuySource_FOUND FALSE PARENT_SCOPE)
+    return()
+  endif(NOT DOWNLOAD_RUY)
+
+  nnas_include(ExternalSourceTools)
+  nnas_include(OptionTools)
+
+  # Exact version used by TensorFlow v2.3.0.
+  # See tensorflow/third_party/ruy/workspace.bzl
+  envoption(TENSORFLOW_2_3_0_RUY_URL https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip)
+
+  ExternalSource_Download(RUY DIRNAME TENSORFLOW-2.3.0-RUY ${TENSORFLOW_2_3_0_RUY_URL})
+
+  set(TensorFlowRuySource_DIR ${RUY_SOURCE_DIR} PARENT_SCOPE)
+  set(TensorFlowRuySource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowRuySource_import)
+
+_TensorFlowRuySource_import()
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfigVersion.cmake
new file mode 100644 (file)
index 0000000..04df5eb
--- /dev/null
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.3.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+  set(PACKAGE_VERSION_EXACT TRUE)
+  set(PACKAGE_VERSION_COMPATIBLE TRUE)
+  set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
index 0be6885..d416db2 100644 (file)
@@ -89,6 +89,7 @@ option(BUILD_PROTOBUF "Locally build Protocol Buffer from the downloaded source"
 option(DOWNLOAD_EIGEN "Download Eigen source" ON)
 option(DOWNLOAD_FARMHASH "Download farmhash source" ON)
 option(DOWNLOAD_GEMMLOWP "Download GEMM low precision library source" ON)
+option(DOWNLOAD_RUY "Download ruy source" ON)
 option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" ON)
 option(DOWNLOAD_GFLAGS "Download GFlags source" OFF)
 option(DOWNLOAD_FLATBUFFERS "Download FlatBuffers source" ON)
index 3c6b7d9..b3d0581 100644 (file)
@@ -30,6 +30,7 @@ option(BUILD_NNPACKAGE_RUN "Build nnpackge_run" ON)
 option(BUILD_TFLITE_LOADER "Build TensorFlow Lite loader" ON)
 option(BUILD_CIRCLE_LOADER "Build circle loader" ON)
 option(BUILD_TFLITE_LOADER_TEST_TOOL "Build tflite loader testing tool" ON)
+option(BUILD_WITH_HDF5 "Build test tool with HDF5 library" ON)
 option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" ON)
 option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" ON)
 option(INSTALL_TEST_SCRIPTS "Install test scripts" ON)
index 97371f6..d720b20 100644 (file)
@@ -8,11 +8,11 @@ option(BUILD_ANDROID_TFLITE "Enable android support for TensorFlow Lite" ON)
 option(BUILD_ANDROID_BENCHMARK_APP "Enable Android Benchmark App" ON)
 option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
 # Need boost library
-option(DOWNLOAD_BOOST "Download boost source" OFF)
-option(BUILD_BOOST "Build boost source" OFF)
+option(DOWNLOAD_BOOST "Download boost source" ON)
+option(BUILD_BOOST "Build boost source" ON)
 option(BUILD_RUNTIME_NNAPI_TEST "Build Runtime NN API Generated Test" OFF)
 option(BUILD_NNAPI_TEST "Build nnapi_test" OFF)
-option(BUILD_NNPACKAGE_RUN "Build nnpackge_run" OFF)
+option(BUILD_NNPACKAGE_RUN "Build nnpackage_run" ON)
 option(BUILD_TFLITE_RUN "Build tflite-run" OFF)
 option(BUILD_TFLITE_LOADER_TEST_TOOL "Build tflite loader testing tool" OFF)
 option(BUILD_LOGGING "Build logging runtime" OFF)
index 590d128..57d4c10 100644 (file)
@@ -6,6 +6,5 @@ option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source"
 option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
 
 option(BUILD_LOGGING "Build logging runtime" OFF)
-option(BUILD_TFLITE_RUN "Build tflite-run" OFF)
 option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
 option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
index 25c848f..c27a7ad 100644 (file)
@@ -6,6 +6,5 @@ option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source"
 option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
 
 option(BUILD_LOGGING "Build logging runtime" OFF)
-option(BUILD_TFLITE_RUN "Build tflite-run" OFF)
 option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
 option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
index c4d7d58..4f60e91 100644 (file)
@@ -13,7 +13,7 @@ function(_Boost_Build Boost_PREFIX)
                     RESULT_VARIABLE Boost_BUILD)
   endif()
 
-  set(BoostBuild_DIR ${BoostSource_DIR})
+  set(BoostBuild_DIR ${CMAKE_BINARY_DIR}/externals/boost)
   set(BoostInstall_DIR ${Boost_PREFIX})
 
   unset(Boost_Options)
@@ -55,18 +55,13 @@ if (NOT BUILD_BOOST)
   endif()
 endif()
 
-set(Boost_PREFIX ${CMAKE_INSTALL_PREFIX})
+set(Boost_PREFIX ${EXT_OVERLAY_DIR})
 
 if(BUILD_BOOST)
   _Boost_Build("${Boost_PREFIX}")
 
-  # Let's use locally built boost to system-wide one so sub modules
-  # needing Boost library and header files can search for them
-  # in ${Boost_PREFIX} directory
-  list(APPEND CMAKE_PREFIX_PATH "${Boost_PREFIX}")
-
   # Without Boost_INCLUDE_DIR, it complains the variable is missing during find_package.
-  set(Boost_INCLUDE_DIR ${CMAKE_INSTALL_PREFIX}/include)
+  set(Boost_INCLUDE_DIR ${Boost_PREFIX}/include)
 
   # 1) without static build, it will complain it cannot find libc++_shared.so.
   # 2) We uses static libraries for other libraries.
index b965ffc..8c2badf 100644 (file)
@@ -4,6 +4,11 @@ unset(HDF5_INCLUDE_DIRS CACHE)
 unset(HDF5_CXX_LIBRARY_hdf5 CACHE)
 unset(HDF5_CXX_LIBRARY_hdf5_cpp CACHE)
 
+if(NOT BUILD_WITH_HDF5)
+  set(HDF5_FOUND FALSE)
+  return()
+endif(NOT BUILD_WITH_HDF5)
+
 # Case 1. external hdf5
 if(DEFINED EXT_HDF5_DIR)
   find_path(HDF5_INCLUDE_DIRS NAMES H5Cpp.h NO_CMAKE_FIND_ROOT_PATH PATHS "${EXT_HDF5_DIR}/include")
index b9ae5b4..b5e133f 100644 (file)
@@ -4,11 +4,16 @@ INVALID_EXIT=0
 
 check_copyright() {
   DIRECTORIES_NOT_TO_BE_TESTED=$1
-  CORRECT_COPYRIGHT="Copyright \(c\) [0-9]+ Samsung Electronics Co\., Ltd\. All Rights Reserved"
+  CORRECT_COPYRIGHT="Copyright \(c\) [0-9\-]+ Samsung Electronics Co\., Ltd\. All Rights Reserved"
 
   FILES_TO_CHECK=$(git ls-files -c --exclude-standard)
   FILES_TO_CHECK_COPYRIGHTS=()
   for f in ${FILES_TO_CHECK[@]}; do
+    # Manually ignore checking
+    if [[ ${f} == +(*/NeuralNetworks.h|*/NeuralNetworksExtensions.h) ]]; then
+      continue
+    fi
+
     # File extension to check
     if [[ ${f} == +(*.h|*.hpp|*.cpp|*.cc|*.c|*.cl) ]]; then
       FILES_TO_CHECK_COPYRIGHTS+=("${f}")
index 22fb335..38533c1 100755 (executable)
@@ -18,7 +18,7 @@ echo ${PROJECT_DIR:=${PWD}}
 
 java -jar $PROJECT_DIR/tca-standalone-0.0.8.jar \
   --outdir=$PROJECT_DIR/tcm-output \
-  --config=$PROJECT_DIR/.ahub/tcchecker-tca/config.yaml \
+  --config=$PROJECT_DIR/src/.ahub/tcchecker-tca/config.yaml \
   --local=$PROJECT_DIR/src \
   --logfile=$PROJECT_DIR/tcm-output/tcm.log \
   --debug
index 046bc8a..6cdfdf0 100755 (executable)
@@ -57,9 +57,11 @@ mkdir -p ${NNCC_INSTALL_PREFIX}
 # create python virtual environment
 ./nncc docker-run python3 -m venv "${NNCC_INSTALL_PREFIX}/bin/venv"
 
+# TODO Remove the pinned versions 'pip==20.2.1 setuptools==49.3.0'
+# NOTE Pinning these versions is a temporary hotfix for the setuptools 50.x.y releases
 ./nncc docker-run "${NNCC_INSTALL_PREFIX}/bin/venv/bin/python" \
  -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhosted.org \
-  install -U pip setuptools
+  install -U pip==20.2.1 setuptools==49.3.0
 ./nncc docker-run "${NNCC_INSTALL_PREFIX}/bin/venv/bin/python" \
  -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhosted.org \
   install tensorflow-cpu==2.3.0
index 8f9e86f..05e5584 100755 (executable)
@@ -12,10 +12,12 @@ function Usage()
     echo "Usage: ./tizen_xu4_test.sh --rpm-dir=path/to/rpm-dir"
     echo "Usage: ./tizen_xu4_test.sh --test-suite-path=path/to/test-suite.tar.gz"
     echo "Usage: ./tizen_xu4_test.sh --skip-install-model"
+    echo "Usage: ./tizen_xu4_test.sh --rpm-dir=path/to/rpm-dir --skip-test"
     echo ""
     echo "--rpm-dir <dir>           : directory containing nnfw.rpm and nnfw-test.rpm"
     echo "--test-suite-path <dir>   : filepath to test-suite.tar.gz"
     echo "--skip-install-model      : skip install downloaded model"
+    echo "--skip-test               : skip running test"
     echo "--gcov-dir <dir>          : directory to save gcov files"
 }
 
@@ -36,7 +38,7 @@ function install_model()
     # download api test model file for nnfw_api_gtest
     MODEL_CACHE_DIR=$(mktemp -d)
     tests/scripts/models/run_test.sh --download=on --run=off \
-        --configdir=test/scripts/nnfw_api_gtest/models \
+        --configdir=tests/scripts/models/nnfw_api_gtest \
         --cachedir=$MODEL_CACHE_DIR
     tar -zcf $MODEL_CACHE_DIR/api_model_test.tar.gz -C $MODEL_CACHE_DIR .
     $SDB_CMD push $MODEL_CACHE_DIR/api_model_test.tar.gz $TEST_ROOT/Product/out/unittest_standalone/nnfw_api_gtest_models/
@@ -74,6 +76,7 @@ function prepare_suite_test()
 }
 
 INSTALL_MODEL="1"
+RUN_TEST="1"
 # Parse command argv
 for i in "$@"
 do
@@ -102,6 +105,9 @@ do
         --gcov-dir=*)
             GCOV_DIR=${i#*=}
             ;;
+        --skip-test)
+            RUN_TEST="0"
+            ;;
     esac
     shift
 done
@@ -148,6 +154,11 @@ else
     echo "======= Skip install model ======="
 fi
 
+if [ $RUN_TEST = "0" ]; then
+    echo "======= Skip test ======="
+    exit 0
+fi
+
 if [ -z "${GCOV_DIR}" ]; then
   ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_cl --tflite-loader"
   ${SDB_CMD} shell /bin/bash -c "IGNORE_MD5=1 ${TEST_ROOT}/infra/scripts/test_ubuntu_runtime.sh --backend acl_neon"
index a1157c7..1b8c5fb 100644 (file)
@@ -1,6 +1,6 @@
 Name:    nnfw
 Summary: nnfw
-Version: 1.8.0
+Version: 1.9.0
 Release: 1
 Group:   Development
 License: Apache-2.0 and MIT and BSD-2-Clause
@@ -203,10 +203,12 @@ install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/t
 %{_libdir}/pkgconfig/nnfw-plugin.pc
 %endif
 
+%ifarch arm armv7l aarch64
 %files minimal-app
 %manifest %{name}.manifest
 %defattr(-,root,root,-)
 %{_bindir}/onert-minimal-app
+%endif
 
 %if %{test_build} == 1
 %files test
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.rule b/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.rule
new file mode 100644 (file)
index 0000000..0988ecf
--- /dev/null
@@ -0,0 +1,7 @@
+# To check if BatchNorm op(mul + add) is fused to Transposed Convolution op
+
+RULE    "VERIFY_FILE_FORMAT"      $(verify_file_format) '=' 1
+
+RULE    "TCONV_EXIST"             $(op_count TRANSPOSE_CONV) '=' 1
+RULE    "NO_MUL"                  $(op_count MUL) '=' 0
+RULE    "NO_ADD"                  $(op_count ADD) '=' 0
diff --git a/res/TensorFlowLiteRecipes/NonMaxSuppressionV4_000/test.recipe b/res/TensorFlowLiteRecipes/NonMaxSuppressionV4_000/test.recipe
new file mode 100644 (file)
index 0000000..fabe5ac
--- /dev/null
@@ -0,0 +1,52 @@
+operand {
+  name: "boxes"
+  type: FLOAT32
+  shape { dim: 10 dim: 4 }
+}
+operand {
+  name: "scores"
+  type: FLOAT32
+  shape { dim: 10 }
+}
+operand {
+  name: "max_output_size"
+  type: INT32
+  shape { }
+}
+operand {
+  name: "iou_threshold"
+  type: FLOAT32
+  shape { }
+}
+operand {
+  name: "score_threshold"
+  type: FLOAT32
+  shape { }
+}
+operand {
+  name: "selected_indices"
+  type: INT32
+  shape { }
+}
+operand {
+  name: "valid_outputs"
+  type: INT32
+  shape { }
+}
+operation {
+  type: "NonMaxSuppressionV4"
+  input: "boxes"
+  input: "scores"
+  input: "max_output_size"
+  input: "iou_threshold"
+  input: "score_threshold"
+  output: "selected_indices"
+  output: "valid_outputs"
+}
+input: "boxes"
+input: "scores"
+input: "max_output_size"
+input: "iou_threshold"
+input: "score_threshold"
+output: "selected_indices"
+output: "valid_outputs"
diff --git a/res/TensorFlowLiteRecipes/NonMaxSuppressionV4_000/test.reverse b/res/TensorFlowLiteRecipes/NonMaxSuppressionV4_000/test.reverse
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/res/TensorFlowLiteRecipes/NonMaxSuppressionV4_001/test.recipe b/res/TensorFlowLiteRecipes/NonMaxSuppressionV4_001/test.recipe
new file mode 100644 (file)
index 0000000..12b6067
--- /dev/null
@@ -0,0 +1,52 @@
+operand {
+  name: "boxes"
+  type: FLOAT32
+  shape { dim: 10 dim: 4 }
+}
+operand {
+  name: "scores"
+  type: FLOAT32
+  shape { dim: 10 }
+}
+operand {
+  name: "max_output_size"
+  type: INT32
+  shape { }
+  filler { tag: "explicit" arg: "5" }
+}
+operand {
+  name: "iou_threshold"
+  type: FLOAT32
+  shape { }
+}
+operand {
+  name: "score_threshold"
+  type: FLOAT32
+  shape { }
+}
+operand {
+  name: "selected_indices"
+  type: INT32
+  shape { }
+}
+operand {
+  name: "valid_outputs"
+  type: INT32
+  shape { }
+}
+operation {
+  type: "NonMaxSuppressionV4"
+  input: "boxes"
+  input: "scores"
+  input: "max_output_size"
+  input: "iou_threshold"
+  input: "score_threshold"
+  output: "selected_indices"
+  output: "valid_outputs"
+}
+input: "boxes"
+input: "scores"
+input: "iou_threshold"
+input: "score_threshold"
+output: "selected_indices"
+output: "valid_outputs"
diff --git a/res/TensorFlowLiteRecipes/NonMaxSuppressionV4_001/test.reverse b/res/TensorFlowLiteRecipes/NonMaxSuppressionV4_001/test.reverse
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/res/TensorFlowLiteRecipes/NonMaxSuppressionV5_000/test.recipe b/res/TensorFlowLiteRecipes/NonMaxSuppressionV5_000/test.recipe
new file mode 100644 (file)
index 0000000..5e2616a
--- /dev/null
@@ -0,0 +1,66 @@
+operand {
+  name: "boxes"
+  type: FLOAT32
+  shape { dim: 10 dim: 4 }
+}
+operand {
+  name: "scores"
+  type: FLOAT32
+  shape { dim: 10 }
+}
+operand {
+  name: "max_output_size"
+  type: INT32
+  shape { }
+}
+operand {
+  name: "iou_threshold"
+  type: FLOAT32
+  shape { }
+}
+operand {
+  name: "score_threshold"
+  type: FLOAT32
+  shape { }
+}
+operand {
+  name: "soft_nms_sigma"
+  type: FLOAT32
+  shape { }
+}
+operand {
+  name: "selected_indices"
+  type: INT32
+  shape { }
+}
+operand {
+  name: "selected_scores"
+  type: FLOAT32
+  shape { }
+}
+operand {
+  name: "valid_outputs"
+  type: INT32
+  shape { }
+}
+operation {
+  type: "NonMaxSuppressionV5"
+  input: "boxes"
+  input: "scores"
+  input: "max_output_size"
+  input: "iou_threshold"
+  input: "score_threshold"
+  input: "soft_nms_sigma"
+  output: "selected_indices"
+  output: "selected_scores"
+  output: "valid_outputs"
+}
+input: "boxes"
+input: "scores"
+input: "max_output_size"
+input: "iou_threshold"
+input: "score_threshold"
+input: "soft_nms_sigma"
+output: "selected_indices"
+output: "selected_scores"
+output: "valid_outputs"
diff --git a/res/TensorFlowLiteRecipes/NonMaxSuppressionV5_000/test.reverse b/res/TensorFlowLiteRecipes/NonMaxSuppressionV5_000/test.reverse
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/res/TensorFlowLiteRecipes/NonMaxSuppressionV5_001/test.recipe b/res/TensorFlowLiteRecipes/NonMaxSuppressionV5_001/test.recipe
new file mode 100644 (file)
index 0000000..11d1059
--- /dev/null
@@ -0,0 +1,66 @@
+operand {
+  name: "boxes"
+  type: FLOAT32
+  shape { dim: 10 dim: 4 }
+}
+operand {
+  name: "scores"
+  type: FLOAT32
+  shape { dim: 10 }
+}
+operand {
+  name: "max_output_size"
+  type: INT32
+  shape { }
+  filler { tag: "explicit" arg: "5" }
+}
+operand {
+  name: "iou_threshold"
+  type: FLOAT32
+  shape { }
+}
+operand {
+  name: "score_threshold"
+  type: FLOAT32
+  shape { }
+}
+operand {
+  name: "soft_nms_sigma"
+  type: FLOAT32
+  shape { }
+}
+operand {
+  name: "selected_indices"
+  type: INT32
+  shape { }
+}
+operand {
+  name: "selected_scores"
+  type: FLOAT32
+  shape { }
+}
+operand {
+  name: "valid_outputs"
+  type: INT32
+  shape { }
+}
+operation {
+  type: "NonMaxSuppressionV5"
+  input: "boxes"
+  input: "scores"
+  input: "max_output_size"
+  input: "iou_threshold"
+  input: "score_threshold"
+  input: "soft_nms_sigma"
+  output: "selected_indices"
+  output: "selected_scores"
+  output: "valid_outputs"
+}
+input: "boxes"
+input: "scores"
+input: "iou_threshold"
+input: "score_threshold"
+input: "soft_nms_sigma"
+output: "selected_indices"
+output: "selected_scores"
+output: "valid_outputs"
diff --git a/res/TensorFlowLiteRecipes/NonMaxSuppressionV5_001/test.reverse b/res/TensorFlowLiteRecipes/NonMaxSuppressionV5_001/test.reverse
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/res/TensorFlowLiteRecipes/PadV2_000/test.recipe b/res/TensorFlowLiteRecipes/PadV2_000/test.recipe
new file mode 100644 (file)
index 0000000..563339d
--- /dev/null
@@ -0,0 +1,40 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+  name: "padding"
+  type: INT32
+  shape { dim: 4 dim: 2 }
+  filler {
+    tag: "explicit"
+    arg: "0" arg: "0"
+    arg: "1" arg: "1"
+    arg: "2" arg: "2"
+    arg: "0" arg: "0"
+  }
+}
+operand {
+  name: "constant_values"
+  type: INT32
+  shape { dim: 1 }
+  filler {
+    tag: "explicit"
+    arg: "1" 
+  }
+}
+operand {
+  name: "ofm"
+  type: FLOAT32
+  shape { dim: 1 dim: 5 dim: 7 dim: 2 }
+}
+operation {
+  type: "PadV2"
+  input: "ifm"
+  input: "padding"
+  input: "constant_values"
+  output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/PadV2_000/test.reverse b/res/TensorFlowLiteRecipes/PadV2_000/test.reverse
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/res/TensorFlowLiteRecipes/Tanh_U8_000/test.recipe b/res/TensorFlowLiteRecipes/Tanh_U8_000/test.recipe
new file mode 100644 (file)
index 0000000..0ecb9e9
--- /dev/null
@@ -0,0 +1,19 @@
+operand {
+  name: "ifm"
+  type: UINT8
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+  quant { min: 0 max: 2 scale: 0.0078125 zero_point: 0 }
+}
+operand {
+  name: "ofm"
+  type: UINT8
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+  quant { min: 0 max: 2 scale: 0.0078125 zero_point: 0 }
+}
+operation {
+  type: "Tanh"
+  input: "ifm"
+  output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Tanh_U8_000/test.reverse b/res/TensorFlowLiteRecipes/Tanh_U8_000/test.reverse
new file mode 100644 (file)
index 0000000..e69de29
index 887380c..3110b5e 100644 (file)
@@ -6,7 +6,7 @@ operand {
 operand {
   name: "ofm"
   type: FLOAT32
-  shape { }
+  shape { dim: 0 }
 }
 operand {
   name: "ofm_idx"
index 9beb516..d654f79 100644 (file)
@@ -6,7 +6,7 @@ operand {
 operand {
   name: "ofm"
   type: FLOAT32
-  shape { }
+  shape { dim: 0 }
 }
 operand {
   name: "ofm_idx"
index 67b947f..d9f2393 100644 (file)
@@ -6,7 +6,7 @@ operand {
 operand {
   name: "ofm"
   type: INT32
-  shape { }
+  shape { dim: 0 }
 }
 operand {
   name: "ofm_idx"
index 375db66..de9e87a 100644 (file)
@@ -6,7 +6,7 @@ operand {
 operand {
   name: "ofm"
   type: INT32
-  shape { }
+  shape { dim: 0 }
 }
 operand {
   name: "ofm_idx"
index d3985e4..3906d2c 100644 (file)
@@ -7,7 +7,7 @@ operand {
 operand {
   name: "ofm"
   type: UINT8
-  shape { }
+  shape { dim: 0 }
 }
 operand {
   name: "ofm_idx"
index b08dd85..2bac10a 100644 (file)
@@ -7,7 +7,7 @@ operand {
 operand {
   name: "ofm"
   type: UINT8
-  shape { }
+  shape { dim: 0 }
 }
 operand {
   name: "ofm_idx"
diff --git a/res/TensorFlowPythonExamples/examples/PadV2/__init__.py b/res/TensorFlowPythonExamples/examples/PadV2/__init__.py
new file mode 100644 (file)
index 0000000..99940bf
--- /dev/null
@@ -0,0 +1,8 @@
+import tensorflow as tf
+import numpy as np
+
+input_ = tf.compat.v1.placeholder(shape=[1, 1, 1, 1], dtype=tf.float32)
+paddings_ = tf.compat.v1.constant(
+    np.array([[1, 1], [2, 2], [3, 3], [4, 4]], dtype=np.int32))
+constant_values_ = tf.compat.v1.constant(1, shape=(), dtype=tf.float32)
+op_ = tf.compat.v1.pad(input_, paddings=paddings_, constant_values=constant_values_)
diff --git a/res/TensorFlowPythonExamples/examples/gelu/__init__.py b/res/TensorFlowPythonExamples/examples/gelu/__init__.py
new file mode 100644 (file)
index 0000000..cdc4d62
--- /dev/null
@@ -0,0 +1,7 @@
+# NOTE please use TF2.4.0-dev or above to use gelu op
+import tensorflow as tf
+
+tf.compat.v1.disable_eager_execution()
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+op_ = tf.nn.gelu(in_, approximate=False, name="Output")
diff --git a/res/TensorFlowPythonExamples/examples/gelu_2/__init__.py b/res/TensorFlowPythonExamples/examples/gelu_2/__init__.py
new file mode 100644 (file)
index 0000000..147688d
--- /dev/null
@@ -0,0 +1,7 @@
+# NOTE please use TF2.4.0-dev or above to use gelu op
+import tensorflow as tf
+
+tf.compat.v1.disable_eager_execution()
+
+in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
+op_ = tf.nn.gelu(in_, approximate=True, name="Output")
diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py
new file mode 100644 (file)
index 0000000..b8f010c
--- /dev/null
@@ -0,0 +1,8 @@
+import tensorflow as tf
+
+max_output_size = tf.compat.v1.constant(4)
+
+in_boxes_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(8, 4), name="Hole")
+in_scores_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(8), name="Hole")
+non_max_suppression_padded_ = tf.compat.v1.image.non_max_suppression_padded(
+    in_boxes_, in_scores_, max_output_size)
diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py
new file mode 100644 (file)
index 0000000..42e7bf0
--- /dev/null
@@ -0,0 +1,13 @@
+import tensorflow as tf
+
+max_output_size = tf.compat.v1.constant(6)
+iou_threshold = tf.compat.v1.constant(0.5)
+score_threshold = tf.compat.v1.constant(0.6)
+pad_to_max_output_size = True
+
+in_boxes_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(12, 4), name="Hole")
+in_scores_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(12), name="Hole")
+
+non_max_suppression_padded_ = tf.compat.v1.image.non_max_suppression_padded(
+    in_boxes_, in_scores_, max_output_size, iou_threshold, score_threshold,
+    pad_to_max_output_size)
diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py
new file mode 100644 (file)
index 0000000..32c6173
--- /dev/null
@@ -0,0 +1,10 @@
+import tensorflow as tf
+
+max_output_size = tf.compat.v1.constant(4)
+
+in_boxes_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(8, 4), name="Hole")
+in_scores_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(8), name="Hole")
+
+# non_max_suppression_with_scores requires TF 1.15+
+non_max_suppression_with_scores_ = tf.compat.v1.image.non_max_suppression_with_scores(
+    in_boxes_, in_scores_, max_output_size)
diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py
new file mode 100644 (file)
index 0000000..415f920
--- /dev/null
@@ -0,0 +1,14 @@
+import tensorflow as tf
+
+max_output_size = tf.compat.v1.constant(6)
+iou_threshold = tf.compat.v1.constant(0.5)
+score_threshold = tf.compat.v1.constant(0.6)
+soft_nms_sigma = tf.compat.v1.constant(0.5)
+
+in_boxes_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(12, 4), name="Hole")
+in_scores_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(12), name="Hole")
+
+# non_max_suppression_with_scores requires TF 1.15+
+non_max_suppression_with_scores_ = tf.compat.v1.image.non_max_suppression_with_scores(
+    in_boxes_, in_scores_, max_output_size, iou_threshold, score_threshold,
+    soft_nms_sigma)
index 5c17043..def89ee 100644 (file)
@@ -8,7 +8,7 @@ android {
         minSdkVersion 26
         targetSdkVersion 29
         versionCode 1
-        versionName "1.8.0"
+        versionName "1.9.0"
 
         externalNativeBuild {
             ndkBuild {
index 366627b..13768d4 100644 (file)
@@ -1,3 +1,19 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 /* DO NOT EDIT THIS FILE - it is machine generated */
 #include <jni.h>
 /* Header for class com_samsung_onert_NativeSessionWrapper */
index abb0561..936a897 100644 (file)
@@ -47,7 +47,7 @@ public:
   }
 
   const PhaseOption &option() const { return _option; }
-  const MemoryPoller &mem_poll() const { return _mem_poll; }
+  const MemoryPoller &mem_poll() const { return *_mem_poll; }
   const Phase &at(const std::string &tag) const { return _phases.at(tag); }
 
 private:
@@ -57,7 +57,7 @@ private:
 private:
   const PhaseOption _option;
   std::unordered_map<std::string, Phase> _phases;
-  MemoryPoller _mem_poll;
+  std::unique_ptr<MemoryPoller> _mem_poll;
 };
 
 } // namespace benchmark
index ba0e4e4..61fdecd 100644 (file)
@@ -165,31 +165,24 @@ bool MemoryPoller::end(PhaseEnum phase)
     stop = (_phases.size() == 0);
   }
 
-  if (_rss_map[phase] == 0)
+  mem = getVmRSS();
+  if (_gpu_poll)
   {
-    uint32_t mem = getVmRSS();
-    if (_gpu_poll)
-    {
-      mem += getGpuMemory();
-    }
-    _rss_map[phase] = mem;
+    mem += getGpuMemory();
   }
+  if (mem > _rss_map[phase])
+    _rss_map[phase] = mem;
 
-  if (_hwm_map[phase] == 0)
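+  // Unlike RSS/PSS, VmHWM is already a peak (high-water mark) value, so the
+  // latest reading is stored directly instead of taking a running maximum.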
+  mem = getVmHWM();
+  if (_gpu_poll)
   {
-    uint32_t mem = getVmHWM();
-    if (_gpu_poll)
-    {
-      mem += getGpuMemory();
-    }
-    _hwm_map[phase] = mem;
+    mem += getGpuMemory();
   }
+  _hwm_map[phase] = mem;
 
-  if (_pss_map[phase] == 0)
-  {
-    uint32_t mem = getPssSum();
+  mem = getPssSum();
+  if (mem > _pss_map[phase])
     _pss_map[phase] = mem;
-  }
 
   if (stop)
   {
index 81da508..9ab67cf 100644 (file)
@@ -46,11 +46,13 @@ void SleepForMicros(uint64_t micros)
 namespace benchmark
 {
 
-Phases::Phases(const PhaseOption &option)
-    : _option(option),
-      _mem_poll(std::chrono::milliseconds(option.memory_interval), option.memory_gpu)
+Phases::Phases(const PhaseOption &option) : _option(option)
 {
-  // DO NOTHING
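+  // Create the memory poller only when memory profiling was requested.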
+  if (_option.memory)
+  {
+    _mem_poll = std::make_unique<MemoryPoller>(std::chrono::milliseconds(option.memory_interval),
+                                               option.memory_gpu);
+  }
 }
 
 void Phases::run(const std::string &tag, const PhaseFunc &exec, const PhaseFunc *post,
@@ -61,7 +63,7 @@ void Phases::run(const std::string &tag, const PhaseFunc &exec, const PhaseFunc
   for (uint32_t i = 0; i < loop_num; ++i)
   {
     if (!option_disable && _option.memory)
-      _mem_poll.start(p);
+      _mem_poll->start(p);
 
     uint64_t t = 0u;
     t = nowMicros();
@@ -71,15 +73,15 @@ void Phases::run(const std::string &tag, const PhaseFunc &exec, const PhaseFunc
     t = nowMicros() - t;
 
     if (!option_disable && _option.memory)
-      _mem_poll.end(p);
+      _mem_poll->end(p);
 
     phase.time.emplace_back(t);
 
     if (!option_disable && _option.memory)
     {
-      phase.memory[MemoryType::RSS].emplace_back(_mem_poll.getRssMap().at(p));
-      phase.memory[MemoryType::HWM].emplace_back(_mem_poll.getHwmMap().at(p));
-      phase.memory[MemoryType::PSS].emplace_back(_mem_poll.getPssMap().at(p));
+      phase.memory[MemoryType::RSS].emplace_back(_mem_poll->getRssMap().at(p));
+      phase.memory[MemoryType::HWM].emplace_back(_mem_poll->getHwmMap().at(p));
+      phase.memory[MemoryType::PSS].emplace_back(_mem_poll->getPssMap().at(p));
     }
 
     if (post)
index ef3678b..9348df6 100644 (file)
@@ -103,6 +103,8 @@ typedef enum {
   NNFW_STATUS_INVALID_STATE = 3,
   /** When it is out of memory */
   NNFW_STATUS_OUT_OF_MEMORY = 4,
+  /** When it was given an insufficient output buffer */
+  NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE = 5,
 } NNFW_STATUS;
 
 /**
index 4cd5c58..94f7819 100644 (file)
@@ -62,4 +62,38 @@ typedef struct
 NNFW_STATUS nnfw_register_custom_op_info(nnfw_session *session, const char *id,
                                          custom_kernel_registration_info *info);
 
+/**
+ * @brief Get the input tensor index by name
+ *
+ * This function finds an input tensor of the given name.
+ * If found, the index value is set to the address that @c index points to, and
+ * @c NNFW_STATUS_NO_ERROR is returned. Otherwise, @c index is unchanged and
+ * @c NNFW_STATUS_ERROR is returned.
+ *
+ * @note If two or more input tensors are of the same name, the one with the lowest index is always
+ *       returned.
+ *
+ * @param[in]  session    the session object
+ * @param[in]  tensorname the name of the tensor to find, a null terminated char pointer string
+ * @param[out] index      the index to be returned
+ * @return     @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_input_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index);
+
+/**
+ * @brief Get the output tensor index by name
+ *
+ * This function finds an output tensor of the given name.
+ * If found, the index value is set to the address that @c index points to, and
+ * @c NNFW_STATUS_NO_ERROR is returned. Otherwise, @c index is unchanged and
+ * @c NNFW_STATUS_ERROR is returned.
+ *
+ * @note If two or more output tensors are of the same name, the one with the lowest index is
+ *       always returned.
+ *
+ * @param[in]  session    the session object
+ * @param[in]  tensorname the name of the tensor to find, a null terminated char pointer string
+ * @param[out] index      the index to be returned
+ * @return     @c NNFW_STATUS_NO_ERROR if successful
+ */
+NNFW_STATUS nnfw_output_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index);
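+
+/* Illustrative usage (assumes a prepared session with an input named "input0"):
+ *
+ *   uint32_t index;
+ *   if (nnfw_input_tensorindex(session, "input0", &index) == NNFW_STATUS_NO_ERROR)
+ *   {
+ *     // ... pass `index` to e.g. nnfw_set_input()
+ *   }
+ */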
+
 #endif // __NNFW_EXPERIMENTAL_H__
index 320271a..42e4376 100644 (file)
@@ -21,6 +21,6 @@
  * NNFW_VERSION is a uint32 value representing nnfw runtime version
  * in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
  */
-#define NNFW_VERSION 0x01000800
+#define NNFW_VERSION 0x01000900
 
 #endif // __NNFW_VERSION_H__
index d65158f..ff5e679 100644 (file)
@@ -33,6 +33,7 @@ STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_ERROR, 1);
 STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_UNEXPECTED_NULL, 2);
 STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_INVALID_STATE, 3);
 STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_OUT_OF_MEMORY, 4);
+STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE, 5);
 
 STATIC_ASSERT_ENUM_CHECK(NNFW_LAYOUT_NONE, 0);
 STATIC_ASSERT_ENUM_CHECK(NNFW_LAYOUT_CHANNELS_LAST, 1);
@@ -347,3 +348,15 @@ NNFW_STATUS nnfw_load_circle_from_buffer(nnfw_session *session, uint8_t *buffer,
   NNFW_RETURN_ERROR_IF_NULL(session);
   return session->load_circle_from_buffer(buffer, size);
 }
+
+NNFW_STATUS nnfw_input_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index)
+{
+  NNFW_RETURN_ERROR_IF_NULL(session);
+  return session->input_tensorindex(tensorname, index);
+}
+
+NNFW_STATUS nnfw_output_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index)
+{
+  NNFW_RETURN_ERROR_IF_NULL(session);
+  return session->output_tensorindex(tensorname, index);
+}
index eb0b743..81b4070 100644 (file)
@@ -18,6 +18,7 @@
 #include "CustomKernelRegistry.h"
 #include "compiler/Compiler.h"
 #include "util/ConfigSource.h"
+#include "util/Exceptions.h"
 #include "exec/Execution.h"
 #include "circle_loader.h"
 #include "tflite_loader.h"
@@ -37,6 +38,7 @@
 #define MAX_BACKEND_NAME_LENGTH 32
 #define MAX_OP_NAME_LENGTH 64
 #define MAX_PATH_LENGTH 1024
+#define MAX_TENSOR_NAME_LENGTH 64
 
 // Is the string null-terminated within the given length?
 static bool null_terminating(const char *str, uint32_t length)
@@ -64,6 +66,32 @@ static onert::ir::Layout convertLayout(NNFW_LAYOUT layout)
   return onert::ir::Layout::UNKNOWN;
 }
 
+NNFW_STATUS getTensorIndexImpl(const onert::ir::Graph &graph, const char *tensorname,
+                               uint32_t *index, bool is_input)
+{
+  if (!tensorname || !index)
+    return NNFW_STATUS_UNEXPECTED_NULL;
+
+  if (!null_terminating(tensorname, MAX_TENSOR_NAME_LENGTH))
+  {
+    std::cerr << "nnpackage path is too long" << std::endl;
+    return NNFW_STATUS_ERROR;
+  }
+
+  auto ind_found = is_input ? graph.getInputIndex(tensorname) : graph.getOutputIndex(tensorname);
+
+  if (ind_found.undefined())
+  {
+    // Not found
+    return NNFW_STATUS_ERROR;
+  }
+  else
+  {
+    *index = ind_found.value();
+    return NNFW_STATUS_NO_ERROR;
+  }
+}
+
 nnfw_session::nnfw_session()
     : _subgraphs{nullptr}, _execution{nullptr},
       _kernel_registry{std::make_shared<onert::frontend::custom::KernelRegistry>()}
@@ -213,6 +241,12 @@ NNFW_STATUS nnfw_session::run()
   {
     _execution->execute();
   }
+  catch (const onert::InsufficientBufferSizeException &e)
+  {
+    // Currently, an insufficient buffer always means an output buffer.
+    std::cerr << "Error during nnfw_session::run : " << e.what() << std::endl;
+    return NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE;
+  }
   catch (const std::exception &e)
   {
     std::cerr << "Error during nnfw_session::run : " << e.what() << std::endl;
@@ -447,26 +481,27 @@ NNFW_STATUS nnfw_session::apply_tensorinfo(uint32_t index, nnfw_tensorinfo ti)
     }
   }
 
+  auto ind = primary_subgraph()->getInputs().at(index);
+  auto &input = primary_subgraph()->operands().at(ind);
+
+  onert::ir::Shape new_shape(ti.rank);
+  for (int32_t i = 0; i < ti.rank; i++)
+    new_shape.dim(i) = ti.dims[i];
+
+  // If the passed shape is the same as the model's shape, do nothing
+  if (input.info().shape() == new_shape)
+    return NNFW_STATUS_NO_ERROR;
+
   if (!isStatePreparedOrFinishedRun())
   {
     // In this case, if we apply the input shape to primary_subgraph, it will be propagated
     // through compilation and execution
-    auto ind = primary_subgraph()->getInputs().at(index);
-    auto &input = primary_subgraph()->operands().at(ind);
-
-    onert::ir::Shape new_shape(ti.rank);
-    for (int32_t i = 0; i < ti.rank; i++)
-      new_shape.dim(i) = ti.dims[i];
 
     // overwrite input shape with the shape from ti
     input.info().shape(new_shape);
   }
   else // when called after nnfw_session::prepare()
   {
-    onert::ir::Shape new_shape(ti.rank);
-    for (int32_t i = 0; i < ti.rank; i++)
-      new_shape.dim(i) = ti.dims[i];
-
     _execution->changeInputShape(onert::ir::IOIndex(index), new_shape);
   }
 
@@ -840,3 +875,13 @@ bool nnfw_session::isStatePreparedOrFinishedRun()
 {
   return isStatePrepared() || isStateFinishedRun();
 }
+
+NNFW_STATUS nnfw_session::input_tensorindex(const char *tensorname, uint32_t *index)
+{
+  return getTensorIndexImpl(*primary_subgraph(), tensorname, index, true);
+}
+
+NNFW_STATUS nnfw_session::output_tensorindex(const char *tensorname, uint32_t *index)
+{
+  return getTensorIndexImpl(*primary_subgraph(), tensorname, index, false);
+}
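The apply_tensorinfo rework above hoists the shape construction out of both branches and adds an early return when the requested shape already matches the model, so such calls are now no-ops. From the API side the call looks the same whether it lands before or after prepare; a caller-side sketch with illustrative shape values, assuming the by-value nnfw_apply_tensorinfo signature from nnfw.h:

#include <nnfw.h>

// Sketch: resize input 0 to a new batch size; valid both before and after nnfw_prepare().
NNFW_STATUS resize_input(nnfw_session *session)
{
  nnfw_tensorinfo ti;
  ti.dtype = NNFW_TYPE_TENSOR_FLOAT32;
  ti.rank = 4;
  ti.dims[0] = 2;   // new batch size
  ti.dims[1] = 224; // height
  ti.dims[2] = 224; // width
  ti.dims[3] = 3;   // channels
  return nnfw_apply_tensorinfo(session, 0, ti);
}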
index 1c3c370..604ba38 100644 (file)
@@ -122,8 +122,6 @@ public:
   NNFW_STATUS input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti);
   NNFW_STATUS output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti);
 
-  NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func);
-
   NNFW_STATUS set_available_backends(const char *backends);
   NNFW_STATUS set_op_backend(const char *op, const char *backend);
 
@@ -133,9 +131,16 @@ public:
 
   NNFW_STATUS set_config(const char *key, const char *value);
   NNFW_STATUS get_config(const char *key, char *value, size_t value_size);
-
   NNFW_STATUS load_circle_from_buffer(uint8_t *buffer, size_t size);
 
+  //
+  // Experimental API
+  //
+
+  NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func);
+  NNFW_STATUS input_tensorindex(const char *tensorname, uint32_t *index);
+  NNFW_STATUS output_tensorindex(const char *tensorname, uint32_t *index);
+
 private:
   onert::ir::Graph *primary_subgraph();
   bool isStateInitialized();
index 8aaf516..5c50413 100644 (file)
@@ -25,6 +25,7 @@
 #include "KernelGenerator.h"
 #include "TensorManager.h"
 #include "Optimizer.h"
+#include "AclTensorRegistry.h"
 
 namespace onert
 {
@@ -47,10 +48,13 @@ public:
     const auto &operands = graph.operands();
     const auto &operations = graph.operations();
     auto context = std::make_unique<BackendContext>(this, &graph);
-    auto tb = std::make_shared<TensorBuilder>(operands, createTensorManager(is_linear_executor));
+    auto tm = createTensorManager(is_linear_executor);
+    auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
+    auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+    context->tensor_registry = tr;
     context->tensor_builder = tb;
-    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
-    context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb);
+    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+    context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
     context->tensor_register = nullptr;
     context->optimizer = std::make_shared<Optimizer>(context.get());
     return context;
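The backend context now carries a tensor registry alongside the tensor builder: the builder keeps its allocation duties while constant initializers and kernel generators resolve tensors through the registry. The registry's definition is not part of this diff; a plausible minimal shape, assuming it simply forwards lookups to the ACL tensor manager:

// Assumed sketch only; the real AclTensorRegistry lives in acl_common and is not shown here.
template <typename T_AclTensorManager> class AclTensorRegistry : public ITensorRegistry
{
public:
  AclTensorRegistry(T_AclTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {}

  // Kernel generators call this instead of TensorBuilder::at().
  auto getAclTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind); }

private:
  T_AclTensorManager *_tensor_mgr;
};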
index d7f5f80..31f1c10 100644 (file)
@@ -24,78 +24,17 @@ namespace acl_cl
 {
 
 ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
-                                         const std::shared_ptr<TensorBuilder> &tensor_builder)
-    : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+                                         const std::shared_ptr<ITensorRegistry> &tensor_reg)
+    : acl_common::AclConstantInitializer{operands, tensor_reg}
 {
   // DO NOTHING
 }
 
-void ConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index)
-{
-  assert(node.getInputs().size() > index);
-
-  const auto &input_index = node.getInputs().at(index);
-  const auto &input_obj = _operands.at(input_index);
-  registerCopyInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
-{
-  assert(node.getInputs().size() > index);
-
-  const auto &input_index = node.getInputs().at(index);
-  const auto &input_obj = _operands.at(input_index);
-  registerPermuteInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
-{
-  const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
-  const auto &block_size_obj = _operands.at(block_size_index);
-
-  if (block_size_obj.isConstant())
-  {
-    _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
-      assert(model_obj.data());
-      const auto &shape = model_obj.shape();
-      const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
-      assert(model_obj.shape().rank() == 1);
-      obj.access([&](ITensor &tensor) {
-        for (size_t i = 0; i < shape.num_elements(); ++i)
-        {
-          const int32_t value = base[shape.num_elements() - i - 1];
-          int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
-                                                      tensor.calcOffset({static_cast<int32_t>(i)}));
-          *into = value;
-        }
-      });
-    };
-  }
-}
-
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
-  permuteInputInitialize(node, ir::operation::Conv2D::KERNEL);
-  copyInputInitialize(node, ir::operation::Conv2D::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
-  permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL);
-  copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS);
-}
-
 void ConstantInitializer::visit(const ir::operation::EmbeddingLookup &node)
 {
   copyInputInitialize(node, ir::operation::EmbeddingLookup::LOOKUPS);
 }
 
-void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
-{
-  copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT);
-  copyInputInitialize(node, ir::operation::FullyConnected::BIAS);
-}
-
 void ConstantInitializer::visit(const ir::operation::Gather &node)
 {
   copyInputInitialize(node, ir::operation::Gather::INDICES);
@@ -107,33 +46,6 @@ void ConstantInitializer::visit(const ir::operation::HashtableLookup &node)
   copyInputInitialize(node, ir::operation::HashtableLookup::KEYS);
 }
 
-void ConstantInitializer::visit(const ir::operation::LSTM &node)
-{
-  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS);
-  copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS);
-  copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS);
-  copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::RNN &node)
-{
-  copyInputInitialize(node, ir::operation::RNN::WEIGHTS);
-  copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::RNN::BIAS);
-}
-
 void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
 {
   const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE);
@@ -184,13 +96,6 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
   }
 }
 
-void ConstantInitializer::visit(const ir::operation::TransposeConv &node)
-{
-  const auto &kernel_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
-  const auto &kernel_obj = _operands.at(kernel_index);
-  registerPermuteInitializer(kernel_index, kernel_obj);
-}
-
 } // namespace acl_cl
 } // namespace backend
 } // namespace onert
index c51f72b..4f894fd 100644 (file)
@@ -17,9 +17,7 @@
 #ifndef __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
 #define __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
 
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
-#include "TensorBuilder.h"
+#include "AclConstantInitializer.h"
 
 namespace onert
 {
@@ -28,32 +26,18 @@ namespace backend
 namespace acl_cl
 {
 
-class ConstantInitializer : public IConstantInitializer
+class ConstantInitializer : public acl_common::AclConstantInitializer
 {
 public:
   ConstantInitializer(const ir::Operands &operands,
-                      const std::shared_ptr<TensorBuilder> &tensor_builder);
+                      const std::shared_ptr<ITensorRegistry> &tensor_reg);
 
 public:
-  void visit(const ir::operation::BatchToSpaceND &) override;
-  void visit(const ir::operation::Conv2D &) override;
-  void visit(const ir::operation::DepthwiseConv2D &) override;
-  void visit(const ir::operation::EmbeddingLookup &) override;
-  void visit(const ir::operation::FullyConnected &) override;
-  void visit(const ir::operation::Gather &) override;
-  void visit(const ir::operation::HashtableLookup &) override;
-  void visit(const ir::operation::LSTM &) override;
-  void visit(const ir::operation::RNN &) override;
-  void visit(const ir::operation::SpaceToBatchND &) override;
-  void visit(const ir::operation::TransposeConv &) override;
-
-private:
-  std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
-  void copyInputInitialize(const ir::Operation &node, uint32_t index);
-  void permuteInputInitialize(const ir::Operation &node, uint32_t index);
-
-private:
-  std::shared_ptr<TensorBuilder> _tensor_builder;
+  using acl_common::AclConstantInitializer::visit;
+  void visit(const ir::operation::EmbeddingLookup &) final;
+  void visit(const ir::operation::Gather &) final;
+  void visit(const ir::operation::HashtableLookup &) final;
+  void visit(const ir::operation::SpaceToBatchND &) final;
 };
 
 } // namespace acl_cl
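The using-declaration in the class above is load-bearing: in C++, declaring any visit overload in the derived class hides every inherited visit overload, so without it the operations still handled by AclConstantInitializer would no longer be found by name lookup. A standalone illustration of the name-hiding rule:

#include <iostream>

struct Base
{
  virtual ~Base() = default;
  virtual void visit(int) { std::cout << "Base::visit(int)\n"; }
  virtual void visit(double) { std::cout << "Base::visit(double)\n"; }
};

struct Derived : Base
{
  using Base::visit; // re-expose the inherited overloads hidden by the override below
  void visit(int) override { std::cout << "Derived::visit(int)\n"; }
};

int main()
{
  Derived d;
  d.visit(3.14); // prints Base::visit(double); without the using-declaration, the double
                 // would silently convert to int and call Derived::visit(int) instead
  return 0;
}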
index a84f983..9448925 100644 (file)
@@ -40,15 +40,16 @@ namespace backend
 namespace acl_cl
 {
 
-using ::onert::backend::acl_common::asAclClFunction;
+using ::onert::backend::acl_common::asAclFunction;
 using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
-    ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclClFunction>;
+    ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclFunction>;
 
-KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx,
-                                 const ir::Operations &operations_ctx,
-                                 const std::shared_ptr<TensorBuilder> &tensor_builder)
+KernelGenerator::KernelGenerator(
+    const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+    const std::shared_ptr<TensorBuilder> &tensor_builder,
+    const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
     : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
-      _current_op_seq_layout(ir::Layout::UNKNOWN)
+      _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
 {
   // DO NOTHING
 }
@@ -77,51 +78,69 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
   const auto block_size_index{
       node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto block_size_tensor = _tensor_builder->at(block_size_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
 
   assert(_ctx.at(block_size_index).data());
 
-  auto fn = std::make_unique<::arm_compute::CLBatchToSpaceLayer>();
+  auto fn = acl_common::generateLayer<arm_compute::CLBatchToSpaceLayer>(
+      ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
 
-  fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::Cast &node)
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
+  const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+  const auto activation = node.param().activation;
 
-  std::unique_ptr<::arm_compute::IFunction> fn;
-  if (ifm_tensor->data_type() == ofm_tensor->data_type())
-  {
-    auto l = std::make_unique<::arm_compute::CLCopy>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
 
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle());
+  const auto act_info = acl_common::asActivationLayerInfo(activation);
 
-    fn = std::move(l);
-  }
-  else
+  std::unique_ptr<arm_compute::IFunction> fn;
+  switch (node.param().arithmetic_type)
   {
-    auto l = std::make_unique<::arm_compute::CLCast>();
-
-    // TODO Support converting float to int32 as round down
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
-
-    fn = std::move(l);
+    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLArithmeticAddition>(
+          lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+          arm_compute::ConvertPolicy::SATURATE, act_info);
+      break;
+    }
+    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLArithmeticSubtraction>(
+          lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+          arm_compute::ConvertPolicy::SATURATE, act_info);
+      break;
+    }
+    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLPixelWiseMultiplication>(
+          lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
+          arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
+          act_info);
+      break;
+    }
+    case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLArithmeticDivision>(
+          lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), act_info);
+      break;
+    }
+    default:
+      assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations");
+      break;
   }
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Conv2D &node)
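From here on, the make_unique-then-configure pairs collapse into acl_common::generateLayer, and the separate Add/Sub/Mul/Div visitors merge into the single BinaryArithmetic visitor above. generateLayer is defined outside this diff; presumably it is a small forwarding helper along these lines, including an overload that passes a memory manager to the layer constructor, as the CLConvolutionLayer call site below suggests:

// Assumed sketch of the helper used throughout this file (the real one lives in acl_common).
template <typename Layer, typename... Args>
std::unique_ptr<arm_compute::IFunction> generateLayer(Args &&... args)
{
  auto l = std::make_unique<Layer>();
  l->configure(std::forward<Args>(args)...); // forward everything to configure()
  return l;
}

template <typename Layer, typename... Args>
std::unique_ptr<arm_compute::IFunction>
generateLayer(std::shared_ptr<arm_compute::IMemoryManager> memory_manager, Args &&... args)
{
  // For layers that take an internal buffer manager in their constructor.
  auto l = std::make_unique<Layer>(memory_manager);
  l->configure(std::forward<Args>(args)...);
  return l;
}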
@@ -145,22 +164,20 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
                                             ker_width, ker_height);
   const auto activation = node.param().activation;
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto ker_tensor = _tensor_builder->at(ker_index).get();
-  auto bias_tensor = _tensor_builder->at(bias_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+  auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
 
   const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
   const auto act_info = acl_common::asActivationLayerInfo(activation);
 
-  auto fn = std::make_unique<::arm_compute::CLConvolutionLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
-  fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
-                ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(),
-                ::arm_compute::Size2D(1U, 1U), act_info);
+  auto fn = acl_common::generateLayer<arm_compute::CLConvolutionLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+      ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
+      ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
 
-  _return_fn = asAclClFunction(std::move(fn));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
@@ -185,50 +202,23 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
   const auto multiplier = node.param().multiplier;
   const auto activation = node.param().activation;
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto ker_tensor = _tensor_builder->at(ker_index).get();
-  auto bias_tensor = _tensor_builder->at(bias_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+  auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
 
   const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
   const auto act_info = acl_common::asActivationLayerInfo(activation);
 
   {
-    auto fn = std::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
-
-    fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
-                  ofm_tensor->handle(), conv_info, multiplier, act_info);
+    auto fn = acl_common::generateLayer<arm_compute::CLDepthwiseConvolutionLayer>(
+        ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
+        conv_info, multiplier, act_info);
 
-    _return_fn = asAclClFunction(std::move(fn));
+    _return_fn = asAclFunction(std::move(fn));
   }
 }
 
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
-  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
-      node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
-
-  const auto ofm_index{node.getOutputs().at(0)};
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  const auto activation = node.param().activation;
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(raw_fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
-  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
-      node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG);
-
-  const auto ofm_index{node.getOutputs().at(0)};
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  const auto activation = node.param().activation;
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(raw_fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
 void KernelGenerator::visit(const ir::operation::Concat &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
@@ -250,70 +240,44 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
     return;
   }
 
-  auto output_tensor = _tensor_builder->at(ofm_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get();
   std::vector<::arm_compute::ICLTensor *> input_tensors;
   for (auto &ifm_ind : input_indexes)
-    input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
+    input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
 
   std::unique_ptr<::arm_compute::IFunction> fn;
   if (input_indexes.size() < 2)
   {
-    auto l = std::make_unique<::arm_compute::CLCopy>();
-    l->configure(input_tensors.at(0), output_tensor->handle());
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensors.at(0),
+                                                        output_tensor->handle());
   }
   else
   {
-    auto l = std::make_unique<::arm_compute::CLConcatenateLayer>();
     const auto rank = _ctx.at(ofm_index).shape().rank();
     const auto frontend_layout = _current_op_seq_layout;
     const auto backend_layout = output_tensor->layout();
     const auto fixed_axis =
         acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
-    l->configure(input_tensors, output_tensor->handle(), fixed_axis);
-    fn = std::move(l);
+    fn = acl_common::generateLayer<::arm_compute::CLConcatenateLayer>(
+        input_tensors, output_tensor->handle(), fixed_axis);
   }
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::FullyConnected &node)
 {
   const auto output_index{node.getOutputs().at(0)};
-  auto output_tensor = _tensor_builder->at(output_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
   const auto activation = node.param().activation;
 
-  auto fn = acl_common::kernelGenFullyConnected<acl_common::AclClFunction, ::arm_compute::ICLTensor,
+  auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor,
                                                 ::arm_compute::CLFullyConnectedReshapingLayer>(
-      node, _ctx, _tensor_builder, _current_op_seq_layout);
+      node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
   _return_fn = std::make_unique<exec::FunctionSequence>(
       std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
 }
 
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLPixelWiseMultiplication>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
-                arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
-
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
 void KernelGenerator::visit(const ir::operation::Reduce &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -322,8 +286,8 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
   const auto keep_dims{node.param().keep_dims};
   const auto reduce_type = node.param().reduce_type;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
   // Convert to ACL axes taking into account negative values and possible duplicates.
   const auto &axes = _ctx.at(axes_index);
@@ -334,29 +298,21 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
   std::unique_ptr<arm_compute::IFunction> fn;
   if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
   {
-    auto l = std::make_unique<::arm_compute::CLReduceMean>();
-
     const auto acl_axes =
         acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
-    l->configure(input_tensor->handle(), acl_axes, keep_dims, output_tensor->handle());
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::CLReduceMean>(input_tensor->handle(), acl_axes,
+                                                              keep_dims, output_tensor->handle());
   }
   else
   {
-    auto l = std::make_unique<::arm_compute::CLReduceOperation>(
-        _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
     const auto acl_axes = acl_common::asSet(axes, input_rank, frontend_layout, backend_layout);
-    l->configure(input_tensor->handle(), output_tensor->handle(), acl_axes, keep_dims,
-                 acl_common::convertReduceType(reduce_type));
 
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::CLReduceOperation>(
+        _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+        output_tensor->handle(), acl_axes, keep_dims, acl_common::convertReduceType(reduce_type));
   }
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Reshape &node)
@@ -364,8 +320,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
   // NOTE This operation must not change the layout from frontend to backend
   //      So, PermutationOperationPass makes the layouts of frontend and backend the same.
@@ -376,13 +332,10 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
   UNUSED_RELEASE(frontend_layout);
   UNUSED_RELEASE(backend_layout);
 
-  auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Squeeze &node)
@@ -398,32 +351,11 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
   (void)dims;
   (void)ndim;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-  auto fn = std::make_unique<arm_compute::CLReshapeLayer>();
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-  auto acl_fn = asAclClFunction(std::move(fn));
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Tanh &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<arm_compute::CLActivationLayer>();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+  auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Softmax &node)
@@ -433,17 +365,14 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
 
   const auto beta = node.param().beta;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLSoftmaxLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLSoftmaxLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+      output_tensor->handle(), beta);
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Slice &node)
@@ -453,8 +382,8 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
   const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
   const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
 
-  auto outputData_tensor = _tensor_builder->at(output_index).get();
-  auto inputData_tensor = _tensor_builder->at(input_index).get();
+  auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = inputData_tensor->layout();
 
@@ -506,13 +435,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
     ends_set.set(i, ends[i]);
   }
 
-  auto fn = std::make_unique<::arm_compute::CLSlice>();
-
-  fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLSlice>(
+      inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::StridedSlice &node)
@@ -523,8 +449,8 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
   const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
   const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
 
-  auto outputData_tensor = _tensor_builder->at(output_index).get();
-  auto inputData_tensor = _tensor_builder->at(input_index).get();
+  auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = inputData_tensor->layout();
 
@@ -597,14 +523,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
     strides_set.set(i, strides[i]);
   }
 
-  auto fn = std::make_unique<::arm_compute::CLStridedSlice>();
-
-  fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
-                strides_set, begin_mask, end_mask, shrink_axis_mask);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLStridedSlice>(
+      inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+      begin_mask, end_mask, shrink_axis_mask);
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Transpose &node)
@@ -615,8 +538,8 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
 
   const auto rank = _ctx.at(ifm_idx).shape().rank();
 
-  auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = ifm_tensor->layout();
 
@@ -625,93 +548,168 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
   auto backend_pv = ::onert::backend::acl_common::getARMComputePermutationVector(
       rank, pv, frontend_layout, backend_layout);
 
-  auto fn = std::make_unique<::arm_compute::CLPermute>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Add &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLArithmeticAddition>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
-                arm_compute::ConvertPolicy::SATURATE);
+  auto fn = acl_common::generateLayer<::arm_compute::CLPermute>(ifm_tensor->handle(),
+                                                                ofm_tensor->handle(), backend_pv);
 
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::Sub &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
-  const auto activation = node.param().activation;
+  const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
-  auto fn = std::make_unique<::arm_compute::CLArithmeticSubtraction>();
+  const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
+      node.param().op_type, node.param().alpha, node.param().beta);
 
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
-                arm_compute::ConvertPolicy::SATURATE);
+  auto fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+      ifm_tensor->handle(), ofm_tensor->handle(), act_info);
 
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::Div &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
 {
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
+  const auto output_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
 
-  auto fn = std::make_unique<::arm_compute::CLArithmeticDivision>();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
 
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+  std::unique_ptr<arm_compute::IFunction> fn;
+  switch (node.param().op_type)
+  {
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLBinaryLogicalOp>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle(),
+          arm_compute::BinaryLogicalOperation::AND);
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLBitwiseOr>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLElementwiseMax>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLElementwiseMin>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    default:
+    {
+      std::string err_msg("acl_cl KernelGenerator : " + node.name() +
+                          " is not an elementwise-binary operation");
+      assert(false && err_msg.c_str());
+      break;
+    }
+  }
 
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::Exp &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
 {
   const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
+  const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
-  auto fn = std::make_unique<::arm_compute::CLExpLayer>();
+  std::unique_ptr<arm_compute::IFunction> fn;
+  switch (node.param().op_type)
+  {
+    case ir::operation::ElementwiseUnary::Type::ABS:
+    {
+      const ::arm_compute::ActivationLayerInfo act_info{
+          ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
 
-  fn->configure(input_tensor->handle(), output_tensor->handle());
+      fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+          input_tensor->handle(), output_tensor->handle(), act_info);
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::CAST:
+    {
+      if (input_tensor->data_type() == output_tensor->data_type())
+      {
+        fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor->handle(),
+                                                            output_tensor->handle());
+      }
+      else
+      {
+        // TODO Support converting float to int32 as round down
+        fn = acl_common::generateLayer<arm_compute::CLCast>(
+            input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+      }
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLDequantizationLayer>(input_tensor->handle(),
+                                                                         output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::EXP:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLExpLayer>(input_tensor->handle(),
+                                                              output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::FLOOR:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLFloor>(input_tensor->handle(),
+                                                           output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLBitwiseNot>(input_tensor->handle(),
+                                                                output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::NEG:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLNeg>(input_tensor->handle(),
+                                                         output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::RSQRT:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLRsqrtLayer>(input_tensor->handle(),
+                                                                output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::SQRT:
+    {
+      const ::arm_compute::ActivationLayerInfo act_info{
+          ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
 
-  auto acl_fn = asAclClFunction(std::move(fn));
+      fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+          input_tensor->handle(), output_tensor->handle(), act_info);
+      break;
+    }
+    default:
+    {
+      throw std::runtime_error("acl_cl KernelGenerator : " + node.name() + " is not supported yet");
+      break;
+    }
+  }
+
+  auto acl_fn = asAclFunction(std::move(fn));
 
   _return_fn = std::move(acl_fn);
 }
@@ -721,16 +719,13 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node)
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
-  fn->configure(input_tensor->handle(), output_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
@@ -740,67 +735,25 @@ void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
   const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
   const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto gamma_tensor = _tensor_builder->at(gamma_index).get();
-  auto beta_tensor = _tensor_builder->at(beta_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get();
+  auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get();
   auto epsilon = node.param().epsilon;
   auto activation = node.param().activation;
 
-  auto fn = std::make_unique<::arm_compute::CLInstanceNormalizationLayerEx>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(),
-                beta_tensor->handle(), epsilon);
+  auto fn = acl_common::generateLayer<arm_compute::CLInstanceNormalizationLayerEx>(
+      ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
+      epsilon);
 
   _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Logistic &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
-
-  auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
-  const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input0_tensor = _tensor_builder->at(input0_index).get();
-  auto input1_tensor = _tensor_builder->at(input1_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLBinaryLogicalOp>();
-
-  fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
-                ::arm_compute::BinaryLogicalOperation::AND);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
 }
 
 void KernelGenerator::visit(const ir::operation::LSTM &node)
 {
-  _return_fn = acl_common::kernelGenLSTM<acl_common::AclClFunction, ::arm_compute::ICLTensor,
-                                         ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_builder);
+  _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ICLTensor,
+                                         ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_reg);
 }
 
 void KernelGenerator::visit(const ir::operation::Comparison &node)
@@ -811,18 +764,15 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
 
   const auto comparison_type = node.param().comparison_type;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input0_tensor = _tensor_builder->at(input0_index).get();
-  auto input1_tensor = _tensor_builder->at(input1_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLComparison>();
-
-  fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
-                (arm_compute::ComparisonOperation)comparison_type);
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get();
+  auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get();
 
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLComparison>(
+      input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+      (arm_compute::ComparisonOperation)comparison_type);
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Pack &node)
@@ -836,26 +786,24 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
   for (const auto &input_index : node.getInputs())
     input_indexes.emplace_back(input_index);
 
-  auto output = _tensor_builder->at(output_index).get()->handle();
+  auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
   std::vector<arm_compute::ICLTensor *> inputs;
   for (const auto &input_index : input_indexes)
-    inputs.emplace_back(_tensor_builder->at(input_index)->handle());
+    inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
 
   const auto frontend_layout = _current_op_seq_layout;
-  const auto backend_layout = _tensor_builder->at(output_index).get()->layout();
+  const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout();
 
   if (axis < 0)
     axis += output_rank;
   axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
 
-  auto fn = std::make_unique<::arm_compute::CLStackLayer>();
-
   // Disable applied dim_correction
   std::vector<arm_compute::TensorShape> orig_inputs_acl_tensor_shapes;
   for (const auto &input_index : input_indexes)
   {
     size_t input_rank = _ctx.at(input_index).shape().rank();
-    const auto &input_tensor = _tensor_builder->at(input_index);
+    const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
     orig_inputs_acl_tensor_shapes.emplace_back(input_tensor->info()->tensor_shape());
     assert(input_rank == input_tensor->num_dimensions());
     if (input_rank != input_tensor->info()->num_dimensions())
@@ -866,7 +814,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
     }
   }
 
-  fn->configure(inputs, axis, output);
+  auto fn = acl_common::generateLayer<arm_compute::CLStackLayer>(inputs, axis, output);
 
   // Revert disabling applied dim_correction
   assert(inputs.size() == orig_inputs_acl_tensor_shapes.size());
@@ -875,7 +823,21 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
     inputs.at(i)->info()->set_tensor_shape(orig_inputs_acl_tensor_shapes.at(i));
   }
 
-  _return_fn = asAclClFunction(std::move(fn));
+  _return_fn = asAclFunction(std::move(fn));
+}
+
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
+{
+  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
+      node, _ctx, _tensor_reg, _current_op_seq_layout,
+      acl_common::convertPoolType(node.param().op_type));
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  const auto activation = node.param().activation;
+  _return_fn = std::make_unique<exec::FunctionSequence>(
+      asAclFunction(std::move(raw_fn)),
+      ActivationBuilder::generate(activation, ofm_tensor->handle()));
 }
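The former MaxPool2D and AvgPool2D visitors removed earlier are folded into this single Pool2D visitor, with acl_common::convertPoolType selecting the ACL pooling mode from the merged IR operation. That converter is not shown in this diff; a plausible sketch:

// Assumed sketch; the real convertPoolType is defined in acl_common.
arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
{
  switch (type_ir)
  {
    case ir::operation::Pool2D::PoolType::AVG:
      return arm_compute::PoolingType::AVG;
    case ir::operation::Pool2D::PoolType::L2:
      return arm_compute::PoolingType::L2;
    case ir::operation::Pool2D::PoolType::MAX:
      return arm_compute::PoolingType::MAX;
    default:
      throw std::runtime_error("convertPoolType: unsupported pooling type");
  }
}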
 
 void KernelGenerator::visit(const ir::operation::Permute &node)
@@ -883,8 +845,8 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
   const auto ofm_idx{node.getOutputs().at(0)};
   const auto ifm_idx{node.getInputs().at(0)};
   const auto permute_type = node.getPermuteType();
-  auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
   const auto rank = _ctx.at(ofm_idx).shape().rank();
   assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
 
@@ -895,70 +857,23 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
     // WHCN -> CWHN
     pv = arm_compute::PermutationVector{2, 0, 1};
 
-    auto l = std::make_unique<::arm_compute::CLPermute>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::CLPermute>(ifm_tensor->handle(),
+                                                           ofm_tensor->handle(), pv);
   }
   else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
   {
     // CWHN -> WHCN
     pv = arm_compute::PermutationVector{1, 2, 0};
 
-    auto l = std::make_unique<::arm_compute::CLPermute>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<::arm_compute::CLPermute>(ifm_tensor->handle(),
+                                                             ofm_tensor->handle(), pv);
   }
   else
   {
-    auto l = std::make_unique<::arm_compute::CLCopy>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::CLCopy>(ifm_tensor->handle(), ofm_tensor->handle());
   }
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLRsqrtLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
-  _return_fn = asAclClFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<arm_compute::CLActivationLayer>();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
@@ -967,58 +882,32 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
 
   const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
-  auto fn = std::make_unique<::arm_compute::CLScale>();
+  auto fn = acl_common::generateLayer<arm_compute::CLScale>(
+      ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
+      ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
+      ::arm_compute::SamplingPolicy::TOP_LEFT);
 
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
-                ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
-                ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU1 &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
-
-  auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
+void KernelGenerator::visit(const ir::operation::ResizeNearestNeighbor &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
 
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
+  const auto ifm_index{node.getInputs().at(ir::operation::ResizeNearestNeighbor::Input::INPUT)};
 
-  auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
+  auto fn = acl_common::generateLayer<arm_compute::CLScale>(
+      ifm_tensor->handle(), ofm_tensor->handle(),
+      ::arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, ::arm_compute::BorderMode::REPLICATE,
+      ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::RNN &node)
@@ -1036,43 +925,25 @@ void KernelGenerator::visit(const ir::operation::RNN &node)
 
   const auto activation = node.param().activation;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get();
 
-  auto input_tensor = _tensor_builder->at(input_index).get();
-  auto weights_tensor = _tensor_builder->at(weights_index).get();
-  auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
-  auto bias_tensor = _tensor_builder->at(bias_index).get();
-  auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+  auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get();
+  auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get();
+  auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+  auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get();
   auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
 
-  auto copy_layer = std::make_unique<::arm_compute::CLCopy>();
-  copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
-  _return_fn = asAclClFunction(std::move(copy_layer));
+  auto copy_layer = acl_common::generateLayer<arm_compute::CLCopy>(
+      hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+  _return_fn = asAclFunction(std::move(copy_layer));
 
-  auto fn = std::make_unique<::arm_compute::CLRNNLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-  fn->configure(input_tensor->handle(), weights_tensor->handle(),
-                recurrent_weights_tensor->handle(), bias_tensor->handle(),
-                hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
-  _return_fn = asAclClFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Floor &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLFloor>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  auto fn = acl_common::generateLayer<arm_compute::CLRNNLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+      weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+      hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
@@ -1083,24 +954,19 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
       node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
   const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto block_size_tensor = _tensor_builder->at(block_size_index).get();
-  auto paddings_tensor = _tensor_builder->at(paddings_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
+  auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get();
 
   assert(_ctx.at(block_size_index).data());
   assert(_ctx.at(paddings_index).data());
 
-  std::unique_ptr<::arm_compute::IFunction> fn;
-
-  auto l = std::make_unique<::arm_compute::CLSpaceToBatchLayer>();
-  l->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
-               ofm_tensor->handle());
-  fn = std::move(l);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLSpaceToBatchLayer>(
+      ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+      ofm_tensor->handle());
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
@@ -1110,29 +976,13 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
 
   auto block_size = node.param().block_size;
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLSpaceToDepthLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
-void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
-{
-  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
-      node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2);
+  auto fn = acl_common::generateLayer<arm_compute::CLSpaceToDepthLayer>(
+      ifm_tensor->handle(), ofm_tensor->handle(), block_size);
 
-  const auto ofm_index{node.getOutputs().at(0)};
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  const auto activation = node.param().activation;
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(raw_fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
@@ -1141,17 +991,14 @@ void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
   const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
   const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto lookups_tensor = _tensor_builder->at(lookups_index).get();
-  auto values_tensor = _tensor_builder->at(values_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLEmbeddingLookup>();
-
-  fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+  auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
 
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLEmbeddingLookup>(
+      values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::L2Normalization &node)
@@ -1173,19 +1020,16 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
   float beta = 0.5f;                             // pow(reduction, -0.5) = 1 / sqrt(reduction)
   float bias = 0.0f;                             // Don't offset the reduction.
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
   const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
                                                                radius, alpha, beta, bias, false);
 
-  auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
+      ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
@@ -1197,21 +1041,18 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
   const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
   const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto hits_tensor = _tensor_builder->at(hits_index).get();
-
-  auto lookups_tensor = _tensor_builder->at(lookups_index).get();
-  auto keys_tensor = _tensor_builder->at(keys_index).get();
-  auto values_tensor = _tensor_builder->at(values_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLHashtableLookup>();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get();
 
-  fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
-                output_tensor->handle(), hits_tensor->handle());
+  auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+  auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get();
+  auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
 
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLHashtableLookup>(
+      lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+      output_tensor->handle(), hits_tensor->handle());
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::PReLU &node)
@@ -1220,17 +1061,14 @@ void KernelGenerator::visit(const ir::operation::PReLU &node)
   const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
   const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto alpha_tensor = _tensor_builder->at(alpha_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLPReluLayer>();
-
-  fn->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get();
 
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLPReluLayer>(
+      ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::TransposeConv &node)
@@ -1258,77 +1096,18 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
     invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
   }
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto ker_tensor = _tensor_builder->at(ker_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
 
   const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
 
-  auto fn = std::make_unique<::arm_compute::CLTransposeConvLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
-  fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
-                tconv_info, invalid_horizontal, invalid_vertical);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::SQRT &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
-
-  auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalOr &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
-  const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input0_tensor = _tensor_builder->at(input0_index).get();
-  auto input1_tensor = _tensor_builder->at(input1_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLBitwiseOr>();
-
-  fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalNot &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLBitwiseNot>();
+  auto fn = acl_common::generateLayer<arm_compute::CLTransposeConvLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+      ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info, invalid_horizontal,
+      invalid_vertical);
 
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
@@ -1337,17 +1116,14 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
   const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
   const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
 
-  auto fn = std::make_unique<::arm_compute::CLElementwiseSquaredDiff>();
+  auto fn = acl_common::generateLayer<arm_compute::CLElementwiseSquaredDiff>(
+      lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
 
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::TopKV2 &node)
@@ -1364,17 +1140,14 @@ void KernelGenerator::visit(const ir::operation::TopKV2 &node)
 
   const auto k = node.param().k;
 
-  auto values_tensor = _tensor_builder->at(outputValues_index).get();
-  auto indices_tensor = _tensor_builder->at(outputIndices_index).get();
-  auto input_tensor = _tensor_builder->at(inputData_index).get();
+  auto values_tensor = _tensor_reg->getAclTensor(outputValues_index).get();
+  auto indices_tensor = _tensor_reg->getAclTensor(outputIndices_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(inputData_index).get();
 
-  auto fn = std::make_unique<::arm_compute::CLTopKV2>();
+  auto fn = acl_common::generateLayer<arm_compute::CLTopKV2>(
+      input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
 
-  fn->configure(input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Gather &node)
@@ -1389,9 +1162,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
   const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
   const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto indices_tensor = _tensor_builder->at(indices_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get();
 
   // NOTE The frontend layout and backend layout must be the same for this operation.
   //      If not the same, we have to add a stage(?) to perform permutation of output tensor. It
@@ -1407,8 +1180,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
   assert(backend_layout == indices_tensor->layout());
   assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
 
-  auto fn = std::make_unique<::arm_compute::CLGatherEx>();
-
   // input is n-D, indices k-D, output is (n + k - 1)-D
   size_t n = ifm_rank;
   assert(n == ifm_tensor->num_dimensions());
@@ -1433,52 +1204,14 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
         acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
   }
 
-  fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
+  auto fn = acl_common::generateLayer<arm_compute::CLGatherEx>(
+      ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
 
   // Revert disabling applied dim_correction
   ifm_tensor->info()->set_tensor_shape(orig_ifm_acl_tensor_shape);
   indices_tensor->info()->set_tensor_shape(orig_indice_acl_tensor_shape);
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Neg &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLNeg>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Abs &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
-
-  auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::ArgMax &node)
@@ -1491,8 +1224,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
 
   assert((ifm_shape.rank() - 1) == ofm_shape.rank());
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
   const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
   auto frontend_layout = _current_op_seq_layout;
   auto backend_layout = ifm_tensor->layout();
@@ -1506,31 +1239,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
   auto acl_axis =
       acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
 
-  auto fn = std::make_unique<::arm_compute::CLArgMinMaxLayer>();
+  auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayer>(
+      ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
+      ::arm_compute::ReductionOperation::ARG_IDX_MAX);
 
-  fn->configure(ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
-                ::arm_compute::ReductionOperation::ARG_IDX_MAX);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Dequantize &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLDequantizationLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
@@ -1544,19 +1257,16 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod
   auto beta = node.param().beta;
   auto bias = node.param().bias;
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
   const auto norm_info = ::arm_compute::NormalizationLayerInfo(
       ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
 
-  auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
+      ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
@@ -1567,16 +1277,13 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
   auto block_size = node.param().block_size;
   assert(block_size > 0);
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLDepthToSpaceLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), block_size);
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLDepthToSpaceLayer>(
+      input_tensor->handle(), output_tensor->handle(), block_size);
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Split &node)
@@ -1590,10 +1297,10 @@ void KernelGenerator::visit(const ir::operation::Split &node)
   for (const auto &output : node.getOutputs())
     output_indexes.emplace_back(output);
 
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
   std::vector<arm_compute::ICLTensor *> output_tensors;
   for (const auto &ofm_ind : output_indexes)
-    output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+    output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle());
 
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = ifm_tensor->layout();
@@ -1602,11 +1309,10 @@ void KernelGenerator::visit(const ir::operation::Split &node)
     axis += ifm_rank;
   axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
 
-  auto fn = std::make_unique<::arm_compute::CLSplit>();
-
-  fn->configure(ifm_tensor->handle(), output_tensors, axis);
+  auto fn =
+      acl_common::generateLayer<arm_compute::CLSplit>(ifm_tensor->handle(), output_tensors, axis);
 
-  _return_fn = asAclClFunction(std::move(fn));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Unpack &node)
@@ -1620,13 +1326,13 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
   for (const auto &output_index : node.getOutputs())
     output_indexes.emplace_back(output_index);
 
-  auto input = _tensor_builder->at(input_index).get()->handle();
+  auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
   std::vector<arm_compute::ICLTensor *> outputs;
   for (const auto &output_index : output_indexes)
-    outputs.emplace_back(_tensor_builder->at(output_index)->handle());
+    outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
 
   const auto frontend_layout = _current_op_seq_layout;
-  const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+  const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
   if (axis < 0)
     axis += input_rank;
   axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
@@ -1636,7 +1342,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
   for (const auto &output_index : output_indexes)
   {
     size_t output_rank = _ctx.at(output_index).shape().rank();
-    const auto &output_tensor = _tensor_builder->at(output_index);
+    const auto &output_tensor = _tensor_reg->getAclTensor(output_index);
     orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
     assert(output_rank == output_tensor->num_dimensions());
     if (output_rank != output_tensor->info()->num_dimensions())
@@ -1647,11 +1353,9 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
     }
   }
 
-  auto fn = std::make_unique<::arm_compute::CLUnstack>();
-
-  fn->configure(input, outputs, axis);
+  auto fn = acl_common::generateLayer<arm_compute::CLUnstack>(input, outputs, axis);
 
-  _return_fn = asAclClFunction(std::move(fn));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Pad &node)
@@ -1669,11 +1373,11 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
   auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset());
   const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info);
 
-  auto input = _tensor_builder->at(input_index).get()->handle();
-  auto output = _tensor_builder->at(output_index).get()->handle();
+  auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
+  auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
 
   const auto frontend_layout = _current_op_seq_layout;
-  const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+  const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
 
   ::arm_compute::PaddingList padding_list;
   padding_list.resize(rank);
@@ -1685,11 +1389,10 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
         acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
     padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
   }
-  auto fn = std::make_unique<::arm_compute::CLPadLayer>();
 
   // Disable applied dim_correction
   size_t input_rank = _ctx.at(input_index).shape().rank();
-  const auto &input_tensor = _tensor_builder->at(input_index);
+  const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
   assert(input_rank == input_tensor->num_dimensions());
   if (input_rank != input_tensor->info()->num_dimensions())
   {
@@ -1698,50 +1401,13 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
         _ctx.at(input_index).shape(), frontend_layout, backend_layout, false));
   }
 
-  fn->configure(input, output, padding_list, pixel_value);
+  auto fn =
+      acl_common::generateLayer<arm_compute::CLPadLayer>(input, output, padding_list, pixel_value);
 
   // Do not revert the disabled dim_correction here: CLPadKernel only has a CL kernel
   // for 4 dimensions, and reverting would produce a mismatch in the results.
 
-  _return_fn = asAclClFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLElementwiseMin>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLElementwiseMax>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node)
@@ -1749,17 +1415,13 @@ void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node)
   const auto ofm_index{node.getOutputs().at(0)};
   const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp32ToFp16::Input::INPUT)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE,
-                0);
+  auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
+      ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node)
@@ -1767,17 +1429,13 @@ void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node)
   const auto ofm_index{node.getOutputs().at(0)};
   const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp16ToFp32::Input::INPUT)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE,
-                0);
+  auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
+      ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 } // namespace acl_cl
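
Taken together, the hunks above apply one mechanical rewrite to every visitor in this file: tensor lookups move from _tensor_builder->at(...) to _tensor_reg->getAclTensor(...), the CL-only asAclClFunction wrapper gives way to the shared asAclFunction, and each make_unique-plus-configure pair collapses into a single acl_common::generateLayer call. Condensed from the SpaceToDepth hunk above, the pattern is:

    // before
    auto ofm_tensor = _tensor_builder->at(ofm_index).get();
    auto ifm_tensor = _tensor_builder->at(ifm_index).get();
    auto fn = std::make_unique<::arm_compute::CLSpaceToDepthLayer>();
    fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
    _return_fn = asAclClFunction(std::move(fn));

    // after
    auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
    auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
    auto fn = acl_common::generateLayer<arm_compute::CLSpaceToDepthLayer>(
        ifm_tensor->handle(), ofm_tensor->handle(), block_size);
    _return_fn = asAclFunction(std::move(fn));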
index 1e3b064..d188d6d 100644 (file)
@@ -21,6 +21,8 @@
 
 #include "ir/Operands.h"
 #include "TensorBuilder.h"
+#include "AclTensorRegistry.h"
+#include "TensorManager.h"
 
 namespace onert
 {
@@ -33,70 +35,52 @@ class KernelGenerator : public IKernelGenerator
 {
 public:
   KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
-                  const std::shared_ptr<TensorBuilder> &tensor_builder);
+                  const std::shared_ptr<TensorBuilder> &tensor_builder,
+                  const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
 
   void visit(const ir::OpSequence &) override;
   void visit(const ir::operation::BatchToSpaceND &) override;
+  void visit(const ir::operation::BinaryArithmetic &) override;
   void visit(const ir::operation::Conv2D &) override;
   void visit(const ir::operation::DepthwiseConv2D &) override;
-  void visit(const ir::operation::MaxPool2D &) override;
-  void visit(const ir::operation::AvgPool2D &) override;
   void visit(const ir::operation::Concat &) override;
   void visit(const ir::operation::FullyConnected &) override;
-  void visit(const ir::operation::Mul &) override;
   void visit(const ir::operation::Reduce &) override;
   void visit(const ir::operation::Reshape &) override;
   void visit(const ir::operation::Squeeze &) override;
-  void visit(const ir::operation::Tanh &) override;
   void visit(const ir::operation::Softmax &) override;
   void visit(const ir::operation::Slice &) override;
   void visit(const ir::operation::StridedSlice &) override;
   void visit(const ir::operation::Transpose &) override;
-  void visit(const ir::operation::Add &) override;
-  void visit(const ir::operation::Sub &) override;
-  void visit(const ir::operation::Cast &) override;
-  void visit(const ir::operation::Div &) override;
-  void visit(const ir::operation::Exp &) override;
+  void visit(const ir::operation::ElementwiseActivation &) override;
+  void visit(const ir::operation::ElementwiseBinary &) override;
+  void visit(const ir::operation::ElementwiseUnary &) override;
   void visit(const ir::operation::ExpandDims &) override;
   void visit(const ir::operation::InstanceNorm &) override;
-  void visit(const ir::operation::Logistic &) override;
   void visit(const ir::operation::Comparison &) override;
-  void visit(const ir::operation::LogicalAnd &) override;
   void visit(const ir::operation::LSTM &) override;
   void visit(const ir::operation::Pack &) override;
+  void visit(const ir::operation::Pool2D &) override;
   void visit(const ir::operation::Permute &) override;
-  void visit(const ir::operation::RSQRT &) override;
-  void visit(const ir::operation::ReLU &) override;
   void visit(const ir::operation::ResizeBilinear &) override;
-  void visit(const ir::operation::ReLU1 &) override;
-  void visit(const ir::operation::ReLU6 &) override;
+  void visit(const ir::operation::ResizeNearestNeighbor &) override;
   void visit(const ir::operation::RNN &) override;
-  void visit(const ir::operation::Floor &) override;
   void visit(const ir::operation::SpaceToBatchND &) override;
   void visit(const ir::operation::SpaceToDepth &) override;
-  void visit(const ir::operation::L2Pool2D &) override;
   void visit(const ir::operation::EmbeddingLookup &) override;
   void visit(const ir::operation::L2Normalization &) override;
   void visit(const ir::operation::HashtableLookup &) override;
   void visit(const ir::operation::PReLU &) override;
   void visit(const ir::operation::TransposeConv &) override;
-  void visit(const ir::operation::SQRT &) override;
-  void visit(const ir::operation::LogicalOr &) override;
-  void visit(const ir::operation::LogicalNot &) override;
   void visit(const ir::operation::SquaredDifference &) override;
   void visit(const ir::operation::TopKV2 &) override;
   void visit(const ir::operation::Gather &) override;
-  void visit(const ir::operation::Neg &) override;
-  void visit(const ir::operation::Abs &) override;
   void visit(const ir::operation::ArgMax &) override;
-  void visit(const ir::operation::Dequantize &) override;
   void visit(const ir::operation::LocalResponseNormalization &) override;
   void visit(const ir::operation::DepthToSpace &) override;
   void visit(const ir::operation::Split &) override;
   void visit(const ir::operation::Unpack &) override;
   void visit(const ir::operation::Pad &) override;
-  void visit(const ir::operation::Min &) override;
-  void visit(const ir::operation::Max &) override;
   void visit(const ir::operation::ConvertFp32ToFp16 &) override;
   void visit(const ir::operation::ConvertFp16ToFp32 &) override;
 
@@ -104,6 +88,7 @@ private:
   const ir::Operands &_ctx;
   const ir::Operations &_operations_ctx;
   std::shared_ptr<TensorBuilder> _tensor_builder;
+  std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
   ir::Layout _current_op_seq_layout;
 };
 
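The visitor list above also shows this release's IR consolidation: many per-operation visitors are folded into a handful of unified operations. Inferred from the paired removals and additions (the diff itself does not spell out the mapping):

    Add / Sub / Mul / Div                    -> BinaryArithmetic
    Logistic / ReLU / ReLU1 / ReLU6 / Tanh   -> ElementwiseActivation
    LogicalAnd / LogicalOr / Min / Max       -> ElementwiseBinary
    Abs / Cast / Dequantize / Exp / Floor /
    LogicalNot / Neg / RSQRT / SQRT          -> ElementwiseUnary
    MaxPool2D / AvgPool2D / L2Pool2D         -> Pool2D
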
index 6ba3143..9134d3f 100644 (file)
@@ -19,7 +19,7 @@
 #include "ParentInfo.h"
 
 #include <cassert>
-#include <ir/LoweredGraph.h>
+#include <compiler/LoweredGraph.h>
 #include <util/logging.h>
 #include "AclSubTensorAnalyzer.h"
 
index bdbd036..ab295db 100644 (file)
@@ -56,7 +56,7 @@ using InternalBufferManager = acl_common::AclInternalBufferManager<
 using TensorManager =
     acl_common::AclTensorManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
 
-TensorManager *createTensorManager(bool is_linear_executor)
+inline TensorManager *createTensorManager(bool is_linear_executor)
 {
   if (is_linear_executor)
   {
diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.cc b/runtime/onert/backend/acl_common/AclConstantInitializer.cc
new file mode 100644 (file)
index 0000000..6ad5b7b
--- /dev/null
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AclConstantInitializer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+AclConstantInitializer::AclConstantInitializer(const ir::Operands &operands,
+                                               const std::shared_ptr<ITensorRegistry> &tensor_reg)
+    : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
+{
+  // DO NOTHING
+}
+
+void AclConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index)
+{
+  assert(node.getInputs().size() > index);
+
+  const auto &input_index = node.getInputs().at(index);
+  const auto &input_obj = _operands.at(input_index);
+  registerCopyInitializer(input_index, input_obj);
+}
+
+void AclConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
+{
+  assert(node.getInputs().size() > index);
+
+  const auto &input_index = node.getInputs().at(index);
+  const auto &input_obj = _operands.at(input_index);
+  registerPermuteInitializer(input_index, input_obj);
+}
+
+void AclConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
+{
+  const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
+  const auto &block_size_obj = _operands.at(block_size_index);
+
+  if (block_size_obj.isConstant())
+  {
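+    // Copy the constant 1-D block_size into the backend tensor with its
+    // elements reversed (note the num_elements() - i - 1 index below),
+    // presumably to match ACL's reversed axis ordering.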
+    _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
+      assert(model_obj.data());
+      const auto &shape = model_obj.shape();
+      const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
+      assert(model_obj.shape().rank() == 1);
+      obj.access([&](ITensor &tensor) {
+        for (size_t i = 0; i < shape.num_elements(); ++i)
+        {
+          const int32_t value = base[shape.num_elements() - i - 1];
+          int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
+                                                      tensor.calcOffset({static_cast<int32_t>(i)}));
+          *into = value;
+        }
+      });
+    };
+  }
+}
+
+void AclConstantInitializer::visit(const ir::operation::Conv2D &node)
+{
+  permuteInputInitialize(node, ir::operation::Conv2D::KERNEL);
+  copyInputInitialize(node, ir::operation::Conv2D::BIAS);
+}
+
+void AclConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
+{
+  permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL);
+  copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS);
+}
+
+void AclConstantInitializer::visit(const ir::operation::FullyConnected &node)
+{
+  copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT);
+  copyInputInitialize(node, ir::operation::FullyConnected::BIAS);
+}
+
+void AclConstantInitializer::visit(const ir::operation::LSTM &node)
+{
+  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS);
+  copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS);
+  copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS);
+  copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS);
+  copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS);
+}
+
+void AclConstantInitializer::visit(const ir::operation::RNN &node)
+{
+  copyInputInitialize(node, ir::operation::RNN::WEIGHTS);
+  copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS);
+  copyInputInitialize(node, ir::operation::RNN::BIAS);
+}
+
+void AclConstantInitializer::visit(const ir::operation::TransposeConv &node)
+{
+  permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL);
+}
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.h b/runtime/onert/backend/acl_common/AclConstantInitializer.h
new file mode 100644 (file)
index 0000000..52f4c54
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
+#define __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
+
+#include <backend/IConstantInitializer.h>
+#include <ir/Operands.h>
+#include "AclTensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+class AclConstantInitializer : public IConstantInitializer
+{
+public:
+  AclConstantInitializer(const ir::Operands &operands,
+                         const std::shared_ptr<ITensorRegistry> &tensor_reg);
+
+public:
+  void visit(const ir::operation::BatchToSpaceND &) override;
+  void visit(const ir::operation::Conv2D &) override;
+  void visit(const ir::operation::DepthwiseConv2D &) override;
+  void visit(const ir::operation::FullyConnected &) override;
+  void visit(const ir::operation::LSTM &) override;
+  void visit(const ir::operation::RNN &) override;
+  void visit(const ir::operation::TransposeConv &) override;
+
+protected:
+  void copyInputInitialize(const ir::Operation &node, uint32_t index);
+  void permuteInputInitialize(const ir::Operation &node, uint32_t index);
+
+private:
+  std::shared_ptr<ITensorRegistry> tensor_registry() const final { return _tensor_reg; }
+
+protected:
+  std::shared_ptr<ITensorRegistry> _tensor_reg;
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
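
A backend would presumably consume this new base class by inheriting from it and adding only backend-specific overrides. A hypothetical sketch, assuming a CL-side subclass (the class below is illustrative and not part of this diff):

    #include "AclConstantInitializer.h"

    namespace onert
    {
    namespace backend
    {
    namespace acl_cl
    {

    // Hypothetical: reuse every common visit() from AclConstantInitializer;
    // CL-specific visit() overrides would be added here if needed.
    class ConstantInitializer : public acl_common::AclConstantInitializer
    {
    public:
      using acl_common::AclConstantInitializer::AclConstantInitializer;
    };

    } // namespace acl_cl
    } // namespace backend
    } // namespace onert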
index 85b18e8..94b6586 100644 (file)
@@ -47,12 +47,6 @@ private:
   std::unique_ptr<::arm_compute::IFunction> _func;
 };
 
-class AclClFunction : public AclFunction
-{
-public:
-  using AclFunction::AclFunction;
-};
-
 } // namespace acl_common
 } // namespace backend
 } // namespace onert
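
With AclClFunction deleted, the CL backend shares the plain AclFunction wrapper; this is what the asAclFunction calls throughout the KernelGenerator diff refer to. Given the _func member shown above, the helper plausibly looks like the following sketch (its actual definition is not part of this diff):

    // Hypothetical: wrap a configured ACL layer in the common AclFunction so a
    // visitor can hand it back as the kernel for an operation.
    inline std::unique_ptr<AclFunction>
    asAclFunction(std::unique_ptr<::arm_compute::IFunction> &&layer)
    {
      return std::make_unique<AclFunction>(std::move(layer));
    }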
index 9f7ce37..372ce68 100644 (file)
@@ -30,11 +30,32 @@ namespace backend
 namespace acl_common
 {
 
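+// Build and configure an ACL layer in one call: default-construct the layer,
+// then forward every argument to its configure() method.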
+template <typename Layer, typename... Args>
+std::unique_ptr<arm_compute::IFunction> generateLayer(Args &&... args)
+{
+  auto l = std::make_unique<Layer>();
+
+  l->configure(std::forward<Args>(args)...);
+
+  return l;
+}
+
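+// Overload for ACL layers whose constructor takes a memory manager (e.g.
+// CLRNNLayer and CLTransposeConvLayer in the hunks above): the manager goes
+// to the constructor, the remaining arguments to configure().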
+template <typename Layer, typename... Args>
+std::unique_ptr<arm_compute::IFunction>
+generateLayer(std::shared_ptr<arm_compute::IMemoryManager> memory_manager, Args &&... args)
+{
+  auto l = std::make_unique<Layer>(memory_manager);
+
+  l->configure(std::forward<Args>(args)...);
+
+  return l;
+}
+
 template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer,
-          typename T_TensorBuilder>
-std::unique_ptr<exec::IFunction>
-kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands,
-              const std::shared_ptr<T_TensorBuilder> &tensor_builder)
+          typename T_TensorRegistry>
+std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node,
+                                               const ir::Operands &operands,
+                                               const std::shared_ptr<T_TensorRegistry> &tensor_reg)
 {
   // TODO Support dynamic rnn
   // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
@@ -117,43 +138,44 @@ kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands,
   const auto projection_clip = projection_threshold;
   assert(cell_clip >= 0.f && projection_clip >= 0.f);
 
-  auto scratch_buffer_tensor = tensor_builder->at(scratch_buffer_index).get();
-  auto output_state_out_tensor = tensor_builder->at(output_state_out_index).get();
-  auto cell_state_out_tensor = tensor_builder->at(cell_state_out_index).get();
-  auto output_tensor = tensor_builder->at(output_index).get();
+  auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index).get();
+  auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index).get();
+  auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index).get();
+  auto output_tensor = tensor_reg->getAclTensor(output_index).get();
 
-  auto input_tensor = tensor_builder->at(input_index).get();
+  auto input_tensor = tensor_reg->getAclTensor(input_index).get();
 
-  auto input_to_forget_weights_tensor = tensor_builder->at(input_to_forget_weights_index).get();
-  auto input_to_cell_weights_tensor = tensor_builder->at(input_to_cell_weights_index).get();
-  auto input_to_output_weights_tensor = tensor_builder->at(input_to_output_weights_index).get();
+  auto input_to_forget_weights_tensor =
+      tensor_reg->getAclTensor(input_to_forget_weights_index).get();
+  auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index).get();
+  auto input_to_output_weights_tensor =
+      tensor_reg->getAclTensor(input_to_output_weights_index).get();
   auto recurrent_to_forget_weights_tensor =
-      tensor_builder->at(recurrent_to_forget_weights_index).get();
-  auto recurrent_to_cell_weights_tensor = tensor_builder->at(recurrent_to_cell_weights_index).get();
+      tensor_reg->getAclTensor(recurrent_to_forget_weights_index).get();
+  auto recurrent_to_cell_weights_tensor =
+      tensor_reg->getAclTensor(recurrent_to_cell_weights_index).get();
   auto recurrent_to_output_weights_tensor =
-      tensor_builder->at(recurrent_to_output_weights_index).get();
+      tensor_reg->getAclTensor(recurrent_to_output_weights_index).get();
 
-  auto forget_gate_bias_tensor = tensor_builder->at(forget_gate_bias_index).get();
-  auto cell_bias_tensor = tensor_builder->at(cell_bias_index).get();
-  auto output_gate_bias_tensor = tensor_builder->at(output_gate_bias_index).get();
-  auto output_state_in_tensor = tensor_builder->at(output_state_in_index).get();
-  auto cell_state_in_tensor = tensor_builder->at(cell_state_in_index).get();
+  auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index).get();
+  auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index).get();
+  auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index).get();
+  auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index).get();
+  auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index).get();
 
-  auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
-
-  auto fn = std::make_unique<T_ACLLayer>();
+  auto act_info = asActivationLayerInfo(activation);
 
   ::arm_compute::LSTMParams<T_Tensor> lstm_params{};
   if (has_cifg_param)
   {
     auto input_to_input_weights_tensor =
-        tensor_builder->at(input_to_input_weights_index).get(); // optional
+        tensor_reg->getAclTensor(input_to_input_weights_index).get(); // optional
     auto recurrent_to_input_weights_tensor =
-        tensor_builder->at(recurrent_to_input_weights_index).get(); // optional
+        tensor_reg->getAclTensor(recurrent_to_input_weights_index).get(); // optional
     auto cell_to_input_weights_handle =
-        has_peephole_param ? tensor_builder->at(cell_to_input_weights_index).get()->handle()
+        has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index).get()->handle()
                            : nullptr; // optional (non-cifg && peephole)
-    auto input_gate_bias_tensor = tensor_builder->at(input_gate_bias_index).get(); // optional
+    auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index).get(); // optional
     lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(),
                                 recurrent_to_input_weights_tensor->handle(),
                                 cell_to_input_weights_handle, input_gate_bias_tensor->handle());
@@ -161,40 +183,42 @@ kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands,
   if (has_peephole_param)
   {
     auto cell_to_forget_weights_tensor =
-        tensor_builder->at(cell_to_forget_weights_index).get(); // optional
+        tensor_reg->getAclTensor(cell_to_forget_weights_index).get(); // optional
     auto cell_to_output_weights_tensor =
-        tensor_builder->at(cell_to_output_weights_index).get(); // optional
+        tensor_reg->getAclTensor(cell_to_output_weights_index).get(); // optional
     lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(),
                                     cell_to_output_weights_tensor->handle());
   }
   if (has_projection_param)
   {
-    auto projection_weights_tensor = tensor_builder->at(projection_weights_index).get(); // optional
-    auto projection_bias_handle = has_projection_bias
-                                      ? tensor_builder->at(projection_bias_index).get()->handle()
-                                      : nullptr; // optional
+    auto projection_weights_tensor =
+        tensor_reg->getAclTensor(projection_weights_index).get(); // optional
+    auto projection_bias_handle =
+        has_projection_bias ? tensor_reg->getAclTensor(projection_bias_index).get()->handle()
+                            : nullptr; // optional
     lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle);
   }
 
-  fn->configure(input_tensor->handle(), input_to_forget_weights_tensor->handle(),
-                input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
-                recurrent_to_forget_weights_tensor->handle(),
-                recurrent_to_cell_weights_tensor->handle(),
-                recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
-                cell_bias_tensor->handle(), output_gate_bias_tensor->handle(),
-                output_state_in_tensor->handle(), cell_state_in_tensor->handle(),
-                scratch_buffer_tensor->handle(), output_state_out_tensor->handle(),
-                cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info,
-                cell_clip, projection_clip);
+  auto fn = generateLayer<T_ACLLayer>(
+      input_tensor->handle(), input_to_forget_weights_tensor->handle(),
+      input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
+      recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(),
+      recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
+      cell_bias_tensor->handle(), output_gate_bias_tensor->handle(),
+      output_state_in_tensor->handle(), cell_state_in_tensor->handle(),
+      scratch_buffer_tensor->handle(), output_state_out_tensor->handle(),
+      cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info, cell_clip,
+      projection_clip);
 
   return std::make_unique<T_FunctionWrapper>(std::move(fn));
 }
 
 template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer,
-          typename T_TensorBuilder>
+          typename T_TensorBuilder, typename T_TensorRegistry>
 std::unique_ptr<exec::IFunction>
 kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Operands &operands,
-                        const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout)
+                        const std::shared_ptr<T_TensorBuilder> &tensor_builder,
+                        const std::shared_ptr<T_TensorRegistry> &tensor_reg, ir::Layout layout)
 {
   using ir::operation::FullyConnected;
 
@@ -236,16 +260,13 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope
     reshape.dim(1) = input_size; /* W */
   }
 
-  auto output_tensor = tensor_builder->at(output_index).get();
-  const auto input_tensor = tensor_builder->at(input_index).get();
-  const auto weight_tensor = tensor_builder->at(weight_index).get();
-  const auto bias_tensor = tensor_builder->at(bias_index).get();
+  auto output_tensor = tensor_reg->getAclTensor(output_index).get();
+  const auto input_tensor = tensor_reg->getAclTensor(input_index).get();
+  const auto weight_tensor = tensor_reg->getAclTensor(weight_index).get();
+  const auto bias_tensor = tensor_reg->getAclTensor(bias_index).get();
   const auto frontend_layout = layout;
   const auto acl_layout = output_tensor->handle()->info()->data_layout();
 
-  auto fn =
-      std::make_unique<T_ACLLayer>(tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
   typename T_ACLLayer::KernelType kernel_type = T_ACLLayer::KernelType::GENERAL;
   if (operands.at(weight_index).isConstant())
   {
@@ -253,20 +274,18 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope
     assert(operands.at(weight_index).data());
   }
 
-  fn->configure(
-      input_tensor->handle(), weight_tensor->handle(), bias_tensor->handle(),
-      output_tensor->handle(), needs_reshape,
-      ::onert::backend::acl_common::asTensorShape(
-          reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)),
-      kernel_type);
+  auto fn = generateLayer<T_ACLLayer>(
+      tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+      weight_tensor->handle(), bias_tensor->handle(), output_tensor->handle(), needs_reshape,
+      asTensorShape(reshape, frontend_layout, asRuntimeLayout(acl_layout)), kernel_type);
 
   return std::make_unique<T_FunctionWrapper>(std::move(fn));
 }
 
-template <typename T_ACLLayer, typename T_PoolOp, typename T_TensorBuilder>
+template <typename T_ACLLayer, typename T_PoolOp, typename T_AclTensorRegistry>
 std::unique_ptr<::arm_compute::IFunction>
 kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands,
-                const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout,
+                const std::shared_ptr<T_AclTensorRegistry> &tensor_reg, ir::Layout layout,
                 ::arm_compute::PoolingType pooling_type)
 {
   const auto ofm_index{node.getOutputs().at(0)};
@@ -294,16 +313,14 @@ kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands,
   VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl;
   VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl;
 
-  auto ofm_tensor = tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = tensor_builder->at(ifm_index).get();
+  auto ofm_tensor = tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = tensor_reg->getAclTensor(ifm_index).get();
 
   ::arm_compute::PoolingLayerInfo info{
       pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
-      acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */};
-
-  auto fn = std::make_unique<T_ACLLayer>();
+      asPadStrideInfo(padding, stride), true /* exclude_padding */};
 
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), info);
+  auto fn = generateLayer<T_ACLLayer>(ifm_tensor->handle(), ofm_tensor->handle(), info);
 
   return fn;
 }
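
The recurring change in the hunks above replaces the two-step std::make_unique<T_ACLLayer>() + fn->configure(...) pattern with a single generateLayer<T_ACLLayer>(...) call. A minimal standalone sketch of what such a variadic helper can look like, assuming only that each ACL layer is default-constructible and exposes a matching configure(); DemoLayer and main below are illustrative stand-ins, not upstream code:

#include <iostream>
#include <memory>
#include <utility>

// Construct a layer, configure it, and hand it back in one call.
// The arguments are perfectly forwarded to configure(), so tensor handles,
// layer infos, and policies all pass through unchanged.
template <typename T_Layer, typename... Args>
std::unique_ptr<T_Layer> generateLayer(Args &&... args)
{
  auto l = std::make_unique<T_Layer>();
  l->configure(std::forward<Args>(args)...);
  return l;
}

// Stand-in for an ACL layer: default-constructible with a configure() method.
struct DemoLayer
{
  void configure(int in, int out) { std::cout << "configure(" << in << ", " << out << ")\n"; }
};

int main()
{
  auto fn = generateLayer<DemoLayer>(1, 2); // replaces make_unique + configure
  (void)fn;
}

The Conv2D and FullyConnected call sites additionally pass an internal buffer manager as the first argument; that case is handled by an overload that forwards the manager to the layer constructor instead of to configure().
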
index 6b03fdf..9145201 100644
@@ -25,6 +25,7 @@
 #include "ir/OperandIndexMap.h"
 #include <ir/Operands.h>
 #include "AclTensorManager.h"
+#include "AclTensorRegistry.h"
 #include <memory>
 #include "ParentInfo.h"
 #include <util/Utils.h>
@@ -48,7 +49,8 @@ class AclTensorBuilder : public ITensorBuilder
 public:
   using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>;
 
-  AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr);
+  AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
+                   const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg);
 
   /**
    * @brief     Register tensor information to allocate on ACL-CL backend
@@ -63,19 +65,13 @@ public:
   void notifyLastUse(const ir::OperandIndex &) override;
 
   bool isRegistered(const ir::OperandIndex &) const override;
-  std::shared_ptr<backend::ITensorRegistry> tensorRegistry() override { return nullptr; }
 
   void prepare(void) override;
   void allocate() override;
   void postFunctionPrepare() override;
 
-  std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override;
-  void iterate(const IterateFunction &fn) override;
-
   std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
 
-  std::shared_ptr<T_ITensor> at(const ir::OperandIndex &ind);
-
   T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); }
 
   void setUsesCount(const ir::OperandIndex &index, size_t num_uses)
@@ -100,8 +96,6 @@ public:
    */
   bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child);
 
-  bool supportDynamicTensor() override { return false; }
-
 private:
   void buildTensors(void);
   ir::OperandIndex findRootParent(ir::OperandIndex index);
@@ -113,6 +107,7 @@ private:
   ir::OperandIndexMap<size_t> _uses_count_map;
 
   std::unique_ptr<T_AclTensorManager> _tensor_mgr;
+  std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> _tensor_reg;
 
   // for linear executor
   std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq;
@@ -140,9 +135,10 @@ namespace acl_common
 {
 
 template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands,
-                                                                     T_AclTensorManager *tensor_mgr)
-    : _operands{operands}, _tensor_mgr{tensor_mgr}
+AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(
+    const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
+    const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg)
+    : _operands{operands}, _tensor_mgr{tensor_mgr}, _tensor_reg{tensor_reg}
 {
   assert(_tensor_mgr);
 }
@@ -310,28 +306,6 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::postFunctionPrepare(voi
 }
 
 template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-std::shared_ptr<ITensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::tensorAt(const ir::OperandIndex &ind)
-{
-  return _tensor_mgr->at(ind);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::iterate(const IterateFunction &fn)
-{
-  _tensor_mgr->iterate(fn);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-std::shared_ptr<T_ITensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::at(const ir::OperandIndex &ind)
-{
-  auto ret = _tensor_mgr->at(ind);
-  assert(ret != nullptr);
-  return ret;
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
 std::unique_ptr<ITensorManager>
 AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::releaseStaticTensorManager(void)
 {
diff --git a/runtime/onert/backend/acl_common/AclTensorRegistry.h b/runtime/onert/backend/acl_common/AclTensorRegistry.h
new file mode 100644
index 0000000..1ef9f4b
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__
+#define __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__
+
+#include "backend/ITensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+/**
+ * @brief Tensor registry class for acl backends
+ *
+ * This is implemented as a wrapper of AclTensorManager.
+ */
+template <typename T_AclTensorManager> class AclTensorRegistry : public ITensorRegistry
+{
+public:
+  AclTensorRegistry(T_AclTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {}
+
+  std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
+  {
+    return _tensor_mgr->at(ind);
+  }
+
+  std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override
+  {
+    return getITensor(ind);
+  }
+
+  auto getAclTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind); }
+
+private:
+  T_AclTensorManager *_tensor_mgr;
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__
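
The registry above is deliberately thin: it owns nothing and forwards every lookup to AclTensorManager::at(). A self-contained sketch of the same delegation shape, with a stub manager in place of the real AclTensorManager (all names here are illustrative):

#include <iostream>
#include <map>
#include <memory>

// Stub stand-ins for the real onert types, just to show the delegation shape.
struct Tensor { int id; };
struct StubTensorManager
{
  std::map<int, std::shared_ptr<Tensor>> tensors;
  std::shared_ptr<Tensor> at(int ind) { return tensors[ind]; }
};

// Non-owning wrapper: the registry keeps a raw pointer to the manager and
// forwards lookups, so builder, constant initializer, and kernel generator
// can all share one source of truth for tensors.
template <typename T_Manager> class TensorRegistry
{
public:
  explicit TensorRegistry(T_Manager *mgr) : _mgr{mgr} {}
  std::shared_ptr<Tensor> getTensor(int ind) { return _mgr->at(ind); }

private:
  T_Manager *_mgr;
};

int main()
{
  StubTensorManager mgr;
  mgr.tensors[0] = std::make_shared<Tensor>(Tensor{0});
  TensorRegistry<StubTensorManager> reg{&mgr};
  std::cout << "tensor id: " << reg.getTensor(0)->id << "\n";
}
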
index a5bbe16..67dcc81 100644
@@ -18,6 +18,7 @@
 
 #include "Swizzle.h"
 #include "ir/DataType.h"
+#include "ir/operation/ElementwiseActivation.h"
 #include <memory>
 
 namespace
@@ -177,6 +178,50 @@ namespace acl_common
   }
 }
 
+::arm_compute::ActivationLayerInfo
+asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, float alpha,
+                      float beta)
+{
+  switch (op_type)
+  {
+    case ir::operation::ElementwiseActivation::Type::RELU:
+      if (beta == 0.f)
+      {
+        if (alpha == ir::operation::ElementwiseActivation::infinity)
+        {
+          return ::arm_compute::ActivationLayerInfo{
+              ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+        }
+        else
+        {
+          return ::arm_compute::ActivationLayerInfo{
+              ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, alpha};
+        }
+      }
+      else
+      {
+        return ::arm_compute::ActivationLayerInfo{
+            ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, alpha, beta};
+      }
+    case ir::operation::ElementwiseActivation::Type::TANH:
+      return ::arm_compute::ActivationLayerInfo{
+          ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, alpha, beta};
+    case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+      // NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
+      // TODO In the ACL and NNAPI specs, Logistic currently always uses L=1, k=1, x0=0
+      //      (i.e. always sigmoid) regardless of the parameter values.
+      //      If ACL ever supports a non-sigmoid logistic, fix the parameter values here.
+      return ::arm_compute::ActivationLayerInfo{
+          ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
+    case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
+      return ::arm_compute::ActivationLayerInfo{
+          ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha};
+    default:
+      throw std::runtime_error{"Not supported yet"};
+  }
+}
+
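
The RELU branch above encodes a three-way mapping: beta == 0 with unbounded alpha yields plain RELU; beta == 0 with finite alpha yields BOUNDED_RELU (clamped above at alpha, e.g. ReLU6); anything else yields LU_BOUNDED_RELU (clamped to [beta, alpha]). A standalone sketch of just that decision, with a local enum standing in for the ACL activation types (illustrative only):

#include <iostream>
#include <limits>

enum class ReluKind { RELU, BOUNDED_RELU, LU_BOUNDED_RELU };

// Mirrors the RELU case of asActivationLayerInfo: alpha is the upper bound,
// beta the lower bound; alpha == infinity means "no upper bound".
ReluKind classifyRelu(float alpha, float beta)
{
  const float inf = std::numeric_limits<float>::infinity();
  if (beta == 0.f)
    return (alpha == inf) ? ReluKind::RELU : ReluKind::BOUNDED_RELU;
  return ReluKind::LU_BOUNDED_RELU; // clamp to [beta, alpha]
}

int main()
{
  const float inf = std::numeric_limits<float>::infinity();
  std::cout << (classifyRelu(inf, 0.f) == ReluKind::RELU) << "\n";           // plain ReLU
  std::cout << (classifyRelu(6.f, 0.f) == ReluKind::BOUNDED_RELU) << "\n";   // ReLU6
  std::cout << (classifyRelu(1.f, -1.f) == ReluKind::LU_BOUNDED_RELU) << "\n";
}
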
 arm_compute::Coordinates asCoordinates(const ir::Operand &operand, int32_t rank,
                                        ir::Layout frontend_layout, ir::Layout backend_layout)
 {
@@ -223,11 +268,6 @@ std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunct
   return std::make_unique<AclFunction>(std::move(layer));
 }
 
-std::unique_ptr<AclClFunction> asAclClFunction(std::unique_ptr<::arm_compute::IFunction> &&layer)
-{
-  return std::make_unique<AclClFunction>(std::move(layer));
-}
-
 ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout)
 {
   switch (data_layout)
@@ -265,6 +305,21 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type)
   }
 }
 
+arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir)
+{
+  switch (pool_type_ir)
+  {
+    case ir::operation::Pool2D::PoolType::AVG:
+      return arm_compute::PoolingType::AVG;
+    case ir::operation::Pool2D::PoolType::L2:
+      return arm_compute::PoolingType::L2;
+    case ir::operation::Pool2D::PoolType::MAX:
+      return arm_compute::PoolingType::MAX;
+    default:
+      throw std::runtime_error("convertPoolType: Not supported operation yet");
+  }
+}
+
 arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
 {
   switch (reduce_type_ir)
index 9362098..380321c 100644
@@ -25,7 +25,9 @@
 #include "ir/Layout.h"
 #include "ir/InternalType.h"
 #include "ir/Operand.h"
+#include "ir/operation/Pool2D.h"
 #include "ir/operation/Reduce.h"
+#include "ir/operation/ElementwiseActivation.h"
 #include "ir/Shape.h"
 #include "ir/TypeInfo.h"
 #include "ir/Coordinates.h"
@@ -59,6 +61,9 @@ namespace acl_common
                                              const ir::Stride &stride);
 
 ::arm_compute::ActivationLayerInfo asActivationLayerInfo(ir::Activation act_code);
+::arm_compute::ActivationLayerInfo
+asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, float alpha,
+                      float beta);
 
 arm_compute::Coordinates asCoordinates(const ir::Operand &operand, int32_t rank,
                                        ir::Layout frontend_layout, ir::Layout backend_layout);
@@ -67,7 +72,6 @@ std::set<uint32_t> asSet(const ir::Operand &operand, int32_t rank, ir::Layout fr
                          ir::Layout backend_layout);
 
 std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunction> &&layer);
-std::unique_ptr<AclClFunction> asAclClFunction(std::unique_ptr<::arm_compute::IFunction> &&layer);
 
 template <typename T_Function>
 std::unique_ptr<T_Function> asFunction(std::unique_ptr<::arm_compute::IFunction> &&fn)
@@ -78,6 +82,7 @@ std::unique_ptr<T_Function> asFunction(std::unique_ptr<::arm_compute::IFunction>
 ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout);
 ir::DataType asRuntimeDataType(::arm_compute::DataType data_type);
 
+arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir);
 arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir);
 
 } // namespace acl_common
index a0b145e..35d6e4e 100644
@@ -48,10 +48,13 @@ public:
     const auto &operands = graph.operands();
     const auto &operations = graph.operations();
     auto context = std::make_unique<BackendContext>(this, &graph);
-    auto tb = std::make_shared<TensorBuilder>(operands, createTensorManager(is_linear_executor));
+    auto tm = createTensorManager(is_linear_executor);
+    auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
+    auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+    context->tensor_registry = tr;
     context->tensor_builder = tb;
-    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
-    context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb);
+    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+    context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
     context->tensor_register = nullptr;
     context->optimizer = std::make_shared<Optimizer>(context.get());
     return context;
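
The hunk above fixes the construction order: the tensor manager is created first, wrapped in a shared registry, and both are handed to the tensor builder, while the constant initializer now only needs the registry. A compact sketch of that ownership shape with stub types (names are illustrative, not the upstream API):

#include <memory>

struct Manager { /* owns the actual backend tensors */ };

// Non-owning view shared by constant initializer and kernel generator.
struct Registry
{
  explicit Registry(Manager *mgr) : _mgr{mgr} {}
  Manager *_mgr;
};

// Takes ownership of the manager (as AclTensorBuilder does via unique_ptr)
// and keeps the shared registry alive alongside it.
struct Builder
{
  Builder(Manager *mgr, std::shared_ptr<Registry> reg) : _mgr{mgr}, _reg{std::move(reg)} {}
  std::unique_ptr<Manager> _mgr;
  std::shared_ptr<Registry> _reg;
};

int main()
{
  auto *tm = new Manager{};                    // mirrors createTensorManager(...)
  auto tr = std::make_shared<Registry>(tm);    // registry wraps the raw manager
  auto tb = std::make_shared<Builder>(tm, tr); // builder assumes ownership of tm
  (void)tb; // the kernel generator would receive both tb and tr; the initializer only tr
}
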
index 4191b27..79edb9d 100644
@@ -24,100 +24,12 @@ namespace acl_neon
 {
 
 ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
-                                         const std::shared_ptr<TensorBuilder> &tensor_builder)
-    : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+                                         const std::shared_ptr<ITensorRegistry> &tensor_reg)
+    : acl_common::AclConstantInitializer{operands, tensor_reg}
 {
   // DO NOTHING
 }
 
-void ConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index)
-{
-  assert(node.getInputs().size() > index);
-
-  const auto &input_index = node.getInputs().at(index);
-  const auto &input_obj = _operands.at(input_index);
-  registerCopyInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
-{
-  assert(node.getInputs().size() > index);
-
-  const auto &input_index = node.getInputs().at(index);
-  const auto &input_obj = _operands.at(input_index);
-  registerPermuteInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
-{
-  const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
-  const auto &block_size_obj = _operands.at(block_size_index);
-
-  if (block_size_obj.isConstant())
-  {
-    _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
-      assert(model_obj.data());
-      const auto &shape = model_obj.shape();
-      const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
-      assert(model_obj.shape().rank() == 1);
-      obj.access([&](ITensor &tensor) {
-        for (size_t i = 0; i < shape.num_elements(); ++i)
-        {
-          const int32_t value = base[shape.num_elements() - i - 1];
-          int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
-                                                      tensor.calcOffset({static_cast<int32_t>(i)}));
-          *into = value;
-        }
-      });
-    };
-  }
-}
-
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
-  permuteInputInitialize(node, ir::operation::Conv2D::KERNEL);
-  copyInputInitialize(node, ir::operation::Conv2D::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
-  permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL);
-  copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
-{
-  copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT);
-  copyInputInitialize(node, ir::operation::FullyConnected::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::LSTM &node)
-{
-  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS);
-  copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS);
-  copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS);
-  copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::RNN &node)
-{
-  copyInputInitialize(node, ir::operation::RNN::WEIGHTS);
-  copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::RNN::BIAS);
-}
-
 void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
 {
   const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE);
@@ -173,11 +85,6 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
   }
 }
 
-void ConstantInitializer::visit(const ir::operation::TransposeConv &node)
-{
-  permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL);
-}
-
 } // namespace acl_neon
 } // namespace backend
 } // namespace onert
index 6b4c1f1..c7d71cd 100644
@@ -17,9 +17,7 @@
 #ifndef __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
 #define __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
 
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
-#include "TensorBuilder.h"
+#include "AclConstantInitializer.h"
 
 namespace onert
 {
@@ -28,29 +26,15 @@ namespace backend
 namespace acl_neon
 {
 
-class ConstantInitializer : public IConstantInitializer
+class ConstantInitializer : public acl_common::AclConstantInitializer
 {
 public:
   ConstantInitializer(const ir::Operands &operands,
-                      const std::shared_ptr<TensorBuilder> &tensor_builder);
+                      const std::shared_ptr<ITensorRegistry> &tensor_reg);
 
 public:
-  void visit(const ir::operation::BatchToSpaceND &) override;
-  void visit(const ir::operation::Conv2D &) override;
-  void visit(const ir::operation::DepthwiseConv2D &) override;
-  void visit(const ir::operation::FullyConnected &) override;
-  void visit(const ir::operation::LSTM &) override;
-  void visit(const ir::operation::RNN &) override;
-  void visit(const ir::operation::SpaceToBatchND &) override;
-  void visit(const ir::operation::TransposeConv &) override;
-
-private:
-  std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
-  void copyInputInitialize(const ir::Operation &node, uint32_t index);
-  void permuteInputInitialize(const ir::Operation &node, uint32_t index);
-
-private:
-  std::shared_ptr<TensorBuilder> _tensor_builder;
+  using acl_common::AclConstantInitializer::visit;
+  void visit(const ir::operation::SpaceToBatchND &node) final;
 };
 
 } // namespace acl_neon
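
The `using acl_common::AclConstantInitializer::visit;` line above is load-bearing: in C++, declaring any visit overload in the derived class hides all base-class visit overloads, so without the using-declaration the initializer would stop dispatching the operations handled by the common base. A minimal standalone illustration of the pitfall and the fix (stub types, not upstream code):

#include <iostream>

struct OpA {};
struct OpB {};

struct Base
{
  virtual ~Base() = default;
  virtual void visit(const OpA &) { std::cout << "Base handles OpA\n"; }
  virtual void visit(const OpB &) { std::cout << "Base handles OpB\n"; }
};

struct Derived : Base
{
  using Base::visit; // without this, visit(OpA) would be hidden, not just un-overridden
  void visit(const OpB &) final { std::cout << "Derived overrides OpB\n"; }
};

int main()
{
  Derived d;
  d.visit(OpA{}); // still reaches the base handler thanks to the using-declaration
  d.visit(OpB{});
}
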
index 1195b83..6d53c12 100644
@@ -44,11 +44,12 @@ using ::onert::backend::acl_common::asAclFunction;
 using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
     ::arm_compute::ITensor, ::arm_compute::NEActivationLayer, acl_common::AclFunction>;
 
-KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx,
-                                 const ir::Operations &operations_ctx,
-                                 const std::shared_ptr<TensorBuilder> &tensor_builder)
+KernelGenerator::KernelGenerator(
+    const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+    const std::shared_ptr<TensorBuilder> &tensor_builder,
+    const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
     : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
-      _current_op_seq_layout(ir::Layout::UNKNOWN)
+      _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
 {
   // DO NOTHING
 }
@@ -70,26 +71,6 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
   }
 }
 
-void KernelGenerator::visit(const ir::operation::Abs &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
-
-  auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
 void KernelGenerator::visit(const ir::operation::ArgMax &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
@@ -97,8 +78,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
 
   const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
   auto frontend_layout = _current_op_seq_layout;
   auto backend_layout = ifm_tensor->layout();
 
@@ -111,14 +92,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
   const auto fixed_axis =
       acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
 
-  auto fn = std::make_unique<::arm_compute::NEArgMinMaxLayer>();
-
-  fn->configure(ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(),
-                arm_compute::ReductionOperation::ARG_IDX_MAX);
+  auto fn = acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>(
+      ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(),
+      arm_compute::ReductionOperation::ARG_IDX_MAX);
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
@@ -128,50 +106,67 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
   const auto block_size_index{
       node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto block_size_tensor = _tensor_builder->at(block_size_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
 
   assert(_ctx.at(block_size_index).data());
 
-  auto fn = std::make_unique<::arm_compute::NEBatchToSpaceLayer>();
-
-  fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NEBatchToSpaceLayer>(
+      ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::Cast &node)
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
+  const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  std::unique_ptr<::arm_compute::IFunction> fn;
-  if (ifm_tensor->data_type() == ofm_tensor->data_type())
-  {
-    auto l = std::make_unique<::arm_compute::NECopy>();
+  const auto activation = node.param().activation;
 
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle());
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
 
-    fn = std::move(l);
-  }
-  else
+  std::unique_ptr<arm_compute::IFunction> fn;
+  switch (node.param().arithmetic_type)
   {
-    auto l = std::make_unique<::arm_compute::NECast>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
-
-    fn = std::move(l);
+    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEArithmeticAddition>(
+          lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+          arm_compute::ConvertPolicy::SATURATE);
+      break;
+    }
+    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEArithmeticSubtraction>(
+          lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+          arm_compute::ConvertPolicy::SATURATE);
+      break;
+    }
+    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+    {
+      // For scale 1.0, the only RoundingPolicy allowed is TO_ZERO
+      fn = acl_common::generateLayer<arm_compute::NEPixelWiseMultiplication>(
+          lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
+          arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
+      break;
+    }
+    case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEElementwiseDivision>(
+          lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+      break;
+    }
+    default:
+      assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations");
+      break;
   }
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = std::make_unique<exec::FunctionSequence>(
+      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
 }
 
 void KernelGenerator::visit(const ir::operation::Conv2D &node)
@@ -195,20 +190,18 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
                                             ker_width, ker_height);
   const auto activation = node.param().activation;
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto ker_tensor = _tensor_builder->at(ker_index).get();
-  auto bias_tensor = _tensor_builder->at(bias_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+  auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
 
   const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
   const auto act_info = acl_common::asActivationLayerInfo(activation);
 
-  auto fn = std::make_unique<::arm_compute::NEConvolutionLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
-  fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
-                ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(),
-                ::arm_compute::Size2D(1U, 1U), act_info);
+  auto fn = acl_common::generateLayer<arm_compute::NEConvolutionLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+      ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
+      ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
 
   _return_fn = asAclFunction(std::move(fn));
 }
@@ -221,16 +214,13 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
   auto block_size = node.param().block_size;
   assert(block_size > 0);
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
-  auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayer>();
+  auto fn = acl_common::generateLayer<arm_compute::NEDepthToSpaceLayer>(
+      input_tensor->handle(), output_tensor->handle(), block_size);
 
-  fn->configure(input_tensor->handle(), output_tensor->handle(), block_size);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
@@ -255,67 +245,23 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
   const auto multiplier = node.param().multiplier;
   const auto activation = node.param().activation;
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto ker_tensor = _tensor_builder->at(ker_index).get();
-  auto bias_tensor = _tensor_builder->at(bias_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+  auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
 
   const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
   const auto act_info = acl_common::asActivationLayerInfo(activation);
 
   {
-    auto fn = std::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
-
-    fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
-                  ofm_tensor->handle(), conv_info, multiplier, act_info);
+    auto fn = acl_common::generateLayer<arm_compute::NEDepthwiseConvolutionLayer>(
+        ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
+        conv_info, multiplier, act_info);
 
     _return_fn = asAclFunction(std::move(fn));
   }
 }
 
-void KernelGenerator::visit(const ir::operation::Dequantize &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEDequantizationLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
-  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
-      node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
-
-  const auto ofm_index{node.getOutputs().at(0)};
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  const auto activation = node.param().activation;
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclFunction(std::move(raw_fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
-  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
-      node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG);
-
-  const auto ofm_index{node.getOutputs().at(0)};
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  const auto activation = node.param().activation;
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclFunction(std::move(raw_fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
 void KernelGenerator::visit(const ir::operation::Concat &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
@@ -336,80 +282,223 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
     return;
   }
 
-  auto output_tensor = _tensor_builder->at(ofm_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get();
   std::vector<::arm_compute::ITensor *> input_tensors;
   for (const auto &ifm_ind : input_indexes)
-    input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
+    input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
 
   std::unique_ptr<::arm_compute::IFunction> fn;
   if (input_indexes.size() < 2)
   {
-    auto l = std::make_unique<::arm_compute::NECopy>();
-    l->configure(input_tensors.at(0), output_tensor->handle());
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensors.at(0),
+                                                        output_tensor->handle());
   }
   else
   {
-    auto l = std::make_unique<::arm_compute::NEConcatenateLayer>();
     const auto rank = _ctx.at(ofm_index).shape().rank();
     const auto frontend_layout = _current_op_seq_layout;
     const auto backend_layout = output_tensor->layout();
     const auto fixed_axis =
         acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
-    l->configure(input_tensors, output_tensor->handle(), fixed_axis);
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEConcatenateLayer>(
+        input_tensors, output_tensor->handle(), fixed_axis);
   }
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
 {
-  const auto output_index{node.getOutputs().at(0)};
-  const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
-  const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
+
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+
+  const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
+      node.param().op_type, node.param().alpha, node.param().beta);
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto lookups_tensor = _tensor_builder->at(lookups_index).get();
-  auto values_tensor = _tensor_builder->at(values_index).get();
+  std::unique_ptr<arm_compute::IFunction> fn;
+  if (node.param().op_type == ir::operation::ElementwiseActivation::Type::LOGISTIC)
+  {
+    // NOTE NEActivationLayer can produce erroneous results; this is caused by 'vexpq_f32()'.
+    // The NEON function returns values outside the representable float range as 'NaN' instead
+    // of 'INF', and the 'NaN' then corrupts the result of this op.
+    fn = acl_common::generateLayer<arm_compute::NEActivationLayerEx>(
+        ifm_tensor->handle(), ofm_tensor->handle(), act_info);
+  }
+  else
+  {
+    fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(ifm_tensor->handle(),
+                                                                   ofm_tensor->handle(), act_info);
+  }
 
-  auto fn = std::make_unique<::arm_compute::NEEmbeddingLookup>();
+  _return_fn = asAclFunction(std::move(fn));
+}
 
-  fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
 
-  auto acl_fn = asAclFunction(std::move(fn));
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
 
-  _return_fn = std::move(acl_fn);
+  std::unique_ptr<arm_compute::IFunction> fn;
+  switch (node.param().op_type)
+  {
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
+    {
+      fn = acl_common::generateLayer<arm_compute::NELogicalAnd>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+    {
+      fn = acl_common::generateLayer<arm_compute::NELogicalOr>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEElementwiseMax>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEElementwiseMin>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    default:
+    {
+      std::string err_msg("acl_neon KernelGenerator : " + node.name() +
+                          " is not an elementwise-binary operation");
+      throw std::runtime_error{err_msg};
+    }
+  }
+  _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::Floor &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
 {
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
+
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+
+  std::unique_ptr<arm_compute::IFunction> fn;
+  switch (node.param().op_type)
+  {
+    case ir::operation::ElementwiseUnary::Type::ABS:
+    {
+      const ::arm_compute::ActivationLayerInfo act_info{
+          ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+      fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
+          input_tensor->handle(), output_tensor->handle(), act_info);
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::CAST:
+    {
+      if (input_tensor->data_type() == output_tensor->data_type())
+      {
+        fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensor->handle(),
+                                                            output_tensor->handle());
+      }
+      else
+      {
+        fn = acl_common::generateLayer<arm_compute::NECast>(
+            input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+      }
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEDequantizationLayer>(input_tensor->handle(),
+                                                                         output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::EXP:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEExpLayer>(input_tensor->handle(),
+                                                              output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::FLOOR:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEFloor>(input_tensor->handle(),
+                                                           output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEBitwiseNot>(input_tensor->handle(),
+                                                                output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::NEG:
+    {
+      fn = acl_common::generateLayer<arm_compute::NENegLayer>(input_tensor->handle(),
+                                                              output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::RSQRT:
+    {
+      fn = acl_common::generateLayer<arm_compute::NERsqrtLayer>(input_tensor->handle(),
+                                                                output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::SQRT:
+    {
+      const ::arm_compute::ActivationLayerInfo act_info{
+          ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
 
-  auto fn = std::make_unique<::arm_compute::NEFloor>();
+      fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
+          input_tensor->handle(), output_tensor->handle(), act_info);
+      break;
+    }
+    default:
+    {
+      throw std::runtime_error("acl_neon KernelGenerator : " + node.name() +
+                               "is not supported yet");
+      break;
+    }
+  }
+  _return_fn = asAclFunction(std::move(fn));
+}
 
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
+void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
+  const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
 
-  auto acl_fn = asAclFunction(std::move(fn));
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+  auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
 
-  _return_fn = std::move(acl_fn);
+  auto fn = acl_common::generateLayer<arm_compute::NEEmbeddingLookup>(
+      values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::FullyConnected &node)
 {
   const auto output_index{node.getOutputs().at(0)};
-  auto output_tensor = _tensor_builder->at(output_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
   const auto activation = node.param().activation;
 
   auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
                                                 ::arm_compute::NEFullyConnectedReshapingLayer>(
-      node, _ctx, _tensor_builder, _current_op_seq_layout);
+      node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
   _return_fn = std::make_unique<exec::FunctionSequence>(
       std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
 }
@@ -423,21 +512,18 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
   const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
   const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto hits_tensor = _tensor_builder->at(hits_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get();
 
-  auto lookups_tensor = _tensor_builder->at(lookups_index).get();
-  auto keys_tensor = _tensor_builder->at(keys_index).get();
-  auto values_tensor = _tensor_builder->at(values_index).get();
+  auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+  auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get();
+  auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
 
-  auto fn = std::make_unique<::arm_compute::NEHashtableLookup>();
+  auto fn = acl_common::generateLayer<arm_compute::NEHashtableLookup>(
+      lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+      output_tensor->handle(), hits_tensor->handle());
 
-  fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
-                output_tensor->handle(), hits_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Gather &node)
@@ -453,9 +539,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
   // Converting in reverse order
   const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto indices_tensor = _tensor_builder->at(indices_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get();
   const auto backend_layout = ofm_tensor->layout();
   UNUSED_RELEASE(backend_layout);
 
@@ -471,8 +557,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
   assert(backend_layout == indices_tensor->layout());
   assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
 
-  auto fn = std::make_unique<::arm_compute::NEGatherEx>();
-
   // input is n-D, indices k-D, output is (n + k - 1)-D
   size_t n = ifm_rank;
   assert(n == ifm_tensor->num_dimensions());
@@ -495,15 +579,14 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
         acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
   }
 
-  fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
+  auto fn = acl_common::generateLayer<arm_compute::NEGatherEx>(
+      ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
 
   // acl_neon does not revert the disabled dim_correction because acl_neon's kernels use
   // arm_compute::TensorInfo::offset_element_in_bytes(), which would cause an error when a
   // kernel accesses a higher dimension whose value is 1
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
@@ -513,17 +596,16 @@ void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
   const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
   const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto gamma_tensor = _tensor_builder->at(gamma_index).get();
-  auto beta_tensor = _tensor_builder->at(beta_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get();
+  auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get();
   auto epsilon = node.param().epsilon;
   auto activation = node.param().activation;
 
-  auto fn = std::make_unique<::arm_compute::NEInstanceNormalizationLayerEx>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(),
-                beta_tensor->handle(), epsilon);
+  auto fn = acl_common::generateLayer<arm_compute::NEInstanceNormalizationLayerEx>(
+      ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
+      epsilon);
 
   _return_fn = std::make_unique<exec::FunctionSequence>(
       asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
@@ -548,32 +630,16 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
   float beta = 0.5f;                             // pow(reduction, -0.5) = 1 / sqrt(reduction)
   float bias = 0.0f;                             // Don't offset the reduction.
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
   const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
                                                                radius, alpha, beta, bias, false);
 
-  auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>(
+      ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
 
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
-{
-  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
-      node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2);
-
-  const auto ofm_index{node.getOutputs().at(0)};
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  const auto activation = node.param().activation;
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclFunction(std::move(raw_fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
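
Note: a quick check of the constants in the L2Normalization hunk above. The
layer is configured as cross-map normalization with scaling disabled (the
trailing false), so ACL computes

    out_i = in_i / (bias + alpha * sum_j(in_j^2))^beta

and with bias = 0.0 and beta = 0.5 (and alpha = 1, set just above this hunk)
this reduces to in_i / sqrt(sum_j(in_j^2)), i.e. plain L2 normalization,
matching the "pow(reduction, -0.5) = 1 / sqrt(reduction)" comment.
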
@@ -587,142 +653,22 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod
   auto beta = node.param().beta;
   auto bias = node.param().bias;
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
   const auto norm_info = ::arm_compute::NormalizationLayerInfo(
       ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
 
-  auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>(
+      ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
 
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
-  const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input0_tensor = _tensor_builder->at(input0_index).get();
-  auto input1_tensor = _tensor_builder->at(input1_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NELogicalAnd>();
-
-  fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalNot &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEBitwiseNot>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalOr &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
-  const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input0_tensor = _tensor_builder->at(input0_index).get();
-  auto input1_tensor = _tensor_builder->at(input1_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NELogicalOr>();
-
-  fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Logistic &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
-
-  // NOTE NEActivationLayer can produce erroneous results, caused by 'vexpq_f32()'.
-  // The NEON function returns a value outside float's representable range as 'NaN'
-  // instead of 'INF', and the 'NaN' then corrupts the result of this op.
-  auto fn = std::make_unique<::arm_compute::NEActivationLayerEx>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::LSTM &node)
 {
   _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ITensor,
-                                         ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_builder);
-}
-
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEPixelWiseMultiplication>();
-
-  // For scale 1.0, the only allowed RoundingPolicy is TO_ZERO
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
-                arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
-
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Neg &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NENegLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+                                         ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_reg);
 }
 
 void KernelGenerator::visit(const ir::operation::Pack &node)
@@ -736,25 +682,23 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
   for (const auto &input_index : node.getInputs())
     input_indexes.emplace_back(input_index);
 
-  auto output = _tensor_builder->at(output_index).get()->handle();
+  auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
   std::vector<arm_compute::ITensor *> inputs;
   for (const auto &input_index : input_indexes)
-    inputs.emplace_back(_tensor_builder->at(input_index)->handle());
+    inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
 
   const auto frontend_layout = _current_op_seq_layout;
-  const auto backend_layout = _tensor_builder->at(output_index).get()->layout();
+  const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout();
 
   if (axis < 0)
     axis += output_rank;
   axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
 
-  auto fn = std::make_unique<::arm_compute::NEStackLayer>();
-
   // Disable applied dim_correction
   for (const auto &input_index : input_indexes)
   {
     size_t input_rank = _ctx.at(input_index).shape().rank();
-    const auto &input_tensor = _tensor_builder->at(input_index);
+    const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
     assert(input_rank == input_tensor->num_dimensions());
     if (input_rank != input_tensor->info()->num_dimensions())
     {
@@ -764,7 +708,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
     }
   }
 
-  fn->configure(inputs, axis, output);
+  auto fn = acl_common::generateLayer<arm_compute::NEStackLayer>(inputs, axis, output);
 
   // acl_neon does not revert the disabled dim_correction because acl_neon's kernels
   // use arm_compute::TensorInfo::offset_element_in_bytes()
@@ -783,8 +727,8 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
   auto rank = _ctx.at(input_index).shape().rank();
   auto pad_base = _ctx.at(pad_index).data()->base();
 
-  auto input = _tensor_builder->at(input_index).get()->handle();
-  auto output = _tensor_builder->at(output_index).get()->handle();
+  auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
+  auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
 
   ::arm_compute::PaddingList padding_list;
   padding_list.resize(rank);
@@ -793,7 +737,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
     const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
 
     const auto frontend_layout = _current_op_seq_layout;
-    const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+    const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
     const auto axis =
         acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
     padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
@@ -807,19 +751,33 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
   const auto pixel_value =
       ::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
 
-  auto fn = std::make_unique<::arm_compute::NEPadLayer>();
-  fn->configure(input, output, padding_list, pixel_value);
+  auto fn =
+      acl_common::generateLayer<arm_compute::NEPadLayer>(input, output, padding_list, pixel_value);
 
   _return_fn = asAclFunction(std::move(fn));
 }
 
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
+{
+  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
+      node, _ctx, _tensor_reg, _current_op_seq_layout,
+      acl_common::convertPoolType(node.param().op_type));
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  const auto activation = node.param().activation;
+  _return_fn = std::make_unique<exec::FunctionSequence>(
+      asAclFunction(std::move(raw_fn)),
+      ActivationBuilder::generate(activation, ofm_tensor->handle()));
+}
+
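
Note: the Pool2D visitor added above subsumes the per-type pooling visitors
this diff deletes (the L2Pool2D removal appears earlier in this file; the
MaxPool2D/AvgPool2D removals show up in the header below). A plausible sketch
of the acl_common::convertPoolType helper it relies on, inferred from those
removed visitors rather than taken from the actual acl_common source
(enumerator names are assumptions):

    #include <stdexcept>

    // Illustrative mapping from the IR pooling type to ACL's PoolingType.
    arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
    {
      switch (type_ir)
      {
        case ir::operation::Pool2D::PoolType::AVG:
          return arm_compute::PoolingType::AVG;
        case ir::operation::Pool2D::PoolType::L2:
          return arm_compute::PoolingType::L2;
        case ir::operation::Pool2D::PoolType::MAX:
          return arm_compute::PoolingType::MAX;
        default:
          throw std::runtime_error("convertPoolType: unsupported pooling type");
      }
    }
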
 void KernelGenerator::visit(const ir::operation::Permute &node)
 {
   const auto ofm_idx{node.getOutputs().at(0)};
   const auto ifm_idx{node.getInputs().at(0)};
   const auto permute_type = node.getPermuteType();
-  auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
   const auto rank = _ctx.at(ofm_idx).shape().rank();
   assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
 
@@ -830,35 +788,22 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
     // WHCN -> CWHN
     pv = arm_compute::PermutationVector{2, 0, 1};
 
-    auto l = std::make_unique<::arm_compute::NEPermute>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+                                                           ofm_tensor->handle(), pv);
   }
   else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
   {
     // CWHN -> WHCN
     pv = arm_compute::PermutationVector{1, 2, 0};
 
-    auto l = std::make_unique<::arm_compute::NEPermute>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+                                                           ofm_tensor->handle(), pv);
   }
   else
   {
-    auto l = std::make_unique<::arm_compute::NECopy>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NECopy>(ifm_tensor->handle(), ofm_tensor->handle());
   }
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::PReLU &node)
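
Note on the Permute hunk above: {2, 0, 1} and {1, 2, 0} are inverse
permutations of each other, which is why the same NEPermute layer serves both
directions; in ACL's reversed dimension order they realize the WHCN -> CWHN
and CWHN -> WHCN moves annotated in the code, i.e. the NCHW_TO_NHWC and
NHWC_TO_NCHW cases at the frontend level.
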
@@ -867,21 +812,14 @@ void KernelGenerator::visit(const ir::operation::PReLU &node)
   const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
   const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto alpha_tensor = _tensor_builder->at(alpha_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get();
 
-  std::unique_ptr<::arm_compute::IFunction> fn;
-
-  auto l = std::make_unique<::arm_compute::NEPReluLayer>();
-
-  l->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
-
-  fn = std::move(l);
+  auto fn = acl_common::generateLayer<arm_compute::NEPReluLayer>(
+      ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Reduce &node)
@@ -890,8 +828,8 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
   const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
   const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
   // Convert to ACL axes taking into account negative values and possible duplicates.
   const auto &axes = _ctx.at(axes_index);
@@ -906,93 +844,21 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
   std::unique_ptr<::arm_compute::IFunction> fn;
   if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
   {
-    auto l = std::make_unique<::arm_compute::NEReduceMean>();
-
-    l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEReduceMean>(input_tensor->handle(), reduce_axes,
+                                                              keep_dims, output_tensor->handle());
   }
   else if (reduce_type == ir::operation::Reduce::ReduceType::SUM)
   {
-    auto l = std::make_unique<::arm_compute::NEReduceSum>();
-
-    l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEReduceSum>(input_tensor->handle(), reduce_axes,
+                                                             keep_dims, output_tensor->handle());
   }
   else
   {
-    auto l = std::make_unique<::arm_compute::NEReduceOperation>();
-
-    l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
-                 acl_common::convertReduceType(reduce_type));
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEReduceOperation>(
+        input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
+        acl_common::convertReduceType(reduce_type));
   }
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<arm_compute::NEActivationLayer>();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU1 &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
-
-  auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
-
-  auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Reshape &node)
@@ -1000,8 +866,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
   // NOTE This operation must not change the layout from frontend to backend
   //      So, PermutationOperationPass makes the frontend and backend layouts the same.
@@ -1012,13 +878,10 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
   UNUSED_RELEASE(frontend_layout);
   UNUSED_RELEASE(backend_layout);
 
-  auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
@@ -1027,18 +890,15 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
 
   const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEScale>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
-                ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
-                ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
+  auto fn = acl_common::generateLayer<arm_compute::NEScale>(
+      ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
+      ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
+      ::arm_compute::SamplingPolicy::TOP_LEFT);
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::RNN &node)
@@ -1056,40 +916,24 @@ void KernelGenerator::visit(const ir::operation::RNN &node)
 
   const auto activation = node.param().activation;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get();
 
-  auto input_tensor = _tensor_builder->at(input_index).get();
-  auto weights_tensor = _tensor_builder->at(weights_index).get();
-  auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
-  auto bias_tensor = _tensor_builder->at(bias_index).get();
-  auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+  auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get();
+  auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get();
+  auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+  auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get();
   auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
 
-  auto copy_layer = std::make_unique<::arm_compute::NECopy>();
-  copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+  auto copy_layer = acl_common::generateLayer<arm_compute::NECopy>(
+      hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
   _return_fn = asAclFunction(std::move(copy_layer));
 
-  auto fn = std::make_unique<::arm_compute::NERNNLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-  fn->configure(input_tensor->handle(), weights_tensor->handle(),
-                recurrent_weights_tensor->handle(), bias_tensor->handle(),
-                hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NERsqrtLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
+  auto fn = acl_common::generateLayer<arm_compute::NERNNLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+      weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+      hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
   _return_fn = asAclFunction(std::move(fn));
 }
 
@@ -1105,32 +949,11 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
   (void)dims;
   (void)ndim;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-  auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-  auto acl_fn = asAclFunction(std::move(fn));
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Tanh &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<arm_compute::NEActivationLayer>();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+  auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Softmax &node)
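
Note: the Tanh visitor removed above, like the Logistic, ReLU, ReLU1, ReLU6,
and SQRT removals elsewhere in this file, is absorbed by the new
ElementwiseActivation visitor declared in the header below. A hedged sketch of
how a single visitor can reproduce the per-op ActivationLayerInfo values seen
in the removed code (the function and enumerator names here are assumptions;
the real mapping lives in acl_common):

    #include <limits>
    #include <stdexcept>

    using ActFn = ::arm_compute::ActivationLayerInfo::ActivationFunction;

    ::arm_compute::ActivationLayerInfo
    asActivationInfo(ir::operation::ElementwiseActivation::Type op, float alpha, float beta)
    {
      switch (op)
      {
        case ir::operation::ElementwiseActivation::Type::RELU:
          // Bounds recover the removed visitors: plain ReLU (no bound),
          // ReLU6 = BOUNDED_RELU{6.0}, ReLU1 = LU_BOUNDED_RELU{1.0, -1.0}.
          if (alpha == std::numeric_limits<float>::infinity())
            return {ActFn::RELU};
          else if (beta == 0.0f)
            return {ActFn::BOUNDED_RELU, alpha};
          else
            return {ActFn::LU_BOUNDED_RELU, alpha, beta};
        case ir::operation::ElementwiseActivation::Type::TANH:
          return {ActFn::TANH, alpha, beta};
        case ir::operation::ElementwiseActivation::Type::LOGISTIC:
          return {ActFn::LOGISTIC};
        default:
          throw std::runtime_error("unsupported activation type");
      }
    }
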
@@ -1139,8 +962,8 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
   const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
   const auto beta = node.param().beta;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = input_tensor->layout();
 
@@ -1154,14 +977,11 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
         acl_common::asTensorShape(input.shape(), frontend_layout, backend_layout, false));
   }
 
-  auto fn = std::make_unique<::arm_compute::NESoftmaxLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
+  auto fn = acl_common::generateLayer<arm_compute::NESoftmaxLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+      output_tensor->handle(), beta);
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
@@ -1172,22 +992,19 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
       node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
   const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto block_size_tensor = _tensor_builder->at(block_size_index).get();
-  auto paddings_tensor = _tensor_builder->at(paddings_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
+  auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get();
 
   assert(_ctx.at(block_size_index).data());
   assert(_ctx.at(paddings_index).data());
 
-  auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayer>();
-
-  fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
-                ofm_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NESpaceToBatchLayer>(
+      ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+      ofm_tensor->handle());
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
@@ -1197,16 +1014,13 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
 
   auto block_size = node.param().block_size;
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayer>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
+  auto fn = acl_common::generateLayer<arm_compute::NESpaceToDepthLayer>(
+      ifm_tensor->handle(), ofm_tensor->handle(), block_size);
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Split &node)
@@ -1221,10 +1035,10 @@ void KernelGenerator::visit(const ir::operation::Split &node)
   for (const auto &output : node.getOutputs())
     output_indexes.emplace_back(output);
 
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
   std::vector<arm_compute::ITensor *> output_tensors;
   for (const auto &ofm_ind : output_indexes)
-    output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+    output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle());
 
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = ifm_tensor->layout();
@@ -1233,71 +1047,26 @@ void KernelGenerator::visit(const ir::operation::Split &node)
     axis += ifm_rank;
   axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
 
-  auto fn = std::make_unique<::arm_compute::NESplit>();
-
-  fn->configure(ifm_tensor->handle(), output_tensors, axis);
+  auto fn =
+      acl_common::generateLayer<arm_compute::NESplit>(ifm_tensor->handle(), output_tensors, axis);
 
   _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::SQRT &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
-
-  auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
 void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
   const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
   const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseSquaredDiff>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
 
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NEElementwiseSquaredDiff>(
+      lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Sub &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEArithmeticSubtraction>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
-                arm_compute::ConvertPolicy::SATURATE);
-
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Slice &node)
@@ -1307,8 +1076,8 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
   const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
   const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
 
-  auto outputData_tensor = _tensor_builder->at(output_index).get();
-  auto inputData_tensor = _tensor_builder->at(input_index).get();
+  auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = inputData_tensor->layout();
 
@@ -1358,13 +1127,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
     ends_set.set(i, ends[i]);
   }
 
-  auto fn = std::make_unique<::arm_compute::NESlice>();
-
-  fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
+  auto fn = acl_common::generateLayer<arm_compute::NESlice>(
+      inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::StridedSlice &node)
@@ -1375,8 +1141,8 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
   const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
   const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
 
-  auto outputData_tensor = _tensor_builder->at(output_index).get();
-  auto inputData_tensor = _tensor_builder->at(input_index).get();
+  auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = inputData_tensor->layout();
 
@@ -1445,14 +1211,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
     strides_set.set(i, strides[i]);
   }
 
-  auto fn = std::make_unique<::arm_compute::NEStridedSlice>();
+  auto fn = acl_common::generateLayer<arm_compute::NEStridedSlice>(
+      inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+      begin_mask, end_mask, shrink_axis_mask);
 
-  fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
-                strides_set, begin_mask, end_mask, shrink_axis_mask);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::TransposeConv &node)
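
Note on the mask triple in the StridedSlice hunk above, which follows
TensorFlow's convention: bit i of begin_mask/end_mask means "ignore starts[i]
or ends[i] and use the full extent", and bit i of shrink_axis_mask drops that
axis from the output. For example, slicing a 4x5 tensor with starts = {1, 0},
ends = {2, 5}, strides = {1, 1} and shrink_axis_mask = 0b01 returns row 1 as a
rank-1 tensor of shape {5}.
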
@@ -1481,20 +1244,17 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
     invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
   }
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto ker_tensor = _tensor_builder->at(ker_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
 
   const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
 
-  auto fn = std::make_unique<::arm_compute::NETransposeConvLayer>();
+  auto fn = acl_common::generateLayer<arm_compute::NETransposeConvLayer>(
+      ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info,
+      invalid_horizontal, invalid_vertical);
 
-  fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
-                tconv_info, invalid_horizontal, invalid_vertical);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Transpose &node)
@@ -1503,8 +1263,8 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
   const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
   const auto &perm{node.param().perm};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
-  const auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+  const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = ifm_tensor->layout();
 
@@ -1514,27 +1274,17 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
       rank, pv, frontend_layout, backend_layout);
 
   std::unique_ptr<::arm_compute::IFunction> fn;
-
   if (ifm_tensor->num_dimensions() <= 2 && ofm_tensor->num_dimensions() <= 2)
   {
-    auto l = std::make_unique<::arm_compute::NETranspose>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NETranspose>(ifm_tensor->handle(),
+                                                             ofm_tensor->handle());
   }
   else
   {
-    auto l = std::make_unique<::arm_compute::NEPermute>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+                                                           ofm_tensor->handle(), backend_pv);
   }
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Unpack &node)
@@ -1548,25 +1298,23 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
   for (const auto &output_index : node.getOutputs())
     output_indexes.emplace_back(output_index);
 
-  auto input = _tensor_builder->at(input_index).get()->handle();
+  auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
   std::vector<arm_compute::ITensor *> outputs;
   for (const auto &output_index : output_indexes)
-    outputs.emplace_back(_tensor_builder->at(output_index)->handle());
+    outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
 
   const auto frontend_layout = _current_op_seq_layout;
-  const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+  const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
   if (axis < 0)
     axis += input_rank;
   axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
 
-  auto fn = std::make_unique<::arm_compute::NEUnstack>();
-
   // Disable applied dim_correction
   std::vector<arm_compute::TensorShape> orig_outputs_acl_tensor_shapes;
   for (const auto &output_index : output_indexes)
   {
     size_t output_rank = _ctx.at(output_index).shape().rank();
-    const auto &output_tensor = _tensor_builder->at(output_index);
+    const auto &output_tensor = _tensor_reg->getAclTensor(output_index);
     orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
     assert(output_rank == output_tensor->num_dimensions());
     if (output_rank != output_tensor->info()->num_dimensions())
@@ -1577,84 +1325,23 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
     }
   }
 
-  fn->configure(input, outputs, axis);
+  auto fn = acl_common::generateLayer<arm_compute::NEUnstack>(input, outputs, axis);
 
   _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::Add &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEArithmeticAddition>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
-                arm_compute::ConvertPolicy::SATURATE);
-
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Div &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseDivision>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Exp &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEExpLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
 void KernelGenerator::visit(const ir::operation::ExpandDims &node)
 {
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
-  auto fn = std::make_unique<::arm_compute::NEReshapeLayer>();
+  auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
 
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Comparison &node)
@@ -1665,56 +1352,15 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
 
   const auto comparison_type = node.param().comparison_type;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input0_tensor = _tensor_builder->at(input0_index).get();
-  auto input1_tensor = _tensor_builder->at(input1_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseComparison>();
-
-  fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
-                (arm_compute::ComparisonOperation)comparison_type);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseMin>();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get();
+  auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get();
 
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NEElementwiseComparison>(
+      input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+      (arm_compute::ComparisonOperation)comparison_type);
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseMax>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
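
Note: the C-style cast in the Comparison hunk above assumes the IR
ComparisonType enumerators share their values with
arm_compute::ComparisonOperation. A defensive alternative, illustrative only:

    #include <stdexcept>

    // Explicit mapping instead of a value-punning cast.
    arm_compute::ComparisonOperation
    asComparisonOperation(ir::operation::Comparison::ComparisonType type)
    {
      switch (type)
      {
        case ir::operation::Comparison::ComparisonType::Equal:
          return arm_compute::ComparisonOperation::Equal;
        case ir::operation::Comparison::ComparisonType::NotEqual:
          return arm_compute::ComparisonOperation::NotEqual;
        // ... Greater/GreaterEqual/Less/LessEqual follow the same pattern ...
        default:
          throw std::runtime_error("unsupported comparison type");
      }
    }
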
 
 void KernelGenerator::visit(const ir::operation::OneHot &node)
@@ -1726,17 +1372,16 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
   const auto offvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
   const auto axis = node.param().axis;
 
-  auto output_tensor = _tensor_builder->at(out_idx).get();
-  auto indices_tensor = _tensor_builder->at(indices_idx).get();
-  auto depth_tensor = _tensor_builder->at(depth_idx).get();
-  auto onvalue_tensor = _tensor_builder->at(onvalue_idx).get();
-  auto offvalue_tensor = _tensor_builder->at(offvalue_idx).get();
-
-  auto fn = std::make_unique<::arm_compute::CPPOneHotEx>();
-  fn->configure(indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
-                offvalue_tensor->handle(), output_tensor->handle(), axis);
-  auto acl_fn = asAclFunction(std::move(fn));
-  _return_fn = std::move(acl_fn);
+  auto output_tensor = _tensor_reg->getAclTensor(out_idx).get();
+  auto indices_tensor = _tensor_reg->getAclTensor(indices_idx).get();
+  auto depth_tensor = _tensor_reg->getAclTensor(depth_idx).get();
+  auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx).get();
+  auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx).get();
+
+  auto fn = acl_common::generateLayer<arm_compute::CPPOneHotEx>(
+      indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
+      offvalue_tensor->handle(), output_tensor->handle(), axis);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 } // namespace acl_neon
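
Summary for this file: none of the deleted visitors lose functionality; each
is folded into one of the consolidated operations registered in the header
below. Add/Div/Mul/Sub become BinaryArithmetic; LogicalAnd/LogicalOr/Max/Min
become ElementwiseBinary; Abs/Cast/Dequantize/Exp/Floor/LogicalNot/Neg/RSQRT/
SQRT become ElementwiseUnary; Logistic/ReLU/ReLU1/ReLU6/Tanh become
ElementwiseActivation; and AvgPool2D/L2Pool2D/MaxPool2D become Pool2D.
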
index d6f7932..4d269cd 100644
@@ -21,6 +21,8 @@
 
 #include "ir/Operands.h"
 #include "TensorBuilder.h"
+#include "AclTensorRegistry.h"
+#include "TensorManager.h"
 
 namespace onert
 {
@@ -33,75 +35,57 @@ class KernelGenerator : public IKernelGenerator
 {
 public:
   KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
-                  const std::shared_ptr<TensorBuilder> &tensor_builder);
+                  const std::shared_ptr<TensorBuilder> &tensor_builder,
+                  const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
 
   void visit(const ir::OpSequence &) override;
-  void visit(const ir::operation::Abs &) override;
   void visit(const ir::operation::ArgMax &) override;
   void visit(const ir::operation::BatchToSpaceND &) override;
-  void visit(const ir::operation::Cast &) override;
+  void visit(const ir::operation::BinaryArithmetic &) override;
   void visit(const ir::operation::Conv2D &) override;
   void visit(const ir::operation::DepthToSpace &) override;
   void visit(const ir::operation::DepthwiseConv2D &) override;
-  void visit(const ir::operation::Dequantize &) override;
-  void visit(const ir::operation::MaxPool2D &) override;
-  void visit(const ir::operation::AvgPool2D &) override;
   void visit(const ir::operation::Concat &) override;
+  void visit(const ir::operation::ElementwiseActivation &) override;
+  void visit(const ir::operation::ElementwiseBinary &) override;
+  void visit(const ir::operation::ElementwiseUnary &) override;
   void visit(const ir::operation::EmbeddingLookup &) override;
-  void visit(const ir::operation::Floor &) override;
   void visit(const ir::operation::FullyConnected &) override;
   void visit(const ir::operation::Gather &) override;
   void visit(const ir::operation::HashtableLookup &) override;
   void visit(const ir::operation::InstanceNorm &) override;
   void visit(const ir::operation::L2Normalization &) override;
-  void visit(const ir::operation::L2Pool2D &) override;
   void visit(const ir::operation::LocalResponseNormalization &) override;
-  void visit(const ir::operation::LogicalAnd &) override;
-  void visit(const ir::operation::LogicalNot &) override;
-  void visit(const ir::operation::LogicalOr &) override;
-  void visit(const ir::operation::Logistic &) override;
   void visit(const ir::operation::LSTM &) override;
-  void visit(const ir::operation::Mul &) override;
-  void visit(const ir::operation::Neg &) override;
   void visit(const ir::operation::Pack &) override;
   void visit(const ir::operation::Pad &) override;
+  void visit(const ir::operation::Pool2D &) override;
   void visit(const ir::operation::Permute &) override;
   void visit(const ir::operation::PReLU &) override;
   void visit(const ir::operation::Reduce &) override;
-  void visit(const ir::operation::ReLU &) override;
-  void visit(const ir::operation::ReLU1 &) override;
-  void visit(const ir::operation::ReLU6 &) override;
   void visit(const ir::operation::Reshape &) override;
   void visit(const ir::operation::ResizeBilinear &) override;
   void visit(const ir::operation::RNN &) override;
-  void visit(const ir::operation::RSQRT &) override;
   void visit(const ir::operation::Squeeze &) override;
-  void visit(const ir::operation::Tanh &) override;
   void visit(const ir::operation::Softmax &) override;
   void visit(const ir::operation::SpaceToBatchND &) override;
   void visit(const ir::operation::SpaceToDepth &) override;
   void visit(const ir::operation::Split &) override;
-  void visit(const ir::operation::SQRT &) override;
   void visit(const ir::operation::SquaredDifference &) override;
-  void visit(const ir::operation::Sub &) override;
   void visit(const ir::operation::Slice &) override;
   void visit(const ir::operation::StridedSlice &) override;
   void visit(const ir::operation::TransposeConv &) override;
   void visit(const ir::operation::Transpose &) override;
   void visit(const ir::operation::Unpack &) override;
-  void visit(const ir::operation::Add &) override;
-  void visit(const ir::operation::Div &) override;
-  void visit(const ir::operation::Exp &) override;
   void visit(const ir::operation::ExpandDims &) override;
   void visit(const ir::operation::Comparison &) override;
-  void visit(const ir::operation::Min &) override;
-  void visit(const ir::operation::Max &) override;
   void visit(const ir::operation::OneHot &) override;
 
 private:
   const ir::Operands &_ctx;
   const ir::Operations &_operations_ctx;
   std::shared_ptr<TensorBuilder> _tensor_builder;
+  std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
   ir::Layout _current_op_seq_layout;
 };
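
Note: the new _tensor_reg member above is what every
_tensor_reg->getAclTensor(...) call in the source diff resolves against. A
rough sketch of the AclTensorRegistry adapter, assuming it simply exposes the
TensorManager's lookup (method names are inferred from the call sites; the
real class lives in the AclTensorRegistry.h included above):

    #include <memory>

    // Hypothetical reconstruction of the registry adapter.
    template <typename T_AclTensorManager> class AclTensorRegistry : public ITensorRegistry
    {
    public:
      AclTensorRegistry(T_AclTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {}

      std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
      {
        return _tensor_mgr->at(ind);
      }

      // What the kernel generators call: the backend tensor with handle()/layout().
      auto getAclTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind); }

    private:
      T_AclTensorManager *_tensor_mgr;
    };
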
 
index 2948cab..ac80901 100644
@@ -19,7 +19,7 @@
 #include "ParentInfo.h"
 
 #include <cassert>
-#include <ir/LoweredGraph.h>
+#include <compiler/LoweredGraph.h>
 #include <util/logging.h>
 #include "AclSubTensorAnalyzer.h"
 
index 3ec9efa..3b7cfbc 100644
@@ -55,7 +55,7 @@ using InternalBufferManager = acl_common::AclInternalBufferManager<
 using TensorManager = acl_common::AclTensorManager<acl_neon::operand::INETensor, operand::NETensor,
                                                    operand::NESubTensor>;
 
-TensorManager *createTensorManager(bool is_linear_executor)
+inline TensorManager *createTensorManager(bool is_linear_executor)
 {
   if (is_linear_executor)
   {
index 56bd352..fc8574b 100644
@@ -47,10 +47,12 @@ public:
     const auto &operands = graph.operands();
     const auto &operations = graph.operations();
     auto context = std::make_unique<BackendContext>(this, &graph);
-    auto tb = std::make_shared<TensorBuilder>();
+    auto tr = std::make_shared<cpu_common::TensorRegistry>();
+    auto tb = std::make_shared<TensorBuilder>(tr);
+    context->tensor_registry = tr;
     context->tensor_builder = tb;
-    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
-    context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, kb,
+    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+    context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
                                                             context->external_context());
     context->tensor_register = nullptr;
     context->optimizer = nullptr;
index f314a8e..e90b210 100644
@@ -31,13 +31,15 @@ class BackendContext : public onert::backend::BackendContext
 {
 public:
   BackendContext(const Backend *backend, const ir::Graph *graph,
+                 std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
                  std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
                  std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
                  std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
                  std::shared_ptr<ITensorRegister> tensor_register = nullptr,
                  std::shared_ptr<IOptimizer> optimizer = nullptr)
-      : onert::backend::BackendContext(backend, graph, tensor_builder, constant_initializer,
-                                       kernel_gen, tensor_register, optimizer),
+      : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder,
+                                       constant_initializer, kernel_gen, tensor_register,
+                                       optimizer),
         _external_context(new ExternalContext)
   {
   }
index deb27f0..6f6eb77 100644
@@ -25,8 +25,8 @@ namespace cpu
 {
 
 ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
-                                         const std::shared_ptr<TensorBuilder> &tensor_builder)
-    : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+                                         const std::shared_ptr<ITensorRegistry> &tensor_reg)
+    : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
 {
   // DO NOTHING
 }
index de03a69..c016c83 100644
@@ -17,7 +17,7 @@
 #ifndef __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
 #define __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
 
-#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"
 
 #include <backend/IConstantInitializer.h>
 #include <ir/Operands.h>
@@ -33,7 +33,7 @@ class ConstantInitializer : public IConstantInitializer
 {
 public:
   ConstantInitializer(const ir::Operands &operands,
-                      const std::shared_ptr<TensorBuilder> &tensor_builder);
+                      const std::shared_ptr<ITensorRegistry> &tensor_reg);
 
 public:
   void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
@@ -50,10 +50,10 @@ public:
   void visit(const ir::operation::FullyConnected &) override;
 
 private:
-  std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
+  std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
 
 private:
-  std::shared_ptr<TensorBuilder> _tensor_builder;
+  std::shared_ptr<ITensorRegistry> _tensor_reg;
 };
 
 } // namespace cpu
index 7939fe8..74b6f0c 100644 (file)
 
 #include "KernelGenerator.h"
 
-#include "ops/AbsLayer.h"
-#include "ops/AddLayer.h"
 #include "ops/ArgMinMaxLayer.h"
-#include "ops/AvgPoolLayer.h"
 #include "ops/BatchToSpaceNDLayer.h"
-#include "ops/CastLayer.h"
+#include "ops/BinaryArithmeticLayer.h"
 #include "ops/CompareLayer.h"
 #include "ops/ConcatLayer.h"
 #include "ops/ConvolutionLayer.h"
-#include "ops/CosLayer.h"
 #include "ops/DepthwiseConvolutionLayer.h"
-#include "ops/DivLayer.h"
 #include "ops/EinsumLayer.h"
-#include "ops/ExpLayer.h"
+#include "ops/ElementwiseActivationLayer.h"
+#include "ops/ElementwiseBinaryLayer.h"
+#include "ops/ElementwiseUnaryLayer.h"
 #include "ops/ExpandDimsLayer.h"
 #include "ops/FillLayer.h"
 #include "ops/FullyConnectedLayer.h"
 #include "ops/GatherLayer.h"
-#include "ops/LogLayer.h"
-#include "ops/LogisticLayer.h"
-#include "ops/MaxLayer.h"
-#include "ops/MaxPoolLayer.h"
 #include "ops/MeanLayer.h"
-#include "ops/MinLayer.h"
-#include "ops/MulLayer.h"
-#include "ops/NegLayer.h"
 #include "ops/OneHotLayer.h"
 #include "ops/OperationUtils.h"
 #include "ops/PackLayer.h"
 #include "ops/PadLayer.h"
+#include "ops/PoolLayer.h"
 #include "ops/PowLayer.h"
 #include "ops/RangeLayer.h"
+#include "ops/RankLayer.h"
 #include "ops/ReduceLayer.h"
-#include "ops/ReLULayer.h"
-#include "ops/ReLU6Layer.h"
 #include "ops/ReshapeLayer.h"
 #include "ops/ResizeBilinearLayer.h"
 #include "ops/ReverseLayer.h"
-#include "ops/RoundLayer.h"
-#include "ops/RsqrtLayer.h"
 #include "ops/SelectLayer.h"
 #include "ops/ShapeLayer.h"
-#include "ops/SinLayer.h"
 #include "ops/SliceLayer.h"
 #include "ops/SoftMaxLayer.h"
 #include "ops/StridedSliceLayer.h"
 #include "ops/SpaceToDepthLayer.h"
 #include "ops/SplitLayer.h"
 #include "ops/SplitVLayer.h"
-#include "ops/SubLayer.h"
-#include "ops/TanhLayer.h"
 #include "ops/TileLayer.h"
 #include "ops/TransposeLayer.h"
 #include "ops/UnpackLayer.h"
-#include "ops/LogicalNotLayer.h"
-#include "ops/ZerosLikeLayer.h"
 #include "ops/SquaredDiffLayer.h"
-#include "ops/LogicalOrLayer.h"
 #include "ops/L2NormLayer.h"
 #include "ops/MatrixBandPartLayer.h"
 #include "ops/BatchMatMulLayer.h"
 #include "ops/BroadcastToLayer.h"
 #include "ops/FusedBatchNormLayer.h"
 #include "ops/LogSoftMaxLayer.h"
-#include "ops/QuantizeLayer.h"
 #include "ops/StatelessRandomUniformLayer.h"
 
 #include <backend/Backend.h>
@@ -102,6 +83,104 @@ namespace cpu
 
 namespace
 {
+ops::ArithmeticType
+convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
+{
+  switch (arithmetic_type_ir)
+  {
+    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+      return ops::ArithmeticType::kAdd;
+    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+      return ops::ArithmeticType::kSub;
+    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+      return ops::ArithmeticType::kMul;
+    case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+      return ops::ArithmeticType::kDiv;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
+ops::ElementwiseActivationType
+convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
+{
+  switch (type_ir)
+  {
+    case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+      return ops::ElementwiseActivationType::kLogistic;
+    case ir::operation::ElementwiseActivation::Type::RELU:
+      return ops::ElementwiseActivationType::kReLU;
+    case ir::operation::ElementwiseActivation::Type::TANH:
+      return ops::ElementwiseActivationType::kTanh;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
+ops::ElementwiseBinaryType
+convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
+{
+  switch (type_ir)
+  {
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+      return ops::ElementwiseBinaryType::kLogicalOr;
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+      return ops::ElementwiseBinaryType::kMax;
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+      return ops::ElementwiseBinaryType::kMin;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
+ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
+{
+  switch (type_ir)
+  {
+    case ir::operation::ElementwiseUnary::Type::ABS:
+      return ops::ElementwiseUnaryType::kAbs;
+    case ir::operation::ElementwiseUnary::Type::CAST:
+      return ops::ElementwiseUnaryType::kCast;
+    case ir::operation::ElementwiseUnary::Type::COS:
+      return ops::ElementwiseUnaryType::kCos;
+    case ir::operation::ElementwiseUnary::Type::ERF:
+      return ops::ElementwiseUnaryType::kErf;
+    case ir::operation::ElementwiseUnary::Type::EXP:
+      return ops::ElementwiseUnaryType::kExp;
+    case ir::operation::ElementwiseUnary::Type::LOG:
+      return ops::ElementwiseUnaryType::kLog;
+    case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+      return ops::ElementwiseUnaryType::kLogicalNot;
+    case ir::operation::ElementwiseUnary::Type::NEG:
+      return ops::ElementwiseUnaryType::kNeg;
+    case ir::operation::ElementwiseUnary::Type::QUANTIZE:
+      return ops::ElementwiseUnaryType::kQuantize;
+    case ir::operation::ElementwiseUnary::Type::ROUND:
+      return ops::ElementwiseUnaryType::kRound;
+    case ir::operation::ElementwiseUnary::Type::RSQRT:
+      return ops::ElementwiseUnaryType::kRSqrt;
+    case ir::operation::ElementwiseUnary::Type::SIN:
+      return ops::ElementwiseUnaryType::kSin;
+    case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
+      return ops::ElementwiseUnaryType::kZerosLike;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
+ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
+{
+  switch (type_ir)
+  {
+    case ir::operation::Pool2D::PoolType::AVG:
+      return ops::PoolType::kAvg;
+    case ir::operation::Pool2D::PoolType::MAX:
+      return ops::PoolType::kMax;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
 ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
 {
   switch (reduce_type_ir)
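
The new `convert*` helpers above all follow one pattern: an exhaustive switch from an IR-level enum to the kernel-level enum, throwing for anything unmapped so that an unsupported model fails at kernel-generation time instead of silently selecting a wrong kernel. A reduced sketch of the pattern, with hypothetical `IrOp`/`KernelOp` enums rather than the onert types:

    #include <stdexcept>

    enum class IrOp
    {
      Add,
      Sub
    };

    enum class KernelOp
    {
      kAdd,
      kSub
    };

    KernelOp convert(IrOp op)
    {
      switch (op)
      {
        case IrOp::Add:
          return KernelOp::kAdd;
        case IrOp::Sub:
          return KernelOp::kSub;
        default:
          // Unmapped IR values fail loudly here, at kernel-generation time.
          throw std::runtime_error("unsupported operation");
      }
    }
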
@@ -127,11 +206,12 @@ ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_
 KernelGenerator::KernelGenerator(
     const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
     const std::shared_ptr<TensorBuilder> &tensor_builder,
+    const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
     const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
     const std::shared_ptr<ExternalContext> &external_context)
     : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
-      _kernel_builder(kernel_builder), _current_op_seq_layout(ir::Layout::UNKNOWN),
-      _external_context(external_context)
+      _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+      _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context)
 {
   // DO NOTHING
 }
@@ -140,11 +220,9 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
 {
   assert(!_return_fn_seq);
   assert(_tensor_builder->dynamicTensorManager());
-  assert(_tensor_builder->tensorRegistry());
+  assert(_tensor_reg);
 
-  auto dyn_tensor_manager = _tensor_builder->dynamicTensorManager();
-  auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(
-      _ctx, dyn_tensor_manager, _tensor_builder->tensorRegistry());
+  auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
 
   _return_fn_seq = std::make_unique<exec::FunctionSequence>();
 
@@ -154,7 +232,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
     dyn_ctx->op_seq = &op_seq;
     dyn_ctx->operations = &_operations_ctx;
     dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
-    dyn_ctx->tensor_registry = _tensor_builder->tensorRegistry();
+    dyn_ctx->tensor_registry = _tensor_reg;
     dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
 
     _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
@@ -170,13 +248,13 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
 
     for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
     {
-      auto portable_tensor = _tensor_builder->portableAt(ind);
+      auto portable_tensor = _tensor_reg->getPortableTensor(ind);
       if (portable_tensor)
       {
         assert(portable_tensor->layout() == ir::Layout::NHWC);
       }
 
-      auto tensor = _tensor_builder->at(ind);
+      auto tensor = _tensor_reg->getNativeTensor(ind);
       if (tensor)
       {
         tensor->increase_ref();
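
The lookups above split into `getPortableTensor` (any tensor a CPU kernel can touch) and `getNativeTensor` (only tensors this backend owns and ref-counts). A minimal two-map registry sketch under that assumption, with placeholder `PortableTensor`/`NativeTensor` types, not onert's `cpu_common::TensorRegistry`:

    #include <memory>
    #include <unordered_map>

    struct PortableTensor
    {
      virtual ~PortableTensor() = default;
    };

    struct NativeTensor : PortableTensor
    {
      int ref_count = 0;
      void increase_ref() { ++ref_count; }
    };

    class TensorRegistry
    {
    public:
      std::shared_ptr<PortableTensor> getPortableTensor(int index) const
      {
        auto it = _portable.find(index);
        return it == _portable.end() ? nullptr : it->second;
      }
      // Returns null for tensors the backend does not own, so callers can
      // skip ref-counting for migrant tensors (as the loop above does).
      std::shared_ptr<NativeTensor> getNativeTensor(int index) const
      {
        auto it = _native.find(index);
        return it == _native.end() ? nullptr : it->second;
      }
      void setNativeTensor(int index, std::shared_ptr<NativeTensor> t)
      {
        _native[index] = t;
        _portable[index] = t; // every native tensor is also portable
      }

    private:
      std::unordered_map<int, std::shared_ptr<PortableTensor>> _portable;
      std::unordered_map<int, std::shared_ptr<NativeTensor>> _native;
    };
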
@@ -194,21 +272,23 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
   const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
   const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
 
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-  auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
-  auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
+  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
+  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
 
   const auto stride = node.param().stride;
   const auto activation = node.param().activation;
   const auto param_padding = node.param().padding;
+  const auto dilation = node.param().dilation;
   auto fn = std::make_unique<ops::ConvolutionLayer>();
 
   if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
   {
     fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
                   param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
-                  stride.horizontal, stride.vertical, activation, ofm_tensor);
+                  stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
+                  activation, ofm_tensor);
 
     _return_fn = std::move(fn);
     return;
@@ -221,11 +301,12 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
   const auto ker_width = ker_shape.dim(2);
 
   const auto padding =
-      ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
+      ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+                           dilation.width_factor, dilation.height_factor);
 
   fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
                 padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
-                activation, ofm_tensor);
+                dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
 
   _return_fn = std::move(fn);
 }
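
Beyond the tensor-registry swap, this hunk threads the new `dilation` parameter into both `configure()` calls and into `ir::calculatePadding`: with dilation factor d, a k-tap filter spans k + (k-1)(d-1) input positions, and SAME-style padding has to account for that wider extent. A small arithmetic sketch (TensorFlow-style SAME padding, not the actual `ir::calculatePadding` implementation):

    #include <algorithm>
    #include <cstdio>

    // Effective extent of a k-tap filter with the given dilation factor.
    int effectiveKernel(int k, int dilation) { return k + (k - 1) * (dilation - 1); }

    // Total SAME padding along one axis (the caller splits it left/right).
    int samePaddingTotal(int in, int stride, int k, int dilation)
    {
      const int out = (in + stride - 1) / stride; // ceil(in / stride)
      return std::max(0, (out - 1) * stride + effectiveKernel(k, dilation) - in);
    }

    int main()
    {
      // A 3-tap filter with dilation 2 behaves like a 5-tap filter:
      // prints "k_eff=5 pad=4" for a 16-wide input at stride 1.
      std::printf("k_eff=%d pad=%d\n", effectiveKernel(3, 2), samePaddingTotal(16, 1, 3, 2));
      return 0;
    }
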
@@ -251,10 +332,10 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
   const auto multiplier = node.param().multiplier;
   const auto activation = node.param().activation;
 
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-  auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
-  auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
+  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
+  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
 
   auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
 
@@ -265,57 +346,6 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
   _return_fn = std::move(fn);
 }
 
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
-
-  const auto kh = node.param().kh;
-  const auto kw = node.param().kw;
-
-  const auto stride = node.param().stride;
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
-  const auto padding =
-      ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
-  auto fn = std::make_unique<ops::MaxPoolLayer>();
-
-  fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
-                stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor);
-
-  _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
-  const auto kh = node.param().kh;
-  const auto kw = node.param().kw;
-  const auto stride = node.param().stride;
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
-  const auto padding =
-      ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
-  auto fn = std::make_unique<ops::AvgPoolLayer>();
-
-  fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
-                stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor);
-
-  _return_fn = std::move(fn);
-}
-
 void KernelGenerator::visit(const ir::operation::Concat &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
@@ -323,11 +353,11 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
   const auto rank = _ctx.at(ofm_index).shape().rank();
   const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
 
-  auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
 
   std::vector<const IPortableTensor *> input_tensors;
   for (auto &ifm_idx : node.getInputs())
-    input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
 
   auto fn = std::make_unique<ops::ConcatLayer>();
 
@@ -342,9 +372,9 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
   const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
   const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};
 
-  auto output_alloc = _tensor_builder->portableAt(output_index).get();
-  auto input_alloc = _tensor_builder->portableAt(input_index).get();
-  auto block_size_alloc = _tensor_builder->portableAt(block_size_index).get();
+  auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_alloc = _tensor_reg->getPortableTensor(input_index).get();
+  auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index).get();
 
   auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();
 
@@ -354,7 +384,7 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
   if (node.getInputs().size() != NNApiInputs)
   {
     const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
-    crops_alloc = _tensor_builder->portableAt(crops_data_index).get();
+    crops_alloc = _tensor_reg->getPortableTensor(crops_data_index).get();
   }
 
   fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);
@@ -368,9 +398,9 @@ void KernelGenerator::visit(const ir::operation::Fill &node)
   const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
   const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-  auto value_tensor = _tensor_builder->portableAt(value_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+  auto value_tensor = _tensor_reg->getPortableTensor(value_index).get();
 
   auto fn = std::make_unique<ops::FillLayer>();
 
@@ -389,11 +419,11 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node)
   const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
   const auto activation = node.param().activation;
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-  auto weight_tensor = _tensor_builder->portableAt(weight_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+  auto weight_tensor = _tensor_reg->getPortableTensor(weight_index).get();
   auto bias_tensor =
-      bias_index.undefined() ? nullptr : _tensor_builder->portableAt(bias_index).get();
+      bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index).get();
 
   auto fn = std::make_unique<ops::FullyConnectedLayer>();
 
@@ -408,8 +438,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 
   // optional 2nd input
   IPortableTensor *shape_tensor = nullptr;
@@ -417,7 +447,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
   if (node.getInputs().size() == 2)
   {
     const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
-    shape_tensor = _tensor_builder->portableAt(shape_index).get();
+    shape_tensor = _tensor_reg->getPortableTensor(shape_index).get();
   }
 
   auto fn = std::make_unique<ops::ReshapeLayer>();
@@ -431,8 +461,8 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 
   // Squeeze can share same kernel with reshape
   auto fn = std::make_unique<ops::ReshapeLayer>();
@@ -449,8 +479,8 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
 
   const auto beta = node.param().beta;
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 
   auto fn = std::make_unique<ops::SoftMaxLayer>();
 
@@ -459,21 +489,22 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
   _return_fn = std::move(fn);
 }
 
-void KernelGenerator::visit(const ir::operation::Add &node)
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
+  const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
 
   const auto activation = node.param().activation;
 
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
 
-  auto fn = std::make_unique<ops::AddLayer>();
+  auto fn = std::make_unique<ops::BinaryArithmeticLayer>();
 
-  fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
+  fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
+                convertArithmeticType(node.param().arithmetic_type));
 
   _return_fn = std::move(fn);
 }
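
Here the separate Add/Sub/Mul/Div visitors collapse into one `BinaryArithmetic` visitor: the arithmetic kind and the fused activation travel as `configure()` parameters instead of being encoded in the class name. A toy sketch of that shape (placeholder types, not onert's `ops::BinaryArithmeticLayer`):

    #include <algorithm>
    #include <functional>

    enum class ArithmeticType
    {
      kAdd,
      kSub,
      kMul,
      kDiv
    };

    enum class Activation
    {
      kNone,
      kReLU
    };

    class BinaryLayer
    {
    public:
      void configure(ArithmeticType op, Activation act)
      {
        switch (op)
        {
          case ArithmeticType::kAdd:
            _op = [](float a, float b) { return a + b; };
            break;
          case ArithmeticType::kSub:
            _op = [](float a, float b) { return a - b; };
            break;
          case ArithmeticType::kMul:
            _op = [](float a, float b) { return a * b; };
            break;
          case ArithmeticType::kDiv:
            _op = [](float a, float b) { return a / b; };
            break;
        }
        _act = act;
      }

      float run(float lhs, float rhs) const
      {
        const float v = _op(lhs, rhs);
        return _act == Activation::kReLU ? std::max(v, 0.0f) : v; // fused activation
      }

    private:
      std::function<float(float, float)> _op;
      Activation _act = Activation::kNone;
    };
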
@@ -484,9 +515,9 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
   const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
   const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
 
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
 
   auto comparison_type = node.param().comparison_type;
 
@@ -503,9 +534,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
   const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
   const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-  auto indices_tensor = _tensor_builder->portableAt(indices_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+  auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get();
 
   const auto backend_layout = output_tensor->layout();
   UNUSED_RELEASE(backend_layout);
@@ -534,46 +565,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
   _return_fn = std::move(fn);
 }
 
-void KernelGenerator::visit(const ir::operation::Sub &node)
-{
-  // The same as Add
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
-  auto fn = std::make_unique<ops::SubLayer>();
-
-  fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
-
-  _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
-  // The same as Add
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
-  auto fn = std::make_unique<ops::MulLayer>();
-
-  fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
-
-  _return_fn = std::move(fn);
-}
-
 void KernelGenerator::visit(const ir::operation::OneHot &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -584,11 +575,11 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
 
   const auto axis = node.param().axis;
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto indices_tensor = _tensor_builder->portableAt(indices_index).get();
-  auto depth_tensor = _tensor_builder->portableAt(depth_index).get();
-  auto onvalue_tensor = _tensor_builder->portableAt(onvalue_index).get();
-  auto offvalue_tensor = _tensor_builder->portableAt(offvalue_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get();
+  auto depth_tensor = _tensor_reg->getPortableTensor(depth_index).get();
+  auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index).get();
+  auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index).get();
 
   assert(indices_tensor->data_type() == OperandType::INT32);
   assert(axis <= static_cast<int>(indices_tensor->num_dimensions()));
@@ -600,34 +591,14 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
   _return_fn = std::move(fn);
 }
 
-void KernelGenerator::visit(const ir::operation::Div &node)
-{
-  // The same as Add
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
-  auto fn = std::make_unique<ops::DivLayer>();
-
-  fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
-
-  _return_fn = std::move(fn);
-}
-
 void KernelGenerator::visit(const ir::operation::Einsum &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
 
-  auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
   std::vector<const IPortableTensor *> input_tensors;
   for (auto &ifm_idx : node.getInputs())
-    input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
 
   const auto equation = node.param().equation;
 
@@ -648,7 +619,7 @@ void KernelGenerator::visit(const ir::operation::Custom &node)
       const auto &operand = _ctx.at(idx);
       // TODO make sure using `_current_op_seq_layout` is correct for custom operations
       types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
-      auto in_tensor = _tensor_builder->portableAt(idx);
+      auto in_tensor = _tensor_reg->getPortableTensor(idx);
       tensors.emplace_back(in_tensor);
     }
   };
@@ -666,64 +637,68 @@ void KernelGenerator::visit(const ir::operation::Custom &node)
   _return_fn = std::move(fn);
 }
 
-void KernelGenerator::visit(const ir::operation::Exp &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
 {
   const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
+  const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 
-  auto fn = std::make_unique<ops::ExpLayer>();
+  auto fn = std::make_unique<ops::ElementwiseActivationLayer>();
 
-  fn->configure(input_tensor, output_tensor);
+  fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta,
+                convertElementwiseActivationType(node.param().op_type));
 
   _return_fn = std::move(fn);
 }
 
-void KernelGenerator::visit(const ir::operation::ExpandDims &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
 {
   const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
-  const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
+  const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-  auto axis_tensor = _tensor_builder->portableAt(axis_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
 
-  auto fn = std::make_unique<ops::ExpandDimsLayer>();
+  auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();
 
-  fn->configure(input_tensor, axis_tensor, output_tensor);
+  fn->configure(lhs_tensor, rhs_tensor, output_tensor,
+                convertElementwiseBinaryType(node.param().op_type));
 
   _return_fn = std::move(fn);
 }
 
-void KernelGenerator::visit(const ir::operation::Logistic &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
 {
   const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
+  const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 
-  auto fn = std::make_unique<ops::LogisticLayer>();
+  auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
 
-  fn->configure(input_tensor, output_tensor);
+  fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
 
   _return_fn = std::move(fn);
 }
 
-void KernelGenerator::visit(const ir::operation::Tanh &node)
+void KernelGenerator::visit(const ir::operation::ExpandDims &node)
 {
   const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
+  const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
+  const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+  auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get();
 
-  auto fn = std::make_unique<ops::TanhLayer>();
+  auto fn = std::make_unique<ops::ExpandDimsLayer>();
 
-  fn->configure(input_tensor, output_tensor);
+  fn->configure(input_tensor, axis_tensor, output_tensor);
 
   _return_fn = std::move(fn);
 }
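
The same consolidation happens for unary math: the former Exp/Logistic/Tanh/... visitors become one `ElementwiseUnary`/`ElementwiseActivation` pair, where a single layer picks its kernel from the type enum at `configure()` time. A minimal sketch of that idea (hypothetical `UnaryLayer`, not onert's `ops::ElementwiseUnaryLayer`):

    #include <cmath>
    #include <cstddef>
    #include <functional>
    #include <vector>

    enum class UnaryType
    {
      kAbs,
      kExp,
      kNeg
    };

    class UnaryLayer
    {
    public:
      void configure(UnaryType type)
      {
        // One layer class, many ops: the kernel is selected once here.
        switch (type)
        {
          case UnaryType::kAbs:
            _kernel = [](float v) { return std::fabs(v); };
            break;
          case UnaryType::kExp:
            _kernel = [](float v) { return std::exp(v); };
            break;
          case UnaryType::kNeg:
            _kernel = [](float v) { return -v; };
            break;
        }
      }

      void run(const std::vector<float> &in, std::vector<float> &out) const
      {
        out.resize(in.size());
        for (std::size_t i = 0; i < in.size(); ++i)
          out[i] = _kernel(in[i]);
      }

    private:
      std::function<float(float)> _kernel;
    };
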
@@ -737,11 +712,11 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
 
   assert(-rank <= axis && axis < rank);
 
-  auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
 
   std::vector<const IPortableTensor *> input_tensors;
   for (auto &ifm_idx : node.getInputs())
-    input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
 
   auto fn = std::make_unique<ops::PackLayer>();
 
@@ -759,11 +734,11 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
 
   assert(rank == 0 || (-rank <= axis && axis < rank));
 
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 
   std::vector<IPortableTensor *> output_tensors;
   for (auto &output_idx : node.getOutputs())
-    output_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get());
+    output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
 
   auto fn = std::make_unique<ops::UnpackLayer>();
 
@@ -781,8 +756,8 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
   const auto output_index{node.getOutputs().at(0)};
   assert(_ctx.at(pad_index).data());
 
-  auto input = _tensor_builder->portableAt(input_index).get();
-  auto output = _tensor_builder->portableAt(output_index).get();
+  auto input = _tensor_reg->getPortableTensor(input_index).get();
+  auto output = _tensor_reg->getPortableTensor(output_index).get();
   auto pad_rank = _ctx.at(pad_index).shape().dim(0);
   auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());
 
@@ -801,62 +776,13 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
   _return_fn = std::move(fn);
 }
 
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
-  auto fn = std::make_unique<ops::MaxLayer>();
-
-  fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
-
-  _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
-  auto fn = std::make_unique<ops::MinLayer>();
-
-  fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
-
-  _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Cast &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
-  auto fn = std::make_unique<ops::CastLayer>();
-
-  fn->configure(ifm_tensor, ofm_tensor);
-
-  _return_fn = std::move(fn);
-}
-
 void KernelGenerator::visit(const ir::operation::Transpose &node)
 {
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 
   auto fn = std::make_unique<ops::TransposeLayer>();
 
@@ -872,9 +798,9 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
   const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
 
   const auto keep_dims = node.param().keep_dims;
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-  auto axes_tensor = _tensor_builder->portableAt(axes_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+  auto axes_tensor = _tensor_reg->getPortableTensor(axes_index).get();
 
   if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
   {
@@ -895,36 +821,6 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
   }
 }
 
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(0)};
-
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-
-  auto fn = std::make_unique<ops::ReLULayer>();
-
-  fn->configure(input_tensor, output_tensor);
-
-  _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(0)};
-
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-
-  auto fn = std::make_unique<ops::ReLU6Layer>();
-
-  fn->configure(input_tensor, output_tensor);
-
-  _return_fn = std::move(fn);
-}
-
 void KernelGenerator::visit(const ir::operation::Select &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -932,10 +828,10 @@ void KernelGenerator::visit(const ir::operation::Select &node)
   const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
   const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto condition_tensor = _tensor_builder->portableAt(condition_index).get();
-  auto true_tensor = _tensor_builder->portableAt(true_index).get();
-  auto false_tensor = _tensor_builder->portableAt(false_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto condition_tensor = _tensor_reg->getPortableTensor(condition_index).get();
+  auto true_tensor = _tensor_reg->getPortableTensor(true_index).get();
+  auto false_tensor = _tensor_reg->getPortableTensor(false_index).get();
 
   auto fn = std::make_unique<ops::SelectLayer>();
 
@@ -951,10 +847,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
   const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
   const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-  auto begins_tensor = _tensor_builder->portableAt(begins_index).get();
-  auto sizes_tensor = _tensor_builder->portableAt(sizes_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+  auto begins_tensor = _tensor_reg->getPortableTensor(begins_index).get();
+  auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index).get();
 
   auto fn = std::make_unique<ops::SliceLayer>();
 
@@ -971,11 +867,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
   const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
   const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-  auto starts_tensor = _tensor_builder->portableAt(starts_index).get();
-  auto ends_tensor = _tensor_builder->portableAt(ends_index).get();
-  auto strides_tensor = _tensor_builder->portableAt(strides_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+  auto starts_tensor = _tensor_reg->getPortableTensor(starts_index).get();
+  auto ends_tensor = _tensor_reg->getPortableTensor(ends_index).get();
+  auto strides_tensor = _tensor_reg->getPortableTensor(strides_index).get();
 
   auto begin_mask = node.param().begin_mask;
   auto end_mask = node.param().end_mask;
@@ -999,11 +895,11 @@ void KernelGenerator::visit(const ir::operation::Split &node)
   const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
   auto axis_resolved = axis < 0 ? axis + rank : axis;
 
-  auto in_tensor = _tensor_builder->portableAt(input_idx).get();
+  auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get();
 
   std::vector<IPortableTensor *> out_tensors;
   for (auto &output_idx : node.getOutputs())
-    out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get());
+    out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
 
   auto fn = std::make_unique<ops::SplitLayer>();
 
@@ -1012,73 +908,13 @@ void KernelGenerator::visit(const ir::operation::Split &node)
   _return_fn = std::move(fn);
 }
 
-void KernelGenerator::visit(const ir::operation::Abs &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
-  auto fn = std::make_unique<ops::AbsLayer>();
-
-  fn->configure(ifm_tensor, ofm_tensor);
-
-  _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Sin &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Sin::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
-  auto fn = std::make_unique<ops::SinLayer>();
-
-  fn->configure(ifm_tensor, ofm_tensor);
-
-  _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Cos &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Cos::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
-  auto fn = std::make_unique<ops::CosLayer>();
-
-  fn->configure(ifm_tensor, ofm_tensor);
-
-  _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
-  auto fn = std::make_unique<ops::RsqrtLayer>();
-
-  fn->configure(ifm_tensor, ofm_tensor);
-
-  _return_fn = std::move(fn);
-}
-
 void KernelGenerator::visit(const ir::operation::Shape &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
   const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
 
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
+  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
 
   auto fn = std::make_unique<ops::ShapeLayer>();
 
@@ -1097,8 +933,8 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
   auto align_corners = node.param().align_corners;
   auto half_pixel_centers = node.param().half_pixel_centers;
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 
   auto fn = std::make_unique<ops::ResizeBilinearLayer>();
 
@@ -1114,9 +950,9 @@ void KernelGenerator::visit(const ir::operation::Reverse &node)
   const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
   const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-  auto axis_tensor = _tensor_builder->portableAt(axis_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+  auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get();
 
   auto fn = std::make_unique<ops::ReverseLayer>();
 
@@ -1125,21 +961,6 @@ void KernelGenerator::visit(const ir::operation::Reverse &node)
   _return_fn = std::move(fn);
 }
 
-void KernelGenerator::visit(const ir::operation::Neg &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
-  auto fn = std::make_unique<ops::NegLayer>();
-
-  fn->configure(ifm_tensor, ofm_tensor);
-
-  _return_fn = std::move(fn);
-}
-
 void KernelGenerator::visit(const ir::operation::ArgMax &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -1147,8 +968,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
 
   const auto axis = node.param().axis;
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 
   auto fn = std::make_unique<ops::ArgMinMaxLayer>();
 
@@ -1157,81 +978,45 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
   _return_fn = std::move(fn);
 }
 
-void KernelGenerator::visit(const ir::operation::Pow &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
-
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
-  auto fn = std::make_unique<ops::PowLayer>();
-
-  fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
-
-  _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Log &node)
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Log::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
-  auto fn = std::make_unique<ops::LogLayer>();
-
-  fn->configure(ifm_tensor, ofm_tensor);
-
-  _return_fn = std::move(fn);
-}
+  const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
 
-void KernelGenerator::visit(const ir::operation::Round &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Round::INPUT)};
+  const auto kh = node.param().kh;
+  const auto kw = node.param().kw;
+  const auto stride = node.param().stride;
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+  const auto padding =
+      ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+  const auto activation = node.param().activation;
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
+  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
 
-  auto fn = std::make_unique<ops::RoundLayer>();
+  auto fn = std::make_unique<ops::PoolLayer>();
 
-  fn->configure(input_tensor, output_tensor);
+  fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
+                stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor,
+                convertPoolType(node.param().op_type));
 
   _return_fn = std::move(fn);
 }
 
-void KernelGenerator::visit(const ir::operation::LogicalNot &node)
+void KernelGenerator::visit(const ir::operation::Pow &node)
 {
   const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::LogicalNot::INPUT)};
-
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-
-  auto fn = std::make_unique<ops::LogicalNotLayer>();
-
-  fn->configure(input_tensor, output_tensor);
-
-  _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalOr &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(0)};
-  const auto rhs_index{node.getInputs().at(1)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
 
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
 
-  auto fn = std::make_unique<ops::LogicalOrLayer>();
+  auto fn = std::make_unique<ops::PowLayer>();
 
-  fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
+  fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
 
   _return_fn = std::move(fn);
 }
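
The `MaxPool2D`/`AvgPool2D` visitors removed earlier in this file return here as one `Pool2D` visitor that computes padding once and forwards a `PoolType` to a single `PoolLayer`. A toy window-reduction sketch of the dispatch (placeholder code, not onert's `ops::PoolLayer`):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    enum class PoolType
    {
      kAvg,
      kMax
    };

    // Reduce one pooling window; the window is assumed non-empty.
    float poolWindow(const std::vector<float> &window, PoolType type)
    {
      assert(!window.empty());
      if (type == PoolType::kMax)
        return *std::max_element(window.begin(), window.end());
      float sum = 0.0f;
      for (float v : window)
        sum += v;
      return sum / static_cast<float>(window.size());
    }
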
@@ -1241,8 +1026,8 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(0)};
 
-  auto output_alloc = _tensor_builder->portableAt(output_index).get();
-  auto input_alloc = _tensor_builder->portableAt(input_index).get();
+  auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_alloc = _tensor_reg->getPortableTensor(input_index).get();
 
   auto fn = std::make_unique<ops::L2NormLayer>();
 
@@ -1251,35 +1036,36 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
   _return_fn = std::move(fn);
 }
 
-void KernelGenerator::visit(const ir::operation::ZerosLike &node)
+void KernelGenerator::visit(const ir::operation::Range &node)
 {
   const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::ZerosLike::INPUT)};
+  const auto start_index{node.getInputs().at(ir::operation::Range::START)};
+  const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
+  const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto start_tensor = _tensor_reg->getPortableTensor(start_index).get();
+  auto limit_tensor = _tensor_reg->getPortableTensor(limit_index).get();
+  auto delta_tensor = _tensor_reg->getPortableTensor(delta_index).get();
 
-  auto fn = std::make_unique<ops::ZerosLikeLayer>();
+  auto fn = std::make_unique<ops::RangeLayer>();
 
-  fn->configure(input_tensor, output_tensor);
+  fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
   _return_fn = std::move(fn);
 }
 
-void KernelGenerator::visit(const ir::operation::Range &node)
+void KernelGenerator::visit(const ir::operation::Rank &node)
 {
-  const auto output_index{node.getOutputs().at(0)};
-  const auto start_index{node.getInputs().at(ir::operation::Range::START)};
-  const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
-  const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::Rank::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto start_tensor = _tensor_builder->portableAt(start_index).get();
-  auto limit_tensor = _tensor_builder->portableAt(limit_index).get();
-  auto delta_tensor = _tensor_builder->portableAt(delta_index).get();
+  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
 
-  auto fn = std::make_unique<ops::RangeLayer>();
+  auto fn = std::make_unique<ops::RankLayer>();
+
+  fn->configure(ifm_tensor, ofm_tensor);
 
-  fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
   _return_fn = std::move(fn);
 }
 
@@ -1289,9 +1075,9 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
   const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
   const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
 
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
 
   auto fn = std::make_unique<ops::SqDiffLayer>();
 
@@ -1305,9 +1091,9 @@ void KernelGenerator::visit(const ir::operation::Tile &node)
   const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
   const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-  auto multiples_tensor = _tensor_builder->portableAt(multiples_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+  auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index).get();
 
   auto fn = std::make_unique<ops::TileLayer>();
 
@@ -1322,10 +1108,10 @@ void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
   const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
   const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-  auto num_lower_tensor = _tensor_builder->portableAt(num_lower_index).get();
-  auto num_upper_tensor = _tensor_builder->portableAt(num_upper_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+  auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index).get();
+  auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index).get();
 
   auto fn = std::make_unique<ops::MatrixBandPartLayer>();
 
@@ -1339,9 +1125,9 @@ void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
   const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
   const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
 
   const auto adj_x = node.param().adj_x;
   const auto adj_y = node.param().adj_y;
@@ -1358,9 +1144,9 @@ void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
   const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
   const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-  auto shape_tensor = _tensor_builder->portableAt(shape_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+  auto shape_tensor = _tensor_reg->getPortableTensor(shape_index).get();
 
   auto fn = std::make_unique<ops::BroadcastToLayer>();
 
@@ -1373,10 +1159,10 @@ void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
 
-  auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
   std::vector<const IPortableTensor *> input_tensors;
   for (auto &ifm_idx : node.getInputs())
-    input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
 
   const auto epsilon = node.param().epsilon;
   const auto is_training = node.param().is_training;
@@ -1397,8 +1183,8 @@ void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
   const auto beta = node.param().beta;
   const auto axis = node.param().axis;
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 
   auto fn = std::make_unique<ops::LogSoftMaxLayer>();
 
@@ -1414,10 +1200,10 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
   const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
   const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-  auto block_shape_tensor = _tensor_builder->portableAt(block_shape_index).get();
-  auto padding_tensor = _tensor_builder->portableAt(padding_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+  auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index).get();
+  auto padding_tensor = _tensor_reg->getPortableTensor(padding_index).get();
 
   auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();
 
@@ -1426,29 +1212,14 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
   _return_fn = std::move(fn);
 }
 
-void KernelGenerator::visit(const ir::operation::Quantize &node)
-{
-  const auto input_index{node.getInputs().at(ir::operation::Quantize::Input::INPUT)};
-  const auto output_index{node.getOutputs().at(0)};
-
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-
-  auto fn = std::make_unique<ops::QuantizeLayer>();
-
-  fn->configure(input_tensor, output_tensor);
-
-  _return_fn = std::move(fn);
-}
-
 void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
 {
   const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
   const auto output_index{node.getOutputs().at(0)};
   auto block_size = node.param().block_size;
 
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
 
   auto fn = std::make_unique<ops::SpaceToDepthLayer>();
 
@@ -1462,9 +1233,9 @@ void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
   const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
   const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};
 
-  auto output_alloc = _tensor_builder->portableAt(output_index).get();
-  auto shape_alloc = _tensor_builder->portableAt(shape_index).get();
-  auto seed_alloc = _tensor_builder->portableAt(seed_index).get();
+  auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
+  auto shape_alloc = _tensor_reg->getPortableTensor(shape_index).get();
+  auto seed_alloc = _tensor_reg->getPortableTensor(seed_index).get();
 
   auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();
 
@@ -1481,13 +1252,13 @@ void KernelGenerator::visit(const ir::operation::SplitV &node)
   const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
   const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
 
-  auto in_tensor = _tensor_builder->portableAt(input_idx).get();
-  auto in_size_splits = _tensor_builder->portableAt(size_splits).get();
-  auto in_split_dim = _tensor_builder->portableAt(split_dim).get();
+  auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get();
+  auto in_size_splits = _tensor_reg->getPortableTensor(size_splits).get();
+  auto in_split_dim = _tensor_reg->getPortableTensor(split_dim).get();
 
   std::vector<IPortableTensor *> out_tensors;
   for (auto &output_idx : node.getOutputs())
-    out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get());
+    out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
 
   auto fn = std::make_unique<ops::SplitVLayer>();
 
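Every visit method touched above follows the same wiring pattern after this refactor: operand indices are resolved through the shared tensor registry instead of the tensor builder. A minimal sketch of that pattern, assuming only the types shown in this diff; ir::operation::ExampleOp and ops::ExampleLayer are hypothetical stand-ins:

// Sketch only: the registry-based wiring common to the visit methods above.
// ExampleOp and ExampleLayer are illustrative names, not real operations.
void KernelGenerator::visit(const ir::operation::ExampleOp &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(0)};

  // Tensors now come from the TensorRegistry, not the TensorBuilder.
  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();

  auto fn = std::make_unique<ops::ExampleLayer>();
  fn->configure(input_tensor, output_tensor);
  _return_fn = std::move(fn);
}
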
index 40c056a..786e68e 100644 (file)
@@ -19,6 +19,7 @@
 
 #include "ExternalContext.h"
 #include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"
 #include "Tensor.h"
 
 #include <backend/CustomKernelBuilder.h>
@@ -38,6 +39,7 @@ class KernelGenerator : public IKernelGenerator
 public:
   KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
                   const std::shared_ptr<TensorBuilder> &tensor_builder,
+                  const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
                   const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
                   const std::shared_ptr<ExternalContext> &external_context);
 
@@ -46,8 +48,6 @@ public:
   void visit(const ir::OpSequence &) override;
   void visit(const ir::operation::Conv2D &) override;
   void visit(const ir::operation::DepthwiseConv2D &) override;
-  void visit(const ir::operation::MaxPool2D &) override;
-  void visit(const ir::operation::AvgPool2D &) override;
   void visit(const ir::operation::Concat &) override;
   void visit(const ir::operation::Fill &) override;
   void visit(const ir::operation::FullyConnected &) override;
@@ -55,51 +55,35 @@ public:
   void visit(const ir::operation::Squeeze &) override;
   void visit(const ir::operation::Softmax &) override;
   void visit(const ir::operation::Comparison &) override;
-  void visit(const ir::operation::Add &) override;
-  void visit(const ir::operation::Sub &) override;
-  void visit(const ir::operation::Mul &) override;
-  void visit(const ir::operation::Div &) override;
+  void visit(const ir::operation::BinaryArithmetic &) override;
   void visit(const ir::operation::Einsum &) override;
   void visit(const ir::operation::Gather &) override;
   void visit(const ir::operation::Custom &node) override;
-  void visit(const ir::operation::Exp &) override;
+  void visit(const ir::operation::ElementwiseActivation &) override;
+  void visit(const ir::operation::ElementwiseBinary &) override;
+  void visit(const ir::operation::ElementwiseUnary &) override;
   void visit(const ir::operation::ExpandDims &) override;
-  void visit(const ir::operation::Logistic &) override;
   void visit(const ir::operation::Pad &) override;
-  void visit(const ir::operation::Max &) override;
-  void visit(const ir::operation::Min &) override;
-  void visit(const ir::operation::Tanh &) override;
   void visit(const ir::operation::Pack &) override;
   void visit(const ir::operation::Unpack &) override;
   void visit(const ir::operation::OneHot &) override;
-  void visit(const ir::operation::Cast &) override;
   void visit(const ir::operation::Transpose &) override;
   void visit(const ir::operation::Reduce &) override;
-  void visit(const ir::operation::ReLU &) override;
-  void visit(const ir::operation::ReLU6 &) override;
   void visit(const ir::operation::Select &) override;
   void visit(const ir::operation::Slice &) override;
   void visit(const ir::operation::StridedSlice &) override;
   void visit(const ir::operation::Split &) override;
-  void visit(const ir::operation::Abs &) override;
-  void visit(const ir::operation::Cos &) override;
-  void visit(const ir::operation::Sin &) override;
-  void visit(const ir::operation::RSQRT &) override;
   void visit(const ir::operation::Shape &) override;
   void visit(const ir::operation::ResizeBilinear &node) override;
   void visit(const ir::operation::Reverse &) override;
-  void visit(const ir::operation::Neg &) override;
   void visit(const ir::operation::ArgMax &) override;
-  void visit(const ir::operation::Log &) override;
-  void visit(const ir::operation::Round &) override;
+  void visit(const ir::operation::Pool2D &) override;
   void visit(const ir::operation::Pow &) override;
-  void visit(const ir::operation::LogicalNot &) override;
-  void visit(const ir::operation::ZerosLike &) override;
   void visit(const ir::operation::SquaredDifference &) override;
   void visit(const ir::operation::Tile &) override;
-  void visit(const ir::operation::LogicalOr &) override;
   void visit(const ir::operation::L2Normalization &) override;
   void visit(const ir::operation::Range &) override;
+  void visit(const ir::operation::Rank &) override;
   void visit(const ir::operation::MatrixBandPart &) override;
   void visit(const ir::operation::BatchMatMul &) override;
   void visit(const ir::operation::BatchToSpaceND &) override;
@@ -107,7 +91,6 @@ public:
   void visit(const ir::operation::FusedBatchNorm &) override;
   void visit(const ir::operation::LogSoftmax &) override;
   void visit(const ir::operation::SpaceToBatchND &) override;
-  void visit(const ir::operation::Quantize &) override;
   void visit(const ir::operation::SpaceToDepth &) override;
   void visit(const ir::operation::StatelessRandomUniform &) override;
   void visit(const ir::operation::SplitV &) override;
@@ -116,6 +99,7 @@ private:
   const ir::Operands &_ctx;
   const ir::Operations &_operations_ctx;
   std::shared_ptr<TensorBuilder> _tensor_builder;
+  std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
   std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
   ir::Layout _current_op_seq_layout;
   const std::shared_ptr<ExternalContext> _external_context;
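The registry is now an explicit constructor dependency rather than something the builder creates internally. A hedged sketch of how a backend could wire these objects together under the new signature; the local names operands, operations, kernel_builder, and external_context are placeholders, not taken from this diff:

// One shared TensorRegistry feeds both the TensorBuilder and the
// KernelGenerator, so both sides observe the same tensor objects.
auto tensor_reg = std::make_shared<cpu_common::TensorRegistry>();
auto tensor_builder = std::make_shared<TensorBuilder>(tensor_reg);
auto kernel_gen = std::make_shared<KernelGenerator>(
    operands, operations, tensor_builder, tensor_reg, kernel_builder, external_context);
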
index ab8ba57..828d52f 100644 (file)
@@ -27,8 +27,8 @@ namespace backend
 namespace cpu
 {
 
-TensorBuilder::TensorBuilder()
-    : _tensor_reg{new cpu_common::TensorRegistry()},
+TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
+    : _tensor_reg{tensor_reg},
       _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
       _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
 {
@@ -57,7 +57,7 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
   assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
   const auto tensor_info = _tensor_info_map.at(ind);
 
-  if (!at(ind)->is_dynamic())
+  if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
   {
     const auto size = tensor_info.total_size();
     _static_tensor_mgr->claimPlan(ind, size);
@@ -66,7 +66,7 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
 
 void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
 {
-  if (!at(ind)->is_dynamic())
+  if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
   {
     _static_tensor_mgr->releasePlan(ind);
   }
@@ -85,29 +85,6 @@ void TensorBuilder::allocate()
   //      This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
 }
 
-std::shared_ptr<ITensor> TensorBuilder::tensorAt(const ir::OperandIndex &ind)
-{
-  return _tensor_reg->getITensor(ind);
-}
-
-std::shared_ptr<IPortableTensor> TensorBuilder::portableAt(const ir::OperandIndex &ind)
-{
-  return _tensor_reg->getPortableTensor(ind);
-}
-
-bool TensorBuilder::setMigrantTensor(const ir::OperandIndex &ind,
-                                     const std::shared_ptr<IPortableTensor> &tensor)
-{
-  return _tensor_reg->setMigrantTensor(ind, tensor);
-}
-
-void TensorBuilder::iterate(const IterateFunction &fn) { _static_tensor_mgr->iterate(fn); }
-
-std::shared_ptr<Tensor> TensorBuilder::at(const ir::OperandIndex &ind)
-{
-  return _tensor_reg->getNativeTensor(ind);
-}
-
 std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void)
 {
   return std::move(_static_tensor_mgr);
index 6171365..b6d5f09 100644 (file)
@@ -38,9 +38,7 @@ namespace cpu
 class TensorBuilder : public ITensorBuilder
 {
 public:
-  TensorBuilder();
-
-  bool supportDynamicTensor() override { return true; }
+  TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
 
   /**
    * @brief     Register tensor information to allocate on CPU backend
@@ -60,34 +58,12 @@ public:
   void allocate() override;
   void postFunctionPrepare() override { /* DO NOTHING */}
 
-  /**
-   * @brief Get tensor with a specific OperandIndex
-   *
-   * @return shared_ptr<ITensor> if a tensor with given OperandIndex exists. nullptr otherwise.
-   */
-  std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override;
-
-  void iterate(const IterateFunction &fn) override;
-
   std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
 
   IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); }
 
   std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) override;
 
-  /**
-   * @brief Get tensor with a specific OperandIndex.
-   * @param ind OperandIndex for the tensor. There must exist a tensor with this ind.
-   *        If not, program will crash with assert or exception.
-   * @return shared_ptr<Tensor>
-   */
-  std::shared_ptr<Tensor> at(const ir::OperandIndex &ind);
-  std::shared_ptr<IPortableTensor> portableAt(const ir::OperandIndex &ind);
-  bool setMigrantTensor(const ir::OperandIndex &ind,
-                        const std::shared_ptr<IPortableTensor> &tensor) override;
-
-  std::shared_ptr<ITensorRegistry> tensorRegistry() override { return _tensor_reg; }
-
 private:
   const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
   std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
diff --git a/runtime/onert/backend/cpu/ops/AbsLayer.cc b/runtime/onert/backend/cpu/ops/AbsLayer.cc
deleted file mode 100644 (file)
index 322785a..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "AbsLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-AbsLayer::AbsLayer() : _input(nullptr), _output(nullptr)
-{
-  // DO NOTHING
-}
-
-void AbsLayer::absFloat32()
-{
-  nnfw::cker::Abs(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
-                  getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void AbsLayer::absQuant8() { throw std::runtime_error{"NYI"}; }
-
-void AbsLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
-  _input = input;
-  _output = output;
-}
-
-void AbsLayer::run()
-{
-  if (_input->data_type() == OperandType::FLOAT32)
-  {
-    absFloat32();
-  }
-  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    absQuant8();
-  }
-  else
-  {
-    throw std::runtime_error{"Abs: unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/AbsLayer.h b/runtime/onert/backend/cpu/ops/AbsLayer.h
deleted file mode 100644 (file)
index feb5f35..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__
-
-#include "backend/IPortableTensor.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class AbsLayer : public ::onert::exec::IFunction
-{
-public:
-  AbsLayer();
-
-public:
-  void absFloat32();
-
-  void absQuant8();
-
-  void configure(const IPortableTensor *input, IPortableTensor *output);
-
-  void run() override;
-
-private:
-  const IPortableTensor *_input;
-  IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/AddLayer.cc b/runtime/onert/backend/cpu/ops/AddLayer.cc
deleted file mode 100644 (file)
index 3792153..0000000
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "AddLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-void AddLayer::addFloat32()
-{
-  float output_activation_min = 0, output_activation_max = 0;
-  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
-  nnfw::cker::BinaryArithmeticOpParam op_params;
-  op_params.float_activation_max = output_activation_max;
-  op_params.float_activation_min = output_activation_min;
-
-  const bool need_broadcast =
-      nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
-  if (need_broadcast)
-  {
-    nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
-        op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
-        getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
-        getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-    return;
-  }
-
-  nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
-      op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
-      getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
-      getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void AddLayer::addInt32()
-{
-  int32_t output_activation_min = 0, output_activation_max = 0;
-  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
-  nnfw::cker::BinaryArithmeticOpParam op_params;
-  op_params.quantized_activation_max = output_activation_max;
-  op_params.quantized_activation_min = output_activation_min;
-
-  const bool need_broadcast =
-      nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
-  if (need_broadcast)
-  {
-    nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
-        op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()),
-        getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()),
-        getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
-    return;
-  }
-
-  nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
-      op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()),
-      getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()),
-      getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
-}
-
-void AddLayer::addQuant8()
-{
-  int32_t output_activation_min, output_activation_max;
-  CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
-                                &output_activation_max);
-  nnfw::cker::BinaryArithmeticOpParam op_params;
-  op_params.quantized_activation_max = output_activation_max;
-  op_params.quantized_activation_min = output_activation_min;
-  // Parameters for scaled quantized computation
-  op_params.left_shift = 20;
-  // Zero-points of input and output tensors
-  op_params.input1_offset = -_lhs->data_offset();
-  op_params.input2_offset = -_rhs->data_offset();
-  op_params.output_offset = _output->data_offset();
-  assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255));
-  assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255));
-  assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255));
-
-  // Compute normalized scale for _lhs and _rhs values,
-  // and represent in 32-bit fixed point
-  const double norm_max_scale = 2 * std::max(_lhs->data_scale(), _rhs->data_scale());
-  const double real_lhs_scale = _lhs->data_scale() / norm_max_scale;
-  const double real_rhs_scale = _rhs->data_scale() / norm_max_scale;
-  // output scale is used to normalize final result, so we invert the scale here
-  const double real_output_scale =
-      norm_max_scale / (_output->data_scale() * (1 << op_params.left_shift));
-
-  // Represent the scales as fixed int32_t multipliers, and int32_t shifts
-  QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift);
-  QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift);
-  QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift);
-
-  // cker quant8 add is not implemented yet
-  const bool need_broadcast =
-      nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
-  if (need_broadcast)
-  {
-    nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
-        op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
-        getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
-        getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-    return;
-  }
-
-  nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
-      op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
-      getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
-      getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void AddLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
-                         const ir::Activation activation, IPortableTensor *output)
-{
-  assert(lhs != nullptr);
-  assert(rhs != nullptr);
-  assert(output != nullptr);
-
-  _lhs = lhs;
-  _rhs = rhs;
-  _activation = activation;
-  _output = output;
-}
-
-void AddLayer::run()
-{
-  if (_lhs->data_type() == OperandType::FLOAT32)
-  {
-    addFloat32();
-  }
-  else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    addQuant8();
-  }
-  else if (_output->data_type() == OperandType::INT32)
-  {
-    addInt32();
-  }
-  else
-  {
-    throw std::runtime_error{"Add: unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/AddLayer.h b/runtime/onert/backend/cpu/ops/AddLayer.h
deleted file mode 100644 (file)
index 91030d9..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class AddLayer : public ::onert::exec::IFunction
-{
-public:
-  AddLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
-  {
-    // DO NOTHING
-  }
-
-public:
-  void addFloat32();
-
-  void addQuant8();
-
-  void addInt32();
-
-  void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
-                 const ir::Activation activation, IPortableTensor *output);
-
-  void run() override;
-
-private:
-  const IPortableTensor *_lhs;
-  const IPortableTensor *_rhs;
-  IPortableTensor *_output;
-
-  ir::Activation _activation{ir::Activation::NONE};
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc b/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc
deleted file mode 100644 (file)
index 9c22c1c..0000000
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "AvgPoolLayer.h"
-
-#include <cker/operation/AveragePool.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-#define AVGPOOLING_PARAMETERS                            \
-  nnfw::cker::PoolParams op_params;                      \
-  op_params.stride_height = _strideHeight;               \
-  op_params.stride_width = _strideWidth;                 \
-  op_params.filter_height = _kernelHeight;               \
-  op_params.filter_width = _kernelWidth;                 \
-  op_params.padding_values.height = (int8_t)_paddingTop; \
-  op_params.padding_values.width = (int8_t)_paddingLeft;
-
-AvgPoolLayer::AvgPoolLayer()
-    : _input(nullptr), _output(nullptr), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
-      _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), _kernelHeight(0),
-      _activation(ir::Activation::NONE)
-{
-  // DO NOTHING
-}
-
-void AvgPoolLayer::averagePoolFloat32()
-{
-  AVGPOOLING_PARAMETERS
-  float output_activation_min = 0, output_activation_max = 0;
-  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
-  op_params.float_activation_min = output_activation_min;
-  op_params.float_activation_max = output_activation_max;
-
-  nnfw::cker::AveragePool(op_params, getTensorShape(_input),
-                          reinterpret_cast<const float *>(_input->buffer()),
-                          getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-void AvgPoolLayer::averagePoolQuant8()
-{
-  AVGPOOLING_PARAMETERS
-  int32_t output_activation_min = 0;
-  int32_t output_activation_max = 0;
-  CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
-                                &output_activation_max);
-  op_params.quantized_activation_min = output_activation_min;
-  op_params.quantized_activation_max = output_activation_max;
-
-  nnfw::cker::AveragePool(op_params, getTensorShape(_input),
-                          reinterpret_cast<const uint8_t *>(_input->buffer()),
-                          getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void AvgPoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft,
-                             const uint32_t paddingRight, const uint32_t paddingTop,
-                             const uint32_t paddingBottom, const uint32_t strideWidth,
-                             const uint32_t strideHeight, const uint32_t kernelWidth,
-                             const uint32_t kernelHeight, const ir::Activation activation,
-                             IPortableTensor *output)
-{
-  assert(input != nullptr);
-  assert(output != nullptr);
-
-  _input = input;
-  _paddingLeft = paddingLeft;
-  _paddingRight = paddingRight;
-  _paddingTop = paddingTop;
-  _paddingBottom = paddingBottom;
-  _strideWidth = strideWidth;
-  _strideHeight = strideHeight;
-  _kernelWidth = kernelWidth;
-  _kernelHeight = kernelHeight;
-  _activation = activation;
-  _output = output;
-}
-
-void AvgPoolLayer::run()
-{
-  if (_input->data_type() == OperandType::FLOAT32)
-  {
-    averagePoolFloat32();
-  }
-  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    averagePoolQuant8();
-  }
-  else
-  {
-    throw std::runtime_error{"AvgPool: unsupported data type"};
-  }
-}
-
-#undef AVGPOOLING_PARAMETERS
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/AvgPoolLayer.h b/runtime/onert/backend/cpu/ops/AvgPoolLayer.h
deleted file mode 100644 (file)
index d4e8f79..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class AvgPoolLayer : public ::onert::exec::IFunction
-{
-public:
-  AvgPoolLayer();
-
-public:
-  void averagePoolFloat32();
-
-  void averagePoolQuant8();
-
-  void configure(const IPortableTensor *input, const uint32_t paddingLeft,
-                 const uint32_t paddingRight, const uint32_t paddingTop,
-                 const uint32_t paddingBottom, const uint32_t strideWidth,
-                 const uint32_t strideHeight, const uint32_t kernelWidth,
-                 const uint32_t kernelHeight, const ir::Activation activation,
-                 IPortableTensor *output);
-
-  void run() override;
-
-private:
-  const IPortableTensor *_input;
-  IPortableTensor *_output;
-
-  uint32_t _paddingLeft;
-  uint32_t _paddingTop;
-  uint32_t _paddingRight;
-  uint32_t _paddingBottom;
-
-  uint32_t _strideWidth;
-  uint32_t _strideHeight;
-  uint32_t _kernelWidth;
-  uint32_t _kernelHeight;
-
-  ir::Activation _activation;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc
new file mode 100644 (file)
index 0000000..f50c633
--- /dev/null
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BinaryArithmeticLayer.h"
+
+#include <cker/operation/BinaryArithmeticOps.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+
+template <nnfw::cker::BinaryArithmeticOpType arithmetic_type, typename T>
+void eval(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+          nnfw::cker::BinaryArithmeticOpParam op_params)
+{
+  const bool need_broadcast =
+      nnfw::cker::ProcessBroadcastShapes(getTensorShape(lhs), getTensorShape(rhs), &op_params);
+  if (need_broadcast)
+  {
+    nnfw::cker::BroadcastBinaryArithmeticOp<arithmetic_type>(
+        op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+        getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+        reinterpret_cast<T *>(output->buffer()));
+    return;
+  }
+
+  nnfw::cker::BinaryArithmeticOp<arithmetic_type>(
+      op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+      getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+      reinterpret_cast<T *>(output->buffer()));
+}
+
+template <nnfw::cker::BinaryArithmeticOpType arithmetic_type>
+std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)>
+generateKernelGeneric(const IPortableTensor *lhs, const ir::Activation activation,
+                      nnfw::cker::BinaryArithmeticOpParam op_params)
+{
+  switch (lhs->data_type())
+  {
+    case OperandType::FLOAT32:
+    {
+      float output_activation_min = 0, output_activation_max = 0;
+      CalculateActivationRange(activation, &output_activation_min, &output_activation_max);
+      op_params.float_activation_max = output_activation_max;
+      op_params.float_activation_min = output_activation_min;
+      return std::bind(&eval<arithmetic_type, float>, std::placeholders::_1, std::placeholders::_2,
+                       std::placeholders::_3, op_params);
+      break;
+    }
+    case OperandType::INT32:
+    {
+      int32_t output_activation_min = 0, output_activation_max = 0;
+      CalculateActivationRange(activation, &output_activation_min, &output_activation_max);
+      op_params.quantized_activation_max = output_activation_max;
+      op_params.quantized_activation_min = output_activation_min;
+      return std::bind(eval<arithmetic_type, int32_t>, std::placeholders::_1, std::placeholders::_2,
+                       std::placeholders::_3, op_params);
+      break;
+    }
+    default:
+      throw std::runtime_error{"BinaryArithmetic(generic): Unsupported data type"};
+  }
+}
+
+void setAddOrSubQuant8Params(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                             IPortableTensor *output, ir::Activation activation,
+                             nnfw::cker::BinaryArithmeticOpParam *params)
+{
+  int32_t output_activation_min, output_activation_max;
+  CalculateActivationRangeUint8(activation, output, &output_activation_min, &output_activation_max);
+  nnfw::cker::BinaryArithmeticOpParam &op_params = *params;
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.quantized_activation_min = output_activation_min;
+  // Parameters for scaled quantized computation
+  op_params.left_shift = 20;
+  // Zero-points of input and output tensors
+  op_params.input1_offset = -lhs->data_offset();
+  op_params.input2_offset = -rhs->data_offset();
+  op_params.output_offset = output->data_offset();
+  assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255));
+  assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255));
+  assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255));
+
+  // Compute normalized scale for _lhs and _rhs values,
+  // and represent in 32-bit fixed point
+  const double norm_max_scale = 2 * std::max(lhs->data_scale(), rhs->data_scale());
+  const double real_lhs_scale = lhs->data_scale() / norm_max_scale;
+  const double real_rhs_scale = rhs->data_scale() / norm_max_scale;
+  // output scale is used to normalize final result, so we invert the scale here
+  const double real_output_scale =
+      norm_max_scale / (output->data_scale() * (1 << op_params.left_shift));
+
+  // Represent the scales as fixed int32_t multipliers, and int32_t shifts
+  QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift);
+  QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift);
+  QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift);
+}
+
+void setMulQuant8Params(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                        IPortableTensor *output, ir::Activation activation,
+                        nnfw::cker::BinaryArithmeticOpParam *params)
+{
+  int32_t output_activation_min, output_activation_max;
+  CalculateActivationRangeUint8(activation, output, &output_activation_min, &output_activation_max);
+  nnfw::cker::BinaryArithmeticOpParam &op_params = *params;
+
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.input1_offset = -lhs->data_offset();
+  op_params.input2_offset = -rhs->data_offset();
+  op_params.output_offset = output->data_offset();
+
+  double real_multiplier = lhs->data_scale() * rhs->data_scale() / output->data_scale();
+  QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift);
+}
+
+} // namespace
+
+void BinaryArithmeticLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                                      IPortableTensor *output, const ir::Activation activation,
+                                      const ArithmeticType arithmetic_type)
+{
+  assert(lhs != nullptr);
+  assert(rhs != nullptr);
+  assert(output != nullptr);
+
+  _lhs = lhs;
+  _rhs = rhs;
+  _output = output;
+
+  nnfw::cker::BinaryArithmeticOpParam op_params;
+  switch (arithmetic_type)
+  {
+    case ArithmeticType::kAdd:
+      if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+        _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::ADD, uint8_t>,
+                            std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+                            op_params);
+      }
+      else
+      {
+        _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::ADD>(_lhs, activation,
+                                                                                 op_params);
+      }
+      break;
+    case ArithmeticType::kSub:
+      if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+        op_params.input2_multiplier *= -1;
+        _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::SUB, uint8_t>,
+                            std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+                            op_params);
+      }
+      else
+      {
+        _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::SUB>(_lhs, activation,
+                                                                                 op_params);
+      }
+      break;
+    case ArithmeticType::kMul:
+      if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        nnfw::cker::BinaryArithmeticOpParam op_params;
+        setMulQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+        _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::MUL, uint8_t>,
+                            std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+                            op_params);
+      }
+      else
+      {
+        _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::MUL>(_lhs, activation,
+                                                                                 op_params);
+      }
+      break;
+    case ArithmeticType::kDiv:
+      if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        throw std::runtime_error{
+            "BinaryArithmetic(Div): Div operation does not support quantization"};
+      }
+      else if (_lhs->data_type() == OperandType::INT32)
+      {
+        throw std::runtime_error{"BinaryArithmetic(Div): Unsupported data type"};
+      }
+      else
+      {
+        _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::DIV>(_lhs, activation,
+                                                                                 op_params);
+      }
+      break;
+    default:
+      throw std::runtime_error{"BinaryArithmetic: Unsupported BinaryArithmetic type"};
+  }
+}
+
+void BinaryArithmeticLayer::run() { _kernel(_lhs, _rhs, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
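The new layer folds Add, Sub, Mul, and Div into one class: configure() resolves the data type, activation range, and quantization parameters once and binds a typed eval instantiation into _kernel, so run() is a single indirect call with no per-invocation branching. A hedged usage sketch; lhs, rhs, and output are assumed to be IPortableTensor pointers obtained from the tensor registry as in the KernelGenerator changes above:

// Sketch only: typical wiring of the consolidated arithmetic layer.
auto layer = std::make_unique<ops::BinaryArithmeticLayer>();
layer->configure(lhs, rhs, output, ir::Activation::NONE, ops::ArithmeticType::kAdd);
layer->run(); // invokes the kernel bound during configure()
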
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
 
 #include <backend/IPortableTensor.h>
 #include "OperationUtils.h"
@@ -31,21 +31,25 @@ namespace cpu
 namespace ops
 {
 
-class DivLayer : public ::onert::exec::IFunction
+enum class ArithmeticType
+{
+  kAdd,
+  kSub,
+  kMul,
+  kDiv,
+};
+
+class BinaryArithmeticLayer : public ::onert::exec::IFunction
 {
 public:
-  DivLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
+  BinaryArithmeticLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
   {
     // DO NOTHING
   }
 
 public:
-  void divFloat32();
-
-  void divQuant8();
-
-  void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
-                 const ir::Activation activation, IPortableTensor *output);
+  void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+                 const ir::Activation activation, const ArithmeticType arithmetic_type);
 
   void run() override;
 
@@ -54,7 +58,7 @@ private:
   const IPortableTensor *_rhs;
   IPortableTensor *_output;
 
-  ir::Activation _activation{ir::Activation::NONE};
+  std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)> _kernel;
 };
 
 } // namespace ops
@@ -62,4 +66,4 @@ private:
 } // namespace backend
 } // namespace onert
 
-#endif // __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
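Storing the kernel as a std::function is what lets the header drop the per-type methods and the cached activation: every decision is captured at configure time. A minimal standalone illustration of that pattern with generic names, not taken from this diff:

#include <functional>

// Minimal sketch of configure-time kernel binding: configure() decides once,
// run() only invokes the stored callable.
struct BoundKernel
{
  std::function<int(int, int)> fn;

  void configure(bool use_add)
  {
    if (use_add)
      fn = [](int a, int b) { return a + b; };
    else
      fn = [](int a, int b) { return a - b; };
  }

  int run(int a, int b) { return fn(a, b); }
};
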
diff --git a/runtime/onert/backend/cpu/ops/CastLayer.cc b/runtime/onert/backend/cpu/ops/CastLayer.cc
deleted file mode 100644 (file)
index 4975156..0000000
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "CastLayer.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-CastLayer::CastLayer() : _input(nullptr), _output(nullptr)
-{
-  // DO NOTHING
-}
-
-void CastLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
-  _input = input;
-  _output = output;
-}
-
-template <typename FromT, typename ToT> void CastLayer::castTensor(const FromT *in, ToT *out)
-{
-  auto input_shape = getTensorShape(_input);
-  auto output_shape = getTensorShape(_output);
-  const auto num_elements = MatchingFlatSize(input_shape, output_shape);
-
-  std::transform(in, in + num_elements, out, [](FromT a) { return static_cast<ToT>(a); });
-}
-
-template <typename FromT> void CastLayer::castPtr(const FromT *in, DataPtr out)
-{
-  switch (_output->data_type())
-  {
-    case ir::DataType::FLOAT32:
-      castTensor(in, out.f);
-      return;
-    case ir::DataType::INT32:
-      castTensor(in, out.i32);
-      return;
-    case ir::DataType::UINT32:
-      castTensor(in, out.u32);
-      return;
-    case ir::DataType::UINT8:
-      castTensor(in, out.u8);
-      return;
-    case ir::DataType::BOOL8:
-      castTensor(in, out.b);
-      return;
-    case ir::DataType::INT64:
-      castTensor(in, out.i64);
-      return;
-    default:
-      throw std::runtime_error("Not supported output type: " +
-                               std::to_string((int)_output->data_type()));
-  }
-}
-
-void CastLayer::run()
-{
-  auto input_buf = _input->buffer();
-  auto output_buf = _output->buffer();
-  const auto in = *reinterpret_cast<const DataPtr *>(&input_buf);
-  auto out = *reinterpret_cast<DataPtr *>(&output_buf);
-
-  switch (_input->data_type())
-  {
-    case ir::DataType::FLOAT32:
-      castPtr(in.f, out);
-      return;
-    case ir::DataType::INT32:
-      castPtr(in.i32, out);
-      return;
-    case ir::DataType::UINT32:
-      castPtr(in.u32, out);
-      return;
-    case ir::DataType::UINT8:
-      castPtr(in.u8, out);
-      return;
-    case ir::DataType::BOOL8:
-      castPtr(in.b, out);
-      return;
-    case ir::DataType::INT64:
-      castPtr(in.i64, out);
-      return;
-    default:
-      throw std::runtime_error("Cast: unsupported data type: " +
-                               std::to_string((int)_input->data_type()));
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/CastLayer.h b/runtime/onert/backend/cpu/ops/CastLayer.h
deleted file mode 100644 (file)
index 290c722..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class CastLayer : public ::onert::exec::IFunction
-{
-public:
-  CastLayer();
-
-public:
-  template <typename FromT, typename ToT> void castTensor(const FromT *in, ToT *out);
-  template <typename FromT> void castPtr(const FromT *in, DataPtr out);
-
-  void configure(const IPortableTensor *input, IPortableTensor *output);
-
-  void run() override;
-
-private:
-  const IPortableTensor *_input;
-  IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
index 2d5bbef..c057267 100644 (file)
@@ -31,7 +31,8 @@ namespace ops
 ConvolutionLayer::ConvolutionLayer()
     : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
       _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
-      _paddingBottom(0), _strideWidth(0), _strideHeight(0), _activation(ir::Activation::NONE),
+      _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
+      _dilationHeightFactor(1), _activation(ir::Activation::NONE),
       _conv_kernel(new nnfw::cker::Conv()), _prepare(false)
 {
   // DO NOTHING
@@ -50,8 +51,8 @@ void ConvolutionLayer::convFloat32()
   op_params.padding_values.height = _paddingTop;
   op_params.stride_width = _strideWidth;
   op_params.stride_height = _strideHeight;
-  op_params.dilation_width_factor = 1;
-  op_params.dilation_height_factor = 1;
+  op_params.dilation_width_factor = _dilationWidthFactor;
+  op_params.dilation_height_factor = _dilationHeightFactor;
   op_params.float_activation_min = output_activation_min;
   op_params.float_activation_max = output_activation_max;
 
@@ -78,8 +79,8 @@ void ConvolutionLayer::convQuant8()
   nnfw::cker::ConvParams op_params;
   op_params.stride_width = _strideWidth;
   op_params.stride_height = _strideHeight;
-  op_params.dilation_width_factor = 1;
-  op_params.dilation_height_factor = 1;
+  op_params.dilation_width_factor = _dilationWidthFactor;
+  op_params.dilation_height_factor = _dilationHeightFactor;
   op_params.padding_type = getPaddingType(_paddingType);
   op_params.padding_values.width = _paddingLeft;
   op_params.padding_values.height = _paddingTop;
@@ -104,6 +105,8 @@ void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTe
                                  const uint32_t paddingLeft, const uint32_t paddingRight,
                                  const uint32_t paddingTop, const uint32_t paddingBottom,
                                  const uint32_t strideWidth, const uint32_t strideHeight,
+                                 const uint32_t dilationWidthFactor,
+                                 const uint32_t dilationHeightFactor,
                                  const ir::Activation activation, IPortableTensor *output)
 {
   _input = input;
@@ -116,6 +119,8 @@ void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTe
   _paddingBottom = paddingBottom;
   _strideWidth = strideWidth;
   _strideHeight = strideHeight;
+  _dilationWidthFactor = dilationWidthFactor;
+  _dilationHeightFactor = dilationHeightFactor;
   _activation = activation;
   _output = output;
 }
@@ -145,7 +150,8 @@ void ConvolutionLayer::run()
     param_padding.param.bottom = _paddingBottom;
 
     const auto padding =
-        ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
+        ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+                             _dilationWidthFactor, _dilationHeightFactor);
 
     _paddingLeft = padding.left;
     _paddingRight = padding.right;
@@ -176,7 +182,8 @@ void ConvolutionLayer::prepare()
   {
     bool is_transposed = false;
     kernel.prepare(getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
-                   getPaddingType(_paddingType), is_transposed);
+                   getPaddingType(_paddingType), is_transposed, _dilationWidthFactor,
+                   _dilationHeightFactor);
 
     // Decrease reference of _kernel(weights) only when _kernel is constant
     if (is_transposed)
index 2833387..398892e 100644 (file)
@@ -56,7 +56,8 @@ public:
                  const IPortableTensor *bias, ir::PaddingType _paddingType,
                  const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
                  const uint32_t paddingBottom, const uint32_t strideWidth,
-                 const uint32_t strideHeight, const ir::Activation activation,
+                 const uint32_t strideHeight, const uint32_t dilationWidthFactor,
+                 const uint32_t dilationHeightFactor, const ir::Activation activation,
                  IPortableTensor *output);
 
   void run() override;
@@ -77,6 +78,8 @@ private:
 
   uint32_t _strideWidth;
   uint32_t _strideHeight;
+  uint32_t _dilationWidthFactor;
+  uint32_t _dilationHeightFactor;
 
   ir::Activation _activation;
 
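With dilation in play, the padding computation must account for the enlarged receptive field: for kernel extent k and dilation factor d the effective extent is (k - 1) * d + 1, which is why calculatePadding now receives the dilation factors. A small sketch of that arithmetic; the helper name is hypothetical:

#include <cstdint>

// Effective kernel extent under dilation: a 3x3 kernel with dilation 2
// covers a 5x5 window. Illustrative helper, not part of this diff.
inline uint32_t effectiveKernelExtent(uint32_t kernel, uint32_t dilation)
{
  return (kernel - 1) * dilation + 1; // e.g. (3 - 1) * 2 + 1 == 5
}
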
diff --git a/runtime/onert/backend/cpu/ops/CosLayer.cc b/runtime/onert/backend/cpu/ops/CosLayer.cc
deleted file mode 100644 (file)
index 9417019..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "CosLayer.h"
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-CosLayer::CosLayer() : _input(nullptr), _output(nullptr)
-{
-  // DO NOTHING
-}
-
-void CosLayer::cosFloat32()
-{
-  nnfw::cker::Cos(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
-                  getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void CosLayer::cosQuant8() { throw std::runtime_error{"NYI"}; }
-
-void CosLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
-  _input = input;
-  _output = output;
-}
-
-void CosLayer::run()
-{
-  if (_input->data_type() == OperandType::FLOAT32)
-  {
-    cosFloat32();
-  }
-  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    cosQuant8();
-  }
-  else
-  {
-    throw std::runtime_error{"Cos: unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/CosLayer.h b/runtime/onert/backend/cpu/ops/CosLayer.h
deleted file mode 100644 (file)
index 1fadef7..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_COSLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_COSLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class CosLayer : public ::onert::exec::IFunction
-{
-public:
-  CosLayer();
-
-  void configure(const IPortableTensor *input, IPortableTensor *output);
-
-  void run() override;
-
-private:
-  void cosFloat32();
-  void cosQuant8();
-
-  const IPortableTensor *_input;
-  IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_COSLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/DivLayer.cc b/runtime/onert/backend/cpu/ops/DivLayer.cc
deleted file mode 100644 (file)
index 556c55e..0000000
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DivLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-void DivLayer::divFloat32()
-{
-  float output_activation_min = 0, output_activation_max = 0;
-  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
-  nnfw::cker::BinaryArithmeticOpParam op_params;
-  op_params.float_activation_max = output_activation_max;
-  op_params.float_activation_min = output_activation_min;
-
-  const bool requires_broadcast = !HaveSameShapes(_lhs, _rhs);
-  if (requires_broadcast)
-  {
-    nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::DIV>(
-        op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
-        getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
-        getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-  }
-  else
-  {
-    nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::DIV>(
-        op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
-        getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
-        getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-  }
-}
-
-void DivLayer::divQuant8()
-{
-  int32_t output_activation_min, output_activation_max;
-  CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
-                                &output_activation_max);
-  // op_params.quantized_activation_max = output_activation_max;
-  // op_params.quantized_activation_min = output_activation_min;
-
-  // cker quant8 div is not implemented yet
-  throw std::runtime_error{"Div NYI for quantized"};
-}
-
-void DivLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
-                         const ir::Activation activation, IPortableTensor *output)
-{
-  _lhs = lhs;
-  _rhs = rhs;
-  _activation = activation;
-  _output = output;
-}
-
-void DivLayer::run()
-{
-  if (_output->data_type() == OperandType::FLOAT32)
-  {
-    divFloat32();
-  }
-  else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    divQuant8();
-  }
-  else
-  {
-    throw std::runtime_error{"Div: unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc
new file mode 100644 (file)
index 0000000..c1d6317
--- /dev/null
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ElementwiseActivationLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Logistic.h>
+#include <cker/operation/ReLU.h>
+#include <cker/operation/ReLU6.h>
+#include <cker/operation/Tanh.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+ElementwiseActivationLayer::ElementwiseActivationLayer()
+    : _input(nullptr), _output(nullptr), _kernel()
+{
+  // DO NOTHING
+}
+
+void ElementwiseActivationLayer::PopulateLookupTable(const ElementwiseActivationType op_type)
+{
+  const auto input_scale = static_cast<double>(_input->data_scale());
+  const auto input_zero_point = static_cast<int32_t>(_input->data_offset());
+  const auto output_scale = static_cast<double>(_output->data_scale());
+  const auto output_zero_point = static_cast<int32_t>(_output->data_offset());
+  const float inverse_scale = 1 / output_scale;
+  int32_t maxval = std::numeric_limits<uint8_t>::max();
+  int32_t minval = std::numeric_limits<uint8_t>::min();
+  for (int32_t val = minval; val <= maxval; ++val)
+  {
+    const float dequantized = input_scale * (val - input_zero_point);
+    float transformed = 0.f;
+    if (op_type == ElementwiseActivationType::kTanh)
+    {
+      transformed = std::tanh(dequantized);
+    }
+    else if (op_type == ElementwiseActivationType::kLogistic)
+    {
+      transformed = 1.0f / (1.0f + std::exp(-dequantized));
+    }
+    else
+    {
+      throw std::runtime_error("ElementwiseActivationLayer : unsupported activation type");
+    }
+    const float rescaled = std::round(transformed * inverse_scale);
+    const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
+    _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval));
+  }
+}
+
+void ElementwiseActivationLayer::EvalUsingLookupTable(const IPortableTensor *input,
+                                                      IPortableTensor *output)
+{
+  const int size = MatchingFlatSize(getTensorShape(input), getTensorShape(output));
+  const uint8_t *input_data = reinterpret_cast<const uint8_t *>(input->buffer());
+  uint8_t *output_data = reinterpret_cast<uint8_t *>(output->buffer());
+
+  for (int i = 0; i < size; ++i)
+  {
+    output_data[i] = _table[input_data[i]];
+  }
+}
+
+void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+                                           float alpha, float beta,
+                                           ElementwiseActivationType op_type)
+{
+  _input = input;
+  _output = output;
+
+  switch (op_type)
+  {
+    case ElementwiseActivationType::kLogistic:
+      if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        PopulateLookupTable(op_type);
+        _kernel = std::bind(&ElementwiseActivationLayer::EvalUsingLookupTable, this,
+                            std::placeholders::_1, std::placeholders::_2);
+      }
+      else if (_input->data_type() == OperandType::FLOAT32)
+      {
+        _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+          nnfw::cker::Logistic(getTensorShape(input),
+                               reinterpret_cast<const float *>(input->buffer()),
+                               getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+        };
+      }
+      else
+      {
+        throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"};
+      }
+      break;
+    case ElementwiseActivationType::kReLU:
+      if (_input->data_type() == OperandType::FLOAT32)
+      {
+        if (alpha == std::numeric_limits<float>::infinity() && beta == 0.f)
+        {
+          _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+            nnfw::cker::ReLU(getTensorShape(input),
+                             reinterpret_cast<const float *>(input->buffer()),
+                             getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+          };
+        }
+        else if (alpha == 6.f && beta == 0.f)
+        {
+          _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+            nnfw::cker::ReLU6(getTensorShape(input),
+                              reinterpret_cast<const float *>(input->buffer()),
+                              reinterpret_cast<float *>(output->buffer()));
+          };
+        }
+        else
+        {
+          throw std::runtime_error(
+              "ElementwiseActivationLayer : This layer suppports only ReLU(0-inf) and ReLU6(0-6)");
+        }
+      }
+      else
+      {
+        throw std::runtime_error{"ElementwiseActivationLayer(ReLU): unsupported data type"};
+      }
+      break;
+    case ElementwiseActivationType::kTanh:
+      if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        PopulateLookupTable(op_type);
+        _kernel = std::bind(&ElementwiseActivationLayer::EvalUsingLookupTable, this,
+                            std::placeholders::_1, std::placeholders::_2);
+      }
+      else if (_input->data_type() == OperandType::FLOAT32)
+      {
+        _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+          nnfw::cker::Tanh(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                           getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+        };
+      }
+      else
+      {
+        throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"};
+      }
+      break;
+    default:
+      throw std::runtime_error("ElementwiseActivationLayer: unsupported op type");
+  }
+}
+
+void ElementwiseActivationLayer::run() { _kernel(_input, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
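PopulateLookupTable above is the standard trick for uint8 activations: the input has only 256 possible values, so the dequantize-transform-requantize chain can be precomputed once at configure time and run() becomes one table load per element. A self-contained sketch under the usual asymmetric quantization (real = scale * (q - zero_point)); buildTanhTable and its parameter names are illustrative, not onert API:

#include <algorithm>
#include <cmath>
#include <cstdint>

void buildTanhTable(float in_scale, int32_t in_zp, float out_scale, int32_t out_zp,
                    uint8_t table[256])
{
  const float inv_out_scale = 1.0f / out_scale;
  for (int32_t q = 0; q <= 255; ++q)
  {
    const float x = in_scale * (q - in_zp);                  // dequantize
    const float y = std::tanh(x);                            // transform
    const int32_t r = static_cast<int32_t>(std::round(y * inv_out_scale)) + out_zp;
    table[q] = static_cast<uint8_t>(std::min(255, std::max(0, r)));  // requantize + clamp
  }
}

// Evaluation is then a single indexed load per element:
//   output[i] = table[input[i]];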
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEACTIVATIONLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEACTIVATIONLAYER_H__
 
 #include <backend/IPortableTensor.h>
 
@@ -30,26 +30,33 @@ namespace cpu
 namespace ops
 {
 
-class TanhLayer : public ::onert::exec::IFunction
+enum class ElementwiseActivationType
 {
-public:
-  TanhLayer();
+  kLogistic,
+  kReLU,
+  kTanh
+};
 
+class ElementwiseActivationLayer : public ::onert::exec::IFunction
+{
 public:
-  void tanhFloat32();
+  ElementwiseActivationLayer();
 
-  void tanhQuant8();
-
-  void configure(const IPortableTensor *input, IPortableTensor *output);
+public:
+  void configure(const IPortableTensor *input, IPortableTensor *output, float alpha, float beta,
+                 const ElementwiseActivationType op_type);
 
   void run() override;
 
-  void PopulateLookupTable();
+  void PopulateLookupTable(const ElementwiseActivationType op_type);
+
+  void EvalUsingLookupTable(const IPortableTensor *input, IPortableTensor *output);
 
 private:
   const IPortableTensor *_input;
   IPortableTensor *_output;
   uint8_t _table[256];
+  std::function<void(const IPortableTensor *input, IPortableTensor *output)> _kernel;
 };
 
 } // namespace ops
@@ -57,4 +64,4 @@ private:
 } // namespace backend
 } // namespace onert
 
-#endif // __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEACTIVATIONLAYER_H__
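The header change above shows the shape of the whole refactor: the per-op classes (TanhLayer here, MaxLayer and ReLU6Layer below) are folded into one layer that resolves the data-type branch once in configure() and stores the result in a std::function, so run() is a single indirect call with no checks on the hot path. A reduced sketch of the pattern with illustrative types (Tensor stands in for IPortableTensor):

#include <functional>
#include <stdexcept>

struct Tensor { bool is_float; };  // stand-in for IPortableTensor

class ActivationLayer
{
public:
  void configure(const Tensor *in, Tensor *out)
  {
    _in = in;
    _out = out;
    if (in->is_float)
      _kernel = [](const Tensor *, Tensor *) { /* float kernel here */ };
    else
      throw std::runtime_error{"unsupported data type"};
  }

  // No type checks on the hot path: the decision was made in configure().
  void run() { _kernel(_in, _out); }

private:
  const Tensor *_in = nullptr;
  Tensor *_out = nullptr;
  std::function<void(const Tensor *, Tensor *)> _kernel;
};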
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
new file mode 100644 (file)
index 0000000..ea3c1e7
--- /dev/null
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ElementwiseBinaryLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/LogicalOr.h>
+#include <cker/operation/MaxMin.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+template <typename T>
+void logicalOrGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                      IPortableTensor *output)
+{
+  if (!HaveSameShapes(lhs, rhs))
+  {
+    nnfw::cker::LogicalOrBroadcast<T>(
+        getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs),
+        reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+        reinterpret_cast<T *>(output->buffer()));
+  }
+  else
+  {
+    nnfw::cker::LogicalOrElementwise<T>(
+        getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+        reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer()));
+  }
+}
+
+template <typename T>
+void maximumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output)
+{
+  nnfw::cker::Max<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+                     getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+                     getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
+}
+
+template <typename T>
+void minimumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output)
+{
+  nnfw::cker::Min<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+                     getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+                     getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
+}
+
+bool haveSameQuantInfo(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                       const IPortableTensor *output)
+{
+  return (lhs->data_scale() == rhs->data_scale() && lhs->data_scale() == output->data_scale()) &&
+         (lhs->data_offset() == rhs->data_offset() && lhs->data_offset() == output->data_offset());
+}
+} // namespace
+
+void ElementwiseBinaryLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                                       IPortableTensor *output, const ElementwiseBinaryType op_type)
+{
+  assert(lhs != nullptr);
+  assert(rhs != nullptr);
+  assert(output != nullptr);
+
+  _lhs = lhs;
+  _rhs = rhs;
+  _output = output;
+
+  switch (op_type)
+  {
+    case ElementwiseBinaryType::kLogicalOr:
+      if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
+      {
+        _kernel = logicalOrGeneric<bool>;
+      }
+      else
+      {
+        throw std::runtime_error{"LogicalOr: Unsupported data type"};
+      }
+      break;
+    case ElementwiseBinaryType::kMax:
+      if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        if (!haveSameQuantInfo(_lhs, _rhs, _output))
+        {
+          throw std::runtime_error("Max NYI for quantized");
+        }
+        _kernel = maximumGeneric<uint8_t>;
+      }
+      else if (_lhs->data_type() == OperandType::FLOAT32)
+      {
+        _kernel = maximumGeneric<float>;
+      }
+      else
+      {
+        throw std::runtime_error{"Max: unsupported data type"};
+      }
+      break;
+    case ElementwiseBinaryType::kMin:
+      if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        if (!haveSameQuantInfo(_lhs, _rhs, _output))
+        {
+          throw std::runtime_error("Min NYI for quantized");
+        }
+        _kernel = minimumGeneric<uint8_t>;
+      }
+      else if (_lhs->data_type() == OperandType::INT32)
+      {
+        _kernel = minimumGeneric<int32_t>;
+      }
+      else if (_lhs->data_type() == OperandType::FLOAT32)
+      {
+        _kernel = minimumGeneric<float>;
+      }
+      else
+      {
+        throw std::runtime_error{"Min: unsupported data type"};
+      }
+      break;
+    default:
+      throw std::runtime_error{"ElementwiseBinary: Unsupported ElementwiseBinary type"};
+  }
+}
+
+void ElementwiseBinaryLayer::run() { _kernel(_lhs, _rhs, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
 
 #include <backend/IPortableTensor.h>
 
@@ -30,20 +30,25 @@ namespace cpu
 namespace ops
 {
 
-class MaxLayer : public ::onert::exec::IFunction
+enum class ElementwiseBinaryType
+{
+  kLogicalAnd,
+  kLogicalOr,
+  kMax,
+  kMin,
+};
+
+class ElementwiseBinaryLayer : public ::onert::exec::IFunction
 {
 public:
-  MaxLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
+  ElementwiseBinaryLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
   {
     // DO NOTHING
   }
 
 public:
-  template <typename T> void maximum();
-
-  void maxQuant8();
-
-  void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output);
+  void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+                 const ElementwiseBinaryType op_type);
 
   void run() override;
 
@@ -51,6 +56,7 @@ private:
   const IPortableTensor *_lhs;
   const IPortableTensor *_rhs;
   IPortableTensor *_output;
+  std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)> _kernel;
 };
 
 } // namespace ops
@@ -58,4 +64,4 @@ private:
 } // namespace backend
 } // namespace onert
 
-#endif // __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
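haveSameQuantInfo above guards the quantized Max/Min fast path: when both inputs and the output share one scale and zero point, dequantization q -> scale * (q - zero_point) is the same monotone affine map for all three tensors, so taking max/min directly on the raw uint8 codes yields the correctly quantized result. A sketch of that guard with an illustrative QuantInfo struct (not an onert type):

#include <cstdint>

struct QuantInfo
{
  float scale;
  int32_t zero_point;
};

// Raw-code max/min is valid only when all three tensors share one
// quantization: the dequantization map is then order-preserving and
// identical across operands and output.
bool sameQuantInfo(const QuantInfo &lhs, const QuantInfo &rhs, const QuantInfo &out)
{
  return lhs.scale == rhs.scale && lhs.scale == out.scale &&
         lhs.zero_point == rhs.zero_point && lhs.zero_point == out.zero_point;
}

When the parameters differ, a correct implementation would have to dequantize, compare in real space, and requantize, which is why the mismatched case is left as "NYI for quantized".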
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
new file mode 100644 (file)
index 0000000..f8f89ab
--- /dev/null
@@ -0,0 +1,336 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ElementwiseUnaryLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Elementwise.h>
+#include <cker/operation/Erf.h>
+#include <cker/operation/Exp.h>
+#include <cker/operation/LogicalNot.h>
+#include <cker/operation/Quantize.h>
+#include <cker/operation/Round.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+void absFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Abs(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                  getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+template <typename FromT>
+void castPtr(const FromT *in, DataPtr out, int num_elements, ir::DataType data_type_out)
+{
+  switch (data_type_out)
+  {
+    case ir::DataType::FLOAT32:
+      std::transform(in, in + num_elements, out.f, [](FromT a) { return static_cast<float>(a); });
+      return;
+    case ir::DataType::INT32:
+      std::transform(in, in + num_elements, out.i32,
+                     [](FromT a) { return static_cast<int32_t>(a); });
+      return;
+    case ir::DataType::UINT32:
+      std::transform(in, in + num_elements, out.u32,
+                     [](FromT a) { return static_cast<uint32_t>(a); });
+      return;
+    case ir::DataType::UINT8:
+      std::transform(in, in + num_elements, out.u8,
+                     [](FromT a) { return static_cast<uint8_t>(a); });
+      return;
+    case ir::DataType::BOOL8:
+      std::transform(in, in + num_elements, out.b, [](FromT a) { return static_cast<bool>(a); });
+      return;
+    case ir::DataType::INT64:
+      std::transform(in, in + num_elements, out.i64,
+                     [](FromT a) { return static_cast<int64_t>(a); });
+      return;
+    default:
+      throw std::runtime_error("Cast: Not supported output type" +
+                               std::to_string((int)data_type_out));
+  }
+}
+
+void cast(const IPortableTensor *input, IPortableTensor *output)
+{
+  auto input_buf = input->buffer();
+  auto output_buf = output->buffer();
+  const auto in = *reinterpret_cast<const DataPtr *>(&input_buf);
+  auto out = *reinterpret_cast<DataPtr *>(&output_buf);
+
+  auto input_shape = getTensorShape(input);
+  auto output_shape = getTensorShape(output);
+  const auto num_elements = MatchingFlatSize(input_shape, output_shape);
+
+  switch (input->data_type())
+  {
+    case ir::DataType::FLOAT32:
+      castPtr(in.f, out, num_elements, output->data_type());
+      return;
+    case ir::DataType::INT32:
+      castPtr(in.i32, out, num_elements, output->data_type());
+      return;
+    case ir::DataType::UINT32:
+      castPtr(in.u32, out, num_elements, output->data_type());
+      return;
+    case ir::DataType::UINT8:
+      castPtr(in.u8, out, num_elements, output->data_type());
+      return;
+    case ir::DataType::BOOL8:
+      castPtr(in.b, out, num_elements, output->data_type());
+      return;
+    case ir::DataType::INT64:
+      castPtr(in.i64, out, num_elements, output->data_type());
+      return;
+    default:
+      throw std::runtime_error("Cast: unsupported data type" +
+                               std::to_string((int)input->data_type()));
+  }
+}
+
+void cosFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Cos(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                  getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void expFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Exp(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                  getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void erfFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Erf(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                  getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void logFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Log(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                  getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void logicalNot(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::LogicalNot(getTensorShape(input), reinterpret_cast<const bool *>(input->buffer()),
+                         getTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
+}
+
+void negFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Neg(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                  getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+template <typename InputT, typename OutputT>
+void affineQuantize(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Quantize(getTensorShape(input), reinterpret_cast<const InputT *>(input->buffer()),
+                       getTensorShape(output), reinterpret_cast<OutputT *>(output->buffer()),
+                       output->data_scale(), output->data_offset());
+}
+
+void roundFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Round(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                    getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void rsqrtFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Rsqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                    getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void sinFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Sin(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                  getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+template <typename T> void zerosLikeGeneric(const IPortableTensor *input, IPortableTensor *output)
+{
+  if (!HaveSameShapes(input, output))
+    throw std::runtime_error{"ZerosLike: input and output shapes don't match."};
+
+  auto element_size = getTensorShape(input).FlatSize();
+
+  memset(reinterpret_cast<T *>(output->buffer()), 0, element_size * sizeof(T));
+}
+} // namespace
+
+void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+                                      const ElementwiseUnaryType op_type)
+{
+  assert(input != nullptr);
+  assert(output != nullptr);
+
+  _input = input;
+  _output = output;
+
+  switch (op_type)
+  {
+    case ElementwiseUnaryType::kAbs:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = absFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Abs: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kCast:
+      _kernel = cast;
+      break;
+    case ElementwiseUnaryType::kCos:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = cosFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Cos: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kExp:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = expFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Exp: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kErf:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = erfFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Exp: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kLog:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = logFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Log: Unsupported  data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kLogicalNot:
+      if ((input->data_type() == OperandType::BOOL8))
+      {
+        _kernel = logicalNot;
+      }
+      else
+      {
+        throw std::runtime_error{"LogicalNot: Unsupported  data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kNeg:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = negFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Neg: Unsupported  data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kQuantize:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = affineQuantize<float, uint8_t>;
+      }
+      else
+      {
+        throw std::runtime_error{"Quantize: Unsupported  data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kRound:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = roundFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Round: Unsupported  data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kRSqrt:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = rsqrtFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"RSqrt: Unsupported  data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kSin:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = sinFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Sin: Unsupported  data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kZerosLike:
+      if (input->data_type() == OperandType::FLOAT32)
+      {
+        _kernel = zerosLikeGeneric<float>;
+      }
+      else if (input->data_type() == OperandType::INT32)
+      {
+        _kernel = zerosLikeGeneric<int32_t>;
+      }
+      else
+      {
+        throw std::runtime_error{"ZerosLike: Unsupported data type"};
+      }
+      break;
+    default:
+      throw std::runtime_error{"ElementwiseBinary: Unsupported ElementwiseBinary type"};
+  }
+}
+
+void ElementwiseUnaryLayer::run() { _kernel(_input, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
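The cast kernel above pairs a switch on the input type with castPtr's switch on the output type, and every (input, output) pair bottoms out in a std::transform with a static_cast. A minimal standalone version of that inner step (castBuffer is illustrative, not the onert helper):

#include <algorithm>
#include <cstdint>

template <typename From, typename To>
void castBuffer(const From *in, To *out, int num_elements)
{
  std::transform(in, in + num_elements, out,
                 [](From v) { return static_cast<To>(v); });
}

// Example: float -> int32 truncates toward zero, as static_cast does.
//   float src[3] = {1.9f, -2.5f, 0.0f};
//   int32_t dst[3];
//   castBuffer(src, dst, 3);  // dst = {1, -2, 0}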
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__
 
 #include <backend/IPortableTensor.h>
 
@@ -30,23 +30,41 @@ namespace cpu
 namespace ops
 {
 
-class ReLU6Layer : public ::onert::exec::IFunction
+enum class ElementwiseUnaryType
 {
-public:
-  ReLU6Layer();
+  kAbs,
+  kCast,
+  kCos,
+  kErf,
+  kExp,
+  kLog,
+  kLogicalNot,
+  kNeg,
+  kQuantize,
+  kRound,
+  kRSqrt,
+  kSin,
+  kZerosLike
+};
 
+class ElementwiseUnaryLayer : public ::onert::exec::IFunction
+{
 public:
-  void relu6Float32();
+  ElementwiseUnaryLayer() : _input(nullptr), _output(nullptr), _kernel()
+  {
+    // DO NOTHING
+  }
 
-  void relu6Quant8();
-
-  void configure(const IPortableTensor *input, IPortableTensor *output);
+public:
+  void configure(const IPortableTensor *input, IPortableTensor *output,
+                 const ElementwiseUnaryType op_type);
 
   void run() override;
 
 private:
   const IPortableTensor *_input;
   IPortableTensor *_output;
+  std::function<void(const IPortableTensor *, IPortableTensor *)> _kernel;
 };
 
 } // namespace ops
@@ -54,4 +72,4 @@ private:
 } // namespace backend
 } // namespace onert
 
-#endif // __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ExpLayer.cc b/runtime/onert/backend/cpu/ops/ExpLayer.cc
deleted file mode 100644 (file)
index 4dbec9c..0000000
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ExpLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Exp.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-ExpLayer::ExpLayer() : _input(nullptr), _output(nullptr)
-{
-  // DO NOTHING
-}
-
-void ExpLayer::expFloat32()
-{
-  nnfw::cker::Exp(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
-                  getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void ExpLayer::expQuant8()
-{
-  // cker quant8 exp is not implemented yet
-  throw std::runtime_error{"NYI"};
-}
-
-void ExpLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
-  _input = input;
-  _output = output;
-}
-
-void ExpLayer::run()
-{
-  if (_input->data_type() == OperandType::FLOAT32)
-  {
-    expFloat32();
-  }
-  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    expQuant8();
-  }
-  else
-  {
-    throw std::runtime_error{"Exp: unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/LogLayer.cc b/runtime/onert/backend/cpu/ops/LogLayer.cc
deleted file mode 100644 (file)
index 307c15b..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-LogLayer::LogLayer() : _input(nullptr), _output(nullptr)
-{
-  // DO NOTHING
-}
-
-void LogLayer::logFloat32()
-{
-  nnfw::cker::Log(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
-                  getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void LogLayer::logQuant8() { throw std::runtime_error{"NYI"}; }
-
-void LogLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
-  _input = input;
-  _output = output;
-}
-
-void LogLayer::run()
-{
-  if (_input->data_type() == OperandType::FLOAT32)
-  {
-    logFloat32();
-  }
-  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    logQuant8();
-  }
-  else
-  {
-    throw std::runtime_error{"Log: unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/LogLayer.h b/runtime/onert/backend/cpu/ops/LogLayer.h
deleted file mode 100644 (file)
index 2f6b4b5..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class LogLayer : public ::onert::exec::IFunction
-{
-public:
-  LogLayer();
-
-public:
-  void logFloat32();
-
-  void logQuant8();
-
-  void configure(const IPortableTensor *input, IPortableTensor *output);
-
-  void run() override;
-
-private:
-  const IPortableTensor *_input;
-  IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__
index 06dde4f..1d7ee6c 100644 (file)
@@ -34,6 +34,16 @@ LogSoftMaxLayer::LogSoftMaxLayer() : _input(nullptr), _output(nullptr), _beta(0.
   // DO NOTHING
 }
 
+void LogSoftMaxLayer::PopulateLookupTable(const float kBeta)
+{
+  const float scale = -_input->data_scale() * kBeta;
+  const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+  for (int32_t val = 0; val <= max_uint8; ++val)
+  {
+    _table[max_uint8 - val] = expf(scale * val);
+  }
+}
+
 void LogSoftMaxLayer::logsoftmaxFloat32()
 {
   nnfw::cker::SoftmaxParams op_params;
@@ -46,7 +56,15 @@ void LogSoftMaxLayer::logsoftmaxFloat32()
 
 void LogSoftMaxLayer::logsoftmaxQuant8()
 {
-  // NYI
+  nnfw::cker::SoftmaxParams op_params;
+  op_params.beta = _beta;
+  op_params.axis = _axis;
+  op_params.table = _table;
+  op_params.zero_point = _output->data_offset();
+  op_params.scale = _output->data_scale();
+  nnfw::cker::LogSoftmax(op_params, _input->data_scale(), getTensorShape(_input),
+                         reinterpret_cast<const uint8_t *>(_input->buffer()),
+                         getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
 }
 
 void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta, const int axis,
@@ -56,6 +74,10 @@ void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta,
   _output = output;
   _beta = beta;
   _axis = axis;
+  if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+  {
+    PopulateLookupTable(_beta);
+  }
 }
 
 void LogSoftMaxLayer::run()
@@ -66,7 +88,7 @@ void LogSoftMaxLayer::run()
   }
   else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
   {
-    throw std::runtime_error{"LogSoftmax : NYI"};
+    logsoftmaxQuant8();
   }
   else
   {
index ba9deca..1533f33 100644 (file)
@@ -45,12 +45,15 @@ public:
 
   void run();
 
+  void PopulateLookupTable(const float kBeta);
+
 private:
   const IPortableTensor *_input;
   IPortableTensor *_output;
 
   float _beta;
   int _axis;
+  float _table[256];
 };
 
 } // namespace ops
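The quantized log-softmax path added above leans on a 256-entry table: with s = -input_scale * beta, storing table[255 - val] = exp(s * val) means that for an input byte x and row maximum m, table[255 - (m - x)] equals exp(beta * input_scale * (x - m)), i.e. exp of the max-shifted logit, so the row sum log-softmax needs is just indexed loads and adds. A sketch of the population step (populateTable is illustrative, mirroring the layer's PopulateLookupTable):

#include <cmath>

void populateTable(float input_scale, float beta, float table[256])
{
  const float s = -input_scale * beta;  // non-positive by construction
  for (int val = 0; val <= 255; ++val)
    table[255 - val] = std::exp(s * static_cast<float>(val));
}

// Per row: find the max byte m, accumulate sum += table[255 - (m - x_i)],
// then log_softmax_i = beta * input_scale * (x_i - m) - std::log(sum),
// requantized with the output scale and zero point.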
diff --git a/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc b/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc
deleted file mode 100644 (file)
index f2192c1..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogicalNotLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/LogicalNot.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-LogicalNotLayer::LogicalNotLayer() : _input(nullptr), _output(nullptr)
-{
-  // DO NOTHING
-}
-
-void LogicalNotLayer::logicalNotBool8()
-{
-  nnfw::cker::LogicalNot(getTensorShape(_input), reinterpret_cast<const bool *>(_input->buffer()),
-                         getTensorShape(_output), reinterpret_cast<bool *>(_output->buffer()));
-}
-
-void LogicalNotLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
-  _input = input;
-  _output = output;
-}
-
-void LogicalNotLayer::run()
-{
-  if (_input->data_type() == OperandType::BOOL8)
-  {
-    logicalNotBool8();
-  }
-  else
-  {
-    throw std::runtime_error{"LogicalNot: unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/LogicalNotLayer.h b/runtime/onert/backend/cpu/ops/LogicalNotLayer.h
deleted file mode 100644 (file)
index 5543cca..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class LogicalNotLayer : public ::onert::exec::IFunction
-{
-public:
-  LogicalNotLayer();
-
-public:
-  void configure(const IPortableTensor *input, IPortableTensor *output);
-
-  void run() override;
-
-private:
-  void logicalNotBool8();
-
-private:
-  const IPortableTensor *_input;
-  IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc b/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc
deleted file mode 100644 (file)
index 5b7c9f6..0000000
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogicalOrLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/LogicalOr.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-void LogicalOrLayer::lorBool8()
-{
-  if (!HaveSameShapes(_lhs, _rhs))
-  {
-    nnfw::cker::LogicalOrBroadcast<bool>(
-        getTensorShape(_lhs), reinterpret_cast<const bool *>(_lhs->buffer()), getTensorShape(_rhs),
-        reinterpret_cast<const bool *>(_rhs->buffer()), getTensorShape(_output),
-        reinterpret_cast<bool *>(_output->buffer()));
-  }
-  else
-  {
-    nnfw::cker::LogicalOrElementwise<bool>(getTensorShape(_lhs),
-                                           reinterpret_cast<const bool *>(_lhs->buffer()),
-                                           reinterpret_cast<const bool *>(_rhs->buffer()),
-                                           reinterpret_cast<bool *>(_output->buffer()));
-  }
-}
-
-void LogicalOrLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
-                               IPortableTensor *output)
-{
-  assert(lhs != nullptr);
-  assert(rhs != nullptr);
-  assert(output != nullptr);
-
-  _lhs = lhs;
-  _rhs = rhs;
-  _output = output;
-}
-
-void LogicalOrLayer::run()
-{
-  if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
-  {
-    lorBool8();
-  }
-  else
-  {
-    throw std::runtime_error{"LogicalOr: Unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/LogicalOrLayer.h b/runtime/onert/backend/cpu/ops/LogicalOrLayer.h
deleted file mode 100644 (file)
index efaf396..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class LogicalOrLayer : public ::onert::exec::IFunction
-{
-public:
-  LogicalOrLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
-  {
-    // Nothing
-  }
-
-public:
-  void configure(const IPortableTensor *_lhs, const IPortableTensor *_rhs, IPortableTensor *output);
-
-  void run() override;
-
-private:
-  void lorBool8();
-
-private:
-  const IPortableTensor *_lhs;
-  const IPortableTensor *_rhs;
-  IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/LogisticLayer.cc b/runtime/onert/backend/cpu/ops/LogisticLayer.cc
deleted file mode 100644 (file)
index 140ab4d..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogisticLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Logistic.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-LogisticLayer::LogisticLayer() : _input(nullptr), _output(nullptr)
-{
-  // DO NOTHING
-}
-
-void LogisticLayer::populateLookupTable()
-{
-  const auto input_scale = static_cast<double>(_input->data_scale());
-  const auto input_zero_point = static_cast<int32_t>(_input->data_offset());
-  const auto output_scale = static_cast<double>(_output->data_scale());
-  const auto output_zero_point = static_cast<int32_t>(_output->data_offset());
-  const float inverse_scale = 1 / output_scale;
-  int32_t maxval = std::numeric_limits<uint8_t>::max();
-  int32_t minval = std::numeric_limits<uint8_t>::min();
-  for (int32_t val = minval; val <= maxval; ++val)
-  {
-    const float dequantized = input_scale * (val - input_zero_point);
-    const float transformed = 1.0f / (1.0f + std::exp(-dequantized));
-    const float rescaled = std::round(transformed * inverse_scale);
-    const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
-    _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval));
-  }
-}
-
-void LogisticLayer::logisticFloat32()
-{
-  nnfw::cker::Logistic(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
-                       getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void LogisticLayer::logisticQuant8()
-{
-  const int size = MatchingFlatSize(getTensorShape(_input), getTensorShape(_output));
-  const uint8_t *input_data = reinterpret_cast<const uint8_t *>(_input->buffer());
-  uint8_t *output_data = reinterpret_cast<uint8_t *>(_output->buffer());
-
-  for (int i = 0; i < size; ++i)
-  {
-    output_data[i] = _table[input_data[i]];
-  }
-}
-
-void LogisticLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
-  _input = input;
-  _output = output;
-
-  if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    if (_output->data_scale() != 1.f / 256)
-    {
-      throw std::runtime_error{"incorrect scale for output"};
-    }
-    populateLookupTable();
-  }
-}
-
-void LogisticLayer::run()
-{
-  if (_input->data_type() == OperandType::FLOAT32)
-  {
-    logisticFloat32();
-  }
-  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    logisticQuant8();
-  }
-  else
-  {
-    throw std::runtime_error{"Logistic: unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/LogisticLayer.h b/runtime/onert/backend/cpu/ops/LogisticLayer.h
deleted file mode 100644 (file)
index cac7793..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class LogisticLayer : public ::onert::exec::IFunction
-{
-public:
-  LogisticLayer();
-
-public:
-  void logisticFloat32();
-
-  void logisticQuant8();
-
-  void configure(const IPortableTensor *input, IPortableTensor *output);
-  void populateLookupTable();
-
-  void run() override;
-
-private:
-  const IPortableTensor *_input;
-  IPortableTensor *_output;
-
-  uint8_t _table[256];
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/MaxLayer.cc b/runtime/onert/backend/cpu/ops/MaxLayer.cc
deleted file mode 100644 (file)
index 9631983..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MaxLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/MaxMin.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-template <typename T> void MaxLayer::maximum()
-{
-  nnfw::cker::Max<T>(getTensorShape(_lhs), reinterpret_cast<const T *>(_lhs->buffer()),
-                     getTensorShape(_rhs), reinterpret_cast<const T *>(_rhs->buffer()),
-                     getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
-}
-
-void MaxLayer::maxQuant8()
-{
-  if (_lhs->data_scale() == _rhs->data_scale() && _lhs->data_scale() == _output->data_scale())
-  {
-    if (_lhs->data_offset() == _rhs->data_offset() && _lhs->data_offset() == _output->data_offset())
-    {
-      return nnfw::cker::Max<uint8_t>(
-          getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
-          getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
-          getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-    }
-  }
-  throw std::runtime_error("Max NYI for quantized");
-}
-
-void MaxLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
-                         IPortableTensor *output)
-{
-  assert(lhs != nullptr);
-  assert(rhs != nullptr);
-  assert(output != nullptr);
-
-  _lhs = lhs;
-  _rhs = rhs;
-  _output = output;
-}
-
-void MaxLayer::run()
-{
-  if (_lhs->data_type() == OperandType::FLOAT32)
-  {
-    maximum<float>();
-  }
-  else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    maxQuant8();
-  }
-  else
-  {
-    throw std::runtime_error{"Max: unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc b/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc
deleted file mode 100644 (file)
index 1e983b4..0000000
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MaxPoolLayer.h"
-
-#include <cker/operation/MaxPool.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-#define MAXPOOLING_PARAMETERS                            \
-  nnfw::cker::PoolParams op_params;                      \
-  op_params.stride_height = _strideHeight;               \
-  op_params.stride_width = _strideWidth;                 \
-  op_params.filter_height = _kernelHeight;               \
-  op_params.filter_width = _kernelWidth;                 \
-  op_params.padding_values.height = (int8_t)_paddingTop; \
-  op_params.padding_values.width = (int8_t)_paddingLeft;
-
-MaxPoolLayer::MaxPoolLayer()
-    : _input(nullptr), _output(nullptr), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
-      _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), _kernelHeight(0),
-      _activation(ir::Activation::NONE)
-{
-  // DO NOTHING
-}
-
-void MaxPoolLayer::maxPoolFloat32()
-{
-  MAXPOOLING_PARAMETERS
-  float output_activation_min = 0, output_activation_max = 0;
-  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
-  op_params.float_activation_min = output_activation_min;
-  op_params.float_activation_max = output_activation_max;
-
-  nnfw::cker::MaxPool(op_params, getTensorShape(_input),
-                      reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output),
-                      reinterpret_cast<float *>(_output->buffer()));
-}
-void MaxPoolLayer::maxPoolQuant8()
-{
-  MAXPOOLING_PARAMETERS
-  int32_t output_activation_min = 0;
-  int32_t output_activation_max = 0;
-  CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
-                                &output_activation_max);
-  op_params.quantized_activation_min = output_activation_min;
-  op_params.quantized_activation_max = output_activation_max;
-
-  nnfw::cker::MaxPool(op_params, getTensorShape(_input),
-                      reinterpret_cast<const uint8_t *>(_input->buffer()), getTensorShape(_output),
-                      reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void MaxPoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft,
-                             const uint32_t paddingRight, const uint32_t paddingTop,
-                             const uint32_t paddingBottom, const uint32_t strideWidth,
-                             const uint32_t strideHeight, const uint32_t kernelWidth,
-                             const uint32_t kernelHeight, const ir::Activation activation,
-                             IPortableTensor *output)
-{
-  _input = input;
-  _paddingLeft = paddingLeft;
-  _paddingRight = paddingRight;
-  _paddingTop = paddingTop;
-  _paddingBottom = paddingBottom;
-  _strideWidth = strideWidth;
-  _strideHeight = strideHeight;
-  _kernelWidth = kernelWidth;
-  _kernelHeight = kernelHeight;
-  _activation = activation;
-  _output = output;
-}
-
-void MaxPoolLayer::run()
-{
-  if (_input->data_type() == OperandType::FLOAT32)
-  {
-    maxPoolFloat32();
-  }
-  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    maxPoolQuant8();
-  }
-  else
-  {
-    throw std::runtime_error{"MaxPool: unsupported data type"};
-  }
-}
-
-#undef MAXPOOLING_PARAMETERS
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
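
Both pooling paths clamp every output through the fused-activation range computed up front; only the (min, max) pair differs between the float and quant8 variants. A small sketch of that clamp, assuming the usual activation semantics (e.g. RELU6 clamps to [0, 6]); the name below is illustrative:

#include <algorithm>

// Sketch: the per-element clamp the pooling kernels apply after reduction.
// (act_min, act_max) is what CalculateActivationRange-style helpers produce,
// e.g. (0, 6) for RELU6, or (lowest, max) when no activation is fused.
inline float applyActivationClamp(float v, float act_min, float act_max)
{
  return std::min(std::max(v, act_min), act_max);
}
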
diff --git a/runtime/onert/backend/cpu/ops/MinLayer.cc b/runtime/onert/backend/cpu/ops/MinLayer.cc
deleted file mode 100644 (file)
index 2085967..0000000
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MinLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/MaxMin.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-template <typename T> void MinLayer::minimum()
-{
-  nnfw::cker::Min<T>(getTensorShape(_lhs), reinterpret_cast<const T *>(_lhs->buffer()),
-                     getTensorShape(_rhs), reinterpret_cast<const T *>(_rhs->buffer()),
-                     getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
-}
-
-void MinLayer::minQuant8()
-{
-  if (_lhs->data_scale() == _rhs->data_scale() && _lhs->data_scale() == _output->data_scale())
-  {
-    if (_lhs->data_offset() == _rhs->data_offset() && _lhs->data_offset() == _output->data_offset())
-    {
-      return nnfw::cker::Min<uint8_t>(
-          getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
-          getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
-          getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-    }
-  }
-  throw std::runtime_error("Min NYI for quantized");
-}
-
-void MinLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
-                         IPortableTensor *output)
-{
-  assert(lhs != nullptr);
-  assert(rhs != nullptr);
-  assert(output != nullptr);
-
-  _lhs = lhs;
-  _rhs = rhs;
-  _output = output;
-}
-
-void MinLayer::run()
-{
-  if (_lhs->data_type() == OperandType::FLOAT32)
-  {
-    minimum<float>();
-  }
-  else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    minQuant8();
-  }
-  else if (_lhs->data_type() == OperandType::INT32)
-  {
-    minimum<int32_t>();
-  }
-  else
-  {
-    throw std::runtime_error{"Min: unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/MinLayer.h b/runtime/onert/backend/cpu/ops/MinLayer.h
deleted file mode 100644 (file)
index 9bd114e..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_MINLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MINLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class MinLayer : public ::onert::exec::IFunction
-{
-public:
-  MinLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
-  {
-    // DO NOTHING
-  }
-
-public:
-  template <typename T> void minimum();
-
-  void minQuant8();
-
-  void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output);
-
-  void run() override;
-
-private:
-  const IPortableTensor *_lhs;
-  const IPortableTensor *_rhs;
-  IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_MINLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/MulLayer.cc b/runtime/onert/backend/cpu/ops/MulLayer.cc
deleted file mode 100644 (file)
index eef73ed..0000000
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MulLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-void MulLayer::mulFloat32()
-{
-  float output_activation_min = 0, output_activation_max = 0;
-  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
-  nnfw::cker::BinaryArithmeticOpParam op_params;
-  op_params.float_activation_max = output_activation_max;
-  op_params.float_activation_min = output_activation_min;
-
-  const bool need_broadcast =
-      nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
-  if (need_broadcast)
-  {
-    nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>(
-        op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
-        getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
-        getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-    return;
-  }
-
-  nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>(
-      op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
-      getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
-      getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void MulLayer::mulQuant8()
-{
-  int32_t output_activation_min, output_activation_max;
-  CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
-                                &output_activation_max);
-  nnfw::cker::BinaryArithmeticOpParam op_params;
-
-  op_params.quantized_activation_max = output_activation_max;
-  op_params.quantized_activation_min = output_activation_min;
-  op_params.input1_offset = -_lhs->data_offset();
-  op_params.input2_offset = -_rhs->data_offset();
-  op_params.output_offset = _output->data_offset();
-
-  double real_multiplier = _lhs->data_scale() * _rhs->data_scale() / _output->data_scale();
-  QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift);
-
-  const bool need_broadcast =
-      nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
-  if (need_broadcast)
-  {
-    nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>(
-        op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
-        getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
-        getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-    return;
-  }
-
-  nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>(
-      op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
-      getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
-      getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void MulLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
-                         const ir::Activation activation, IPortableTensor *output)
-{
-  _lhs = lhs;
-  _rhs = rhs;
-  _activation = activation;
-  _output = output;
-}
-
-void MulLayer::run()
-{
-  if (_output->data_type() == OperandType::FLOAT32)
-  {
-    mulFloat32();
-  }
-  else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    mulQuant8();
-  }
-  else
-  {
-    throw std::runtime_error{"Mul: unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
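
For the quant8 multiply, the three tensor scales fold into a single requantization constant, real_multiplier = s_lhs * s_rhs / s_out, which QuantizeMultiplier then splits into an int32 fixed-point multiplier plus shift. A reference sketch of the same arithmetic kept in floating point (the fixed-point split and cker's exact rounding are omitted, so treat it as illustrative only):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Sketch: one element of a quantized multiply, via the identity
// (q1 - z1) * s1 * (q2 - z2) * s2 == (q3 - z3) * s3.
uint8_t mulQuant8Ref(uint8_t q1, uint8_t q2, int32_t z1, int32_t z2, int32_t z3,
                     double s1, double s2, double s3)
{
  const double real_multiplier = (s1 * s2) / s3; // folded to multiplier+shift in cker
  const int32_t raw =
      static_cast<int32_t>(std::lround((q1 - z1) * (q2 - z2) * real_multiplier)) + z3;
  return static_cast<uint8_t>(std::min(255, std::max(0, raw)));
}
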
diff --git a/runtime/onert/backend/cpu/ops/MulLayer.h b/runtime/onert/backend/cpu/ops/MulLayer.h
deleted file mode 100644 (file)
index 2c4a988..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_MULLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MULLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class MulLayer : public ::onert::exec::IFunction
-{
-public:
-  MulLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
-  {
-    // DO NOTHING
-  }
-
-public:
-  void mulFloat32();
-
-  void mulQuant8();
-
-  void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
-                 const ir::Activation activation, IPortableTensor *output);
-
-  void run() override;
-
-private:
-  const IPortableTensor *_lhs;
-  const IPortableTensor *_rhs;
-  IPortableTensor *_output;
-
-  ir::Activation _activation{ir::Activation::NONE};
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_MULLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/NegLayer.cc b/runtime/onert/backend/cpu/ops/NegLayer.cc
deleted file mode 100644 (file)
index 2cb95b7..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "NegLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-NegLayer::NegLayer() : _input(nullptr), _output(nullptr)
-{
-  // DO NOTHING
-}
-
-void NegLayer::negFloat32()
-{
-  nnfw::cker::Neg(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
-                  getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void NegLayer::negQuant8() { throw std::runtime_error{"NYI"}; }
-
-void NegLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
-  _input = input;
-  _output = output;
-}
-
-void NegLayer::run()
-{
-  if (_input->data_type() == OperandType::FLOAT32)
-  {
-    negFloat32();
-  }
-  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    negQuant8();
-  }
-  else
-  {
-    throw std::runtime_error{"Neg: unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/NegLayer.h b/runtime/onert/backend/cpu/ops/NegLayer.h
deleted file mode 100644 (file)
index addf84e..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class NegLayer : public ::onert::exec::IFunction
-{
-public:
-  NegLayer();
-
-public:
-  void negFloat32();
-
-  void negQuant8();
-
-  void configure(const IPortableTensor *input, IPortableTensor *output);
-
-  void run() override;
-
-private:
-  const IPortableTensor *_input;
-  IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/PoolLayer.cc b/runtime/onert/backend/cpu/ops/PoolLayer.cc
new file mode 100644 (file)
index 0000000..85d02a7
--- /dev/null
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PoolLayer.h"
+
+#include <cker/operation/AveragePool.h>
+#include <cker/operation/MaxPool.h>
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+template <typename T>
+void avgPool2D(const nnfw::cker::PoolParams &params, const IPortableTensor *input,
+               IPortableTensor *output)
+{
+  nnfw::cker::AveragePool<T>(params, getTensorShape(input),
+                             reinterpret_cast<const T *>(input->buffer()), getTensorShape(output),
+                             reinterpret_cast<T *>(output->buffer()));
+}
+
+template <typename T>
+void maxPool2D(const nnfw::cker::PoolParams &params, const IPortableTensor *input,
+               IPortableTensor *output)
+{
+  nnfw::cker::MaxPool<T>(params, getTensorShape(input),
+                         reinterpret_cast<const T *>(input->buffer()), getTensorShape(output),
+                         reinterpret_cast<T *>(output->buffer()));
+}
+
+template <typename T>
+std::function<void(const IPortableTensor *, IPortableTensor *)>
+generateKernelGeneric(const nnfw::cker::PoolParams &params, PoolType op_type)
+{
+  if (op_type == PoolType::kAvg)
+  {
+    return std::bind(&avgPool2D<T>, params, std::placeholders::_1, std::placeholders::_2);
+  }
+  else if (op_type == PoolType::kMax)
+  {
+    return std::bind(&maxPool2D<T>, params, std::placeholders::_1, std::placeholders::_2);
+  }
+  else
+  {
+    throw std::runtime_error{"Pool: unsupported pool type"};
+  }
+}
+} // namespace
+
+PoolLayer::PoolLayer() : _input(nullptr), _output(nullptr), _kernel()
+{
+  // DO NOTHING
+}
+
+#define POOLING_PARAMETERS                              \
+  nnfw::cker::PoolParams op_params;                     \
+  op_params.stride_height = strideHeight;               \
+  op_params.stride_width = strideWidth;                 \
+  op_params.filter_height = kernelHeight;               \
+  op_params.filter_width = kernelWidth;                 \
+  op_params.padding_values.height = (int8_t)paddingTop; \
+  op_params.padding_values.width = (int8_t)paddingLeft;
+
+void PoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft, const uint32_t,
+                          const uint32_t paddingTop, const uint32_t, const uint32_t strideWidth,
+                          const uint32_t strideHeight, const uint32_t kernelWidth,
+                          const uint32_t kernelHeight, const ir::Activation activation,
+                          IPortableTensor *output, const PoolType op_type)
+{
+  assert(input != nullptr);
+  assert(output != nullptr);
+
+  _input = input;
+  _output = output;
+
+  POOLING_PARAMETERS
+  if (_input->data_type() == OperandType::FLOAT32)
+  {
+    float output_activation_min = 0;
+    float output_activation_max = 0;
+    CalculateActivationRange<float>(activation, &output_activation_min, &output_activation_max);
+    op_params.float_activation_min = output_activation_min;
+    op_params.float_activation_max = output_activation_max;
+
+    _kernel = generateKernelGeneric<float>(op_params, op_type);
+  }
+  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+  {
+    int32_t output_activation_min = 0;
+    int32_t output_activation_max = 0;
+    CalculateActivationRangeUint8(activation, _output, &output_activation_min,
+                                  &output_activation_max);
+    op_params.quantized_activation_min = output_activation_min;
+    op_params.quantized_activation_max = output_activation_max;
+    _kernel = generateKernelGeneric<uint8_t>(op_params, op_type);
+  }
+  else
+  {
+    throw std::runtime_error{"Pool: unsupported data type"};
+  }
+}
+
+void PoolLayer::run() { _kernel(_input, _output); }
+
+#undef POOLING_PARAMETERS
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
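
The merged layer does all dispatch once in configure(): the element type and pool type select a kernel that is stored in a std::function, so run() is a single indirect call. The file above uses std::bind; plain lambdas express the same pattern, as in this simplified sketch (Tensor is a stand-in for IPortableTensor):

#include <functional>
#include <stdexcept>

struct Tensor; // stand-in for IPortableTensor

enum class PoolKind { kAvg, kMax };

// Sketch: choose the kernel once; callers just invoke the returned function.
std::function<void(const Tensor *, Tensor *)> makePoolKernel(PoolKind kind)
{
  switch (kind)
  {
    case PoolKind::kAvg:
      return [](const Tensor *in, Tensor *out) { /* average-pool body */ (void)in; (void)out; };
    case PoolKind::kMax:
      return [](const Tensor *in, Tensor *out) { /* max-pool body */ (void)in; (void)out; };
    default:
      throw std::runtime_error{"Pool: unsupported pool type"};
  }
}
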
similarity index 68%
rename from runtime/onert/backend/cpu/ops/MaxPoolLayer.h
rename to runtime/onert/backend/cpu/ops/PoolLayer.h
index 4c5109f..b378359 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__
 
 #include <backend/IPortableTensor.h>
 #include "OperationUtils.h"
@@ -31,22 +31,25 @@ namespace cpu
 namespace ops
 {
 
-class MaxPoolLayer : public ::onert::exec::IFunction
+enum class PoolType
 {
-public:
-  MaxPoolLayer();
+  kAvg,
+  kL2,
+  kMax,
+};
 
+class PoolLayer : public ::onert::exec::IFunction
+{
 public:
-  void maxPoolFloat32();
-
-  void maxPoolQuant8();
+  PoolLayer();
 
+public:
   void configure(const IPortableTensor *input, const uint32_t paddingLeft,
                  const uint32_t paddingRight, const uint32_t paddingTop,
                  const uint32_t paddingBottom, const uint32_t strideWidth,
                  const uint32_t strideHeight, const uint32_t kernelWidth,
                  const uint32_t kernelHeight, const ir::Activation activation,
-                 IPortableTensor *output);
+                 IPortableTensor *output, const PoolType op_type);
 
   void run() override;
 
@@ -54,17 +57,7 @@ private:
   const IPortableTensor *_input;
   IPortableTensor *_output;
 
-  uint32_t _paddingLeft;
-  uint32_t _paddingTop;
-  uint32_t _paddingRight;
-  uint32_t _paddingBottom;
-
-  uint32_t _strideWidth;
-  uint32_t _strideHeight;
-  uint32_t _kernelWidth;
-  uint32_t _kernelHeight;
-
-  ir::Activation _activation;
+  std::function<void(const IPortableTensor *, IPortableTensor *)> _kernel;
 };
 
 } // namespace ops
@@ -72,4 +65,4 @@ private:
 } // namespace backend
 } // namespace onert
 
-#endif // __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__
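
Call sites that used to instantiate MaxPoolLayer now build a PoolLayer and pass the pool kind as the final configure() argument. A hedged fragment of the migration (the tensors and geometry values are assumed to exist in the caller, and <memory> is assumed included):

// Fragment: migrating a former MaxPoolLayer call site to the merged layer.
auto fn = std::make_unique<ops::PoolLayer>();
fn->configure(input, paddingLeft, paddingRight, paddingTop, paddingBottom,
              strideWidth, strideHeight, kernelWidth, kernelHeight,
              ir::Activation::NONE, output, ops::PoolType::kMax);
fn->run();
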
diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.cc b/runtime/onert/backend/cpu/ops/QuantizeLayer.cc
deleted file mode 100644 (file)
index 45fc148..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "QuantizeLayer.h"
-
-#include <cker/operation/Quantize.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-QuantizeLayer::QuantizeLayer() : _input(nullptr), _output(nullptr)
-{
-  // DO NOTHING
-}
-
-template <typename InputT, typename OutputT> void QuantizeLayer::affineQuantize()
-{
-  nnfw::cker::Quantize(getTensorShape(_input), reinterpret_cast<const InputT *>(_input->buffer()),
-                       getTensorShape(_output), reinterpret_cast<OutputT *>(_output->buffer()),
-                       _output->data_scale(), _output->data_offset());
-}
-
-void QuantizeLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
-  _input = input;
-  _output = output;
-}
-
-void QuantizeLayer::run()
-{
-  if (_input->data_type() == OperandType::FLOAT32)
-  {
-    affineQuantize<float, uint8_t>();
-  }
-  else
-  {
-    throw std::runtime_error{"Quantize: unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
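
The deleted layer's affineQuantize is the standard affine mapping q = round(x / scale) + zero_point, clamped to the output type's range; that is the whole contract it delegated to nnfw::cker::Quantize. A self-contained sketch (round-half-away-from-zero is shown; the cker kernel's exact rounding mode is not asserted here):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Sketch: affine quantization of one float into uint8.
uint8_t affineQuantizeRef(float x, float scale, int32_t zero_point)
{
  const int32_t q = static_cast<int32_t>(std::lround(x / scale)) + zero_point;
  return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}
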
diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.h b/runtime/onert/backend/cpu/ops/QuantizeLayer.h
deleted file mode 100644 (file)
index b4e7aca..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class QuantizeLayer : public ::onert::exec::IFunction
-{
-public:
-  QuantizeLayer();
-
-public:
-  template <typename InputT, typename OutputT> void affineQuantize();
-
-  void configure(const IPortableTensor *input, IPortableTensor *output);
-
-  void run() override;
-
-private:
-  const IPortableTensor *_input;
-  IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
similarity index 61%
rename from runtime/onert/backend/cpu/ops/RoundLayer.cc
rename to runtime/onert/backend/cpu/ops/RankLayer.cc
index 185d755..4690bdf 100644 (file)
  * limitations under the License.
  */
 
-#include "RoundLayer.h"
+#include "RankLayer.h"
 
 #include "OperationUtils.h"
 
-#include <cker/operation/Round.h>
-
 namespace onert
 {
 namespace backend
@@ -28,32 +26,28 @@ namespace cpu
 {
 namespace ops
 {
-RoundLayer::RoundLayer() : _input(nullptr), _output(nullptr)
-{
-  // DO NOTHING
-}
 
-void RoundLayer::roundFloat32()
+RankLayer::RankLayer() : _input(nullptr), _output(nullptr)
 {
-  nnfw::cker::Round(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
-                    getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+  // DO NOTHING
 }
 
-void RoundLayer::configure(const IPortableTensor *input, IPortableTensor *output)
+void RankLayer::configure(const IPortableTensor *input, IPortableTensor *output)
 {
   _input = input;
   _output = output;
 }
 
-void RoundLayer::run()
+void RankLayer::run()
 {
-  if (_input->data_type() == OperandType::FLOAT32)
+  if (_input->data_type() == OperandType::FLOAT32 || _input->data_type() == OperandType::INT32)
   {
-    roundFloat32();
+    int32_t *output_data = reinterpret_cast<int32_t *>(_output->buffer());
+    output_data[0] = _input->num_dimensions();
   }
   else
   {
-    throw std::runtime_error{"Round: unsupported data type"};
+    throw std::runtime_error{"Rank : unsupported data type"};
   }
 }
 
similarity index 81%
rename from runtime/onert/backend/cpu/ops/ZerosLikeLayer.h
rename to runtime/onert/backend/cpu/ops/RankLayer.h
index 0548942..6282ceb 100644 (file)
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__
 
 #include <backend/IPortableTensor.h>
 
@@ -29,11 +29,13 @@ namespace cpu
 {
 namespace ops
 {
-class ZerosLikeLayer : public ::onert::exec::IFunction
+
+class RankLayer : public ::onert::exec::IFunction
 {
 public:
-  ZerosLikeLayer();
+  RankLayer();
 
+public:
   void configure(const IPortableTensor *input, IPortableTensor *output);
 
   void run() override;
@@ -48,4 +50,4 @@ private:
 } // namespace backend
 } // namespace onert
 
-#endif // __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__
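
RankLayer's output is a scalar int32 holding the input's dimension count, independent of the element values, which is why a single code path covers FLOAT32 and INT32 alike. A trivial sketch:

#include <cstdint>
#include <vector>

// Sketch: rank is just the length of the shape.
int32_t rankOf(const std::vector<int> &shape) { return static_cast<int32_t>(shape.size()); }
// rankOf({2, 3, 4}) == 3
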
diff --git a/runtime/onert/backend/cpu/ops/ReLU6Layer.cc b/runtime/onert/backend/cpu/ops/ReLU6Layer.cc
deleted file mode 100644 (file)
index 26eb35e..0000000
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ReLU6Layer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/ReLU6.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-ReLU6Layer::ReLU6Layer() : _input(nullptr), _output(nullptr)
-{
-  // DO NOTHING
-}
-
-void ReLU6Layer::relu6Float32()
-{
-  nnfw::cker::ReLU6(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
-                    reinterpret_cast<float *>(_output->buffer()));
-}
-
-void ReLU6Layer::relu6Quant8()
-{
-  // cker quant8 relu is not implemented yet
-  throw std::runtime_error{"NYI"};
-}
-
-void ReLU6Layer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
-  _input = input;
-  _output = output;
-}
-
-void ReLU6Layer::run()
-{
-  if (_input->data_type() == OperandType::FLOAT32)
-  {
-    relu6Float32();
-  }
-  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    relu6Quant8();
-  }
-  else
-  {
-    throw std::runtime_error{"ReLU6: unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/ReLULayer.cc b/runtime/onert/backend/cpu/ops/ReLULayer.cc
deleted file mode 100644 (file)
index cb4529f..0000000
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ReLULayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/ReLU.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-ReLULayer::ReLULayer() : _input(nullptr), _output(nullptr)
-{
-  // DO NOTHING
-}
-
-void ReLULayer::reluFloat32()
-{
-  nnfw::cker::ReLU(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
-                   getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void ReLULayer::reluQuant8()
-{
-  // cker quant8 relu is not implemented yet
-  throw std::runtime_error{"NYI"};
-}
-
-void ReLULayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
-  _input = input;
-  _output = output;
-}
-
-void ReLULayer::run()
-{
-  if (_input->data_type() == OperandType::FLOAT32)
-  {
-    reluFloat32();
-  }
-  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    reluQuant8();
-  }
-  else
-  {
-    throw std::runtime_error{"ReLU: unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
index fe22dbe..bb5f85d 100644 (file)
@@ -49,27 +49,31 @@ void evalLogic(const IPortableTensor *input, IPortableTensor *output, const std:
 }
 
 template <typename T>
-void evalType(const IPortableTensor *input, IPortableTensor *output, const std::vector<int> &axes,
-              bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
+std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)>
+evalType(bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
 {
   switch (reduce_type)
   {
     case ReduceType::kSum:
-      return evalLogic<T>(input, output, axes, keep_dims, static_cast<T>(0), reduce_kernel,
-                          [](const T current, const T in) -> T { return in + current; });
+      return std::bind(&evalLogic<T>, std::placeholders::_1, std::placeholders::_2,
+                       std::placeholders::_3, keep_dims, static_cast<T>(0), reduce_kernel,
+                       [](const T current, const T in) -> T { return in + current; });
       break;
     case ReduceType::kProd:
-      return evalLogic<T>(input, output, axes, keep_dims, static_cast<T>(1), reduce_kernel,
-                          [](const T current, const T in) -> T { return in * current; });
+      return std::bind(&evalLogic<T>, std::placeholders::_1, std::placeholders::_2,
+                       std::placeholders::_3, keep_dims, static_cast<T>(1), reduce_kernel,
+                       [](const T current, const T in) -> T { return in * current; });
       break;
     case ReduceType::kMax:
-      return evalLogic<T>(
-          input, output, axes, keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel,
+      return std::bind(
+          &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+          keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel,
           [](const T current, const T in) -> T { return (in > current) ? in : current; });
       break;
     case ReduceType::kMin:
-      return evalLogic<T>(
-          input, output, axes, keep_dims, std::numeric_limits<T>::max(), reduce_kernel,
+      return std::bind(
+          &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+          keep_dims, std::numeric_limits<T>::max(), reduce_kernel,
           [](const T current, const T in) -> T { return (in < current) ? in : current; });
       break;
     default:
@@ -79,44 +83,44 @@ void evalType(const IPortableTensor *input, IPortableTensor *output, const std::
 
 // Template specialization for bool type
 template <>
-void evalType<bool>(const IPortableTensor *input, IPortableTensor *output,
-                    const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel,
-                    ReduceType reduce_type)
+std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)>
+evalType<bool>(bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
 {
   switch (reduce_type)
   {
     case ReduceType::kAny:
-      return evalLogic<bool>(
-          input, output, axes, keep_dims, false, reduce_kernel,
-          [](const bool current, const bool in) -> bool { return in || current; });
+      return std::bind(&evalLogic<bool>, std::placeholders::_1, std::placeholders::_2,
+                       std::placeholders::_3, keep_dims, false, reduce_kernel,
+                       [](const bool current, const bool in) -> bool { return in || current; });
       break;
     case ReduceType::kAll:
-      return evalLogic<bool>(
-          input, output, axes, keep_dims, true, reduce_kernel,
-          [](const bool current, const bool in) -> bool { return in && current; });
+      return std::bind(&evalLogic<bool>, std::placeholders::_1, std::placeholders::_2,
+                       std::placeholders::_3, keep_dims, true, reduce_kernel,
+                       [](const bool current, const bool in) -> bool { return in && current; });
       break;
     default:
       throw std::runtime_error{"Reduce: Unsupported reduce type"};
   }
 }
 
-template <ReduceType reduce_type>
-void evalGeneric(const IPortableTensor *input, IPortableTensor *output,
-                 const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel)
+std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)>
+generateKernelGeneric(const IPortableTensor *input, bool keep_dims,
+                      nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
 {
   switch (input->data_type())
   {
     case OperandType::FLOAT32:
-      return evalType<float>(input, output, axes, keep_dims, reduce_kernel, reduce_type);
+      return evalType<float>(keep_dims, reduce_kernel, reduce_type);
     case OperandType::INT32:
-      return evalType<int32_t>(input, output, axes, keep_dims, reduce_kernel, reduce_type);
+      return evalType<int32_t>(keep_dims, reduce_kernel, reduce_type);
     case OperandType::BOOL8:
-      return evalType<bool>(input, output, axes, keep_dims, reduce_kernel, reduce_type);
+      return evalType<bool>(keep_dims, reduce_kernel, reduce_type);
     default:
       throw std::runtime_error{"Reduce(generic): unsupported data type"};
   }
 }
 
+// TODO Refine this function
 void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output,
                       const std::vector<int> &axes, bool keep_dims,
                       nnfw::cker::Reduce &reduce_kernel)
@@ -146,14 +150,15 @@ void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output,
     return;
   }
 
-  evalGeneric<ReduceType::kSum>(input, output, axes, keep_dims, reduce_kernel);
+  const auto kernel = generateKernelGeneric(input, keep_dims, reduce_kernel, ReduceType::kSum);
+  kernel(input, output, axes);
 }
 
 } // namespace
 
 ReduceLayer::ReduceLayer()
-    : _input(nullptr), _axes(nullptr), _output(nullptr), _reduceType(ReduceType::kAny),
-      _keep_dims(false), _reduce_kernel(new nnfw::cker::Reduce())
+    : _input(nullptr), _axes(nullptr), _output(nullptr), _reduce_kernel(new nnfw::cker::Reduce()),
+      _kernel()
 {
   // DO NOTHING
 }
@@ -166,43 +171,44 @@ void ReduceLayer::configure(const IPortableTensor *input, const IPortableTensor
   _input = input;
   _axes = axes;
   _output = output;
-  _reduceType = reduceType;
-  _keep_dims = keep_dims;
-}
 
-void ReduceLayer::run()
-{
-  const auto axes = getReducerAxes(_axes);
-  switch (_reduceType)
+  switch (reduceType)
   {
     case ReduceType::kSum:
       if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
       {
-        evalSumQuantized(_input, _output, axes, _keep_dims, *_reduce_kernel);
+        _kernel = std::bind(&evalSumQuantized, std::placeholders::_1, std::placeholders::_2,
+                            std::placeholders::_3, keep_dims, *_reduce_kernel);
         return;
       }
-      evalGeneric<ReduceType::kSum>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+      _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kSum);
       break;
     case ReduceType::kProd:
-      evalGeneric<ReduceType::kProd>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+      _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kProd);
       break;
     case ReduceType::kMax:
-      evalGeneric<ReduceType::kMax>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+      _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kMax);
       break;
     case ReduceType::kMin:
-      evalGeneric<ReduceType::kMin>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+      _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kMin);
       break;
     case ReduceType::kAny:
-      evalGeneric<ReduceType::kAny>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+      _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kAny);
       break;
     case ReduceType::kAll:
-      evalGeneric<ReduceType::kAll>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+      _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kAll);
       break;
     default:
       throw std::runtime_error{"ReduceSum: Unsupported reduce type"};
   }
 }
 
+void ReduceLayer::run()
+{
+  const auto axes = getReducerAxes(_axes);
+  _kernel(_input, _output, axes);
+}
+
 } // namespace ops
 } // namespace cpu
 } // namespace backend
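
The ReduceLayer refactor mirrors PoolLayer: evalType no longer executes the reduction, it returns a std::function with keep_dims, the init value and the reducer already bound, leaving (input, output, axes) as run-time arguments. A minimal sketch of that partial application (names are illustrative):

#include <functional>
#include <vector>

struct Tensor; // stand-in for IPortableTensor

void evalSum(const Tensor *in, Tensor *out, const std::vector<int> &axes, bool keep_dims)
{ /* reduction body */ (void)in; (void)out; (void)axes; (void)keep_dims; }

// Sketch: fix keep_dims at configure time; axes remain a run() argument.
std::function<void(const Tensor *, Tensor *, const std::vector<int> &)>
bindReduceKernel(bool keep_dims)
{
  return std::bind(&evalSum, std::placeholders::_1, std::placeholders::_2,
                   std::placeholders::_3, keep_dims);
}
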
index 8e7bcdb..332d399 100644 (file)
@@ -65,10 +65,11 @@ private:
   const IPortableTensor *_input;
   const IPortableTensor *_axes;
   IPortableTensor *_output;
-  ReduceType _reduceType;
-  bool _keep_dims;
 
   std::unique_ptr<nnfw::cker::Reduce> _reduce_kernel;
+  std::function<void(const IPortableTensor *input, IPortableTensor *output,
+                     const std::vector<int> &axes)>
+      _kernel;
 };
 
 } // namespace ops
diff --git a/runtime/onert/backend/cpu/ops/RoundLayer.h b/runtime/onert/backend/cpu/ops/RoundLayer.h
deleted file mode 100644 (file)
index fc6a46c..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class RoundLayer : public ::onert::exec::IFunction
-{
-public:
-  RoundLayer();
-
-  void configure(const IPortableTensor *input, IPortableTensor *output);
-
-  void run() override;
-
-private:
-  void roundFloat32();
-
-private:
-  const IPortableTensor *_input;
-  IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/RsqrtLayer.cc b/runtime/onert/backend/cpu/ops/RsqrtLayer.cc
deleted file mode 100644 (file)
index 0bd468f..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "RsqrtLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-RsqrtLayer::RsqrtLayer() : _input(nullptr), _output(nullptr)
-{
-  // DO NOTHING
-}
-
-void RsqrtLayer::rsqrtFloat32()
-{
-  nnfw::cker::Rsqrt(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
-                    getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void RsqrtLayer::rsqrtQuant8() { throw std::runtime_error{"NYI : QASYMM8 not supported"}; }
-
-void RsqrtLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
-  _input = input;
-  _output = output;
-}
-
-void RsqrtLayer::run()
-{
-  if (_input->data_type() == OperandType::FLOAT32)
-  {
-    rsqrtFloat32();
-  }
-  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    rsqrtQuant8();
-  }
-  else
-  {
-    throw std::runtime_error{"Rsqrt: unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/RsqrtLayer.h b/runtime/onert/backend/cpu/ops/RsqrtLayer.h
deleted file mode 100644 (file)
index 49abbb0..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class RsqrtLayer : public ::onert::exec::IFunction
-{
-public:
-  RsqrtLayer();
-
-  void configure(const IPortableTensor *input, IPortableTensor *output);
-
-  void run() override;
-
-private:
-  void rsqrtFloat32();
-  void rsqrtQuant8();
-  const IPortableTensor *_input;
-  IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/SinLayer.cc b/runtime/onert/backend/cpu/ops/SinLayer.cc
deleted file mode 100644 (file)
index 2a6b117..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "SinLayer.h"
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-SinLayer::SinLayer() : _input(nullptr), _output(nullptr)
-{
-  // DO NOTHING
-}
-
-void SinLayer::sinFloat32()
-{
-  nnfw::cker::Sin(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
-                  getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void SinLayer::sinQuant8() { throw std::runtime_error{"NYI"}; }
-
-void SinLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
-  _input = input;
-  _output = output;
-}
-
-void SinLayer::run()
-{
-  if (_input->data_type() == OperandType::FLOAT32)
-  {
-    sinFloat32();
-  }
-  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    sinQuant8();
-  }
-  else
-  {
-    throw std::runtime_error{"Sin: unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/SinLayer.h b/runtime/onert/backend/cpu/ops/SinLayer.h
deleted file mode 100644 (file)
index 348350f..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_SINLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_SINLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class SinLayer : public ::onert::exec::IFunction
-{
-public:
-  SinLayer();
-
-  void configure(const IPortableTensor *input, IPortableTensor *output);
-
-  void run() override;
-
-private:
-  void sinFloat32();
-  void sinQuant8();
-
-  const IPortableTensor *_input;
-  IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_SINLAYER_H__
index 6e2bb58..095e67a 100644 (file)
@@ -34,55 +34,23 @@ SoftMaxLayer::SoftMaxLayer() : _input(nullptr), _output(nullptr), _beta(0.0)
   // DO NOTHING
 }
 
-// Performs softmax along the input of size (input_size * batch_size).
-void Softmax(const float *in, const int input_size, const int batch_size, const float beta,
-             float *out)
+void SoftMaxLayer::softmaxFloat32()
 {
-  assert(input_size > 0);
-
-  // For each batch
-  for (int b = 0; b < batch_size; b++)
+  if (getNumberOfDimensions(_input) == 1)
   {
-    // Find the max coeff.
-    float max_coeff = in[0];
-    for (int i = 1; i < input_size; i++)
-    {
-      if (in[i] > max_coeff)
-        max_coeff = in[i];
-    }
-
-    // Compute the normalized sum of exps.
-    float exp_sum = 0.0;
-    for (int i = 0; i < input_size; i++)
-    {
-      out[i] = std::exp((in[i] - max_coeff) * beta);
-      exp_sum += out[i];
-    }
-
-    // Divide by the sum of exps.
-    float reciprocal_sum_exp = 1.f / exp_sum;
-    for (int i = 0; i < input_size; i++)
-    {
-      out[i] *= reciprocal_sum_exp;
-    }
-
-    // Advance in and out pointers for the next batch.
-    in += input_size;
-    out += input_size;
+    uint32_t input_size = getNumberOfElements(_input);
+    nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, 1, _beta,
+                        reinterpret_cast<float *>(_output->buffer()));
   }
-}
-
-void SoftMaxLayer::softmaxFloat32()
-{
-  if (getNumberOfDimensions(_input) == 2)
+  else if (getNumberOfDimensions(_input) == 2)
   {
     uint32_t batch_size = getSizeOfDimension(_input, 0);
     if (batch_size == 0)
       throw std::runtime_error("batch_size should not be 0");
 
     uint32_t input_size = getNumberOfElements(_input) / batch_size;
-    Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size, _beta,
-            reinterpret_cast<float *>(_output->buffer()));
+    nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size,
+                        _beta, reinterpret_cast<float *>(_output->buffer()));
   }
   else if (getNumberOfDimensions(_input) == 4)
   {
@@ -94,7 +62,7 @@ void SoftMaxLayer::softmaxFloat32()
   }
   else
   {
-    throw std::runtime_error{"only 2D and 4D tensors supported"};
+    throw std::runtime_error{"only 1D, 2D and 4D tensors supported"};
   }
 }
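
The hand-rolled loop removed above and the nnfw::cker::Softmax call that replaces it compute the same numerically stable form, subtracting the row maximum before exponentiating so exp() cannot overflow:

  softmax(x)_i = exp(beta * (x_i - m)) / sum_k exp(beta * (x_k - m)),  where m = max_j x_j

The new 1D branch simply treats the tensor as a 2D input with batch_size = 1, which is exactly what it passes to the kernel.
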
 
diff --git a/runtime/onert/backend/cpu/ops/SubLayer.cc b/runtime/onert/backend/cpu/ops/SubLayer.cc
deleted file mode 100644 (file)
index 597d529..0000000
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "SubLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-void SubLayer::subFloat32()
-{
-  float output_activation_min = 0, output_activation_max = 0;
-  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
-  nnfw::cker::BinaryArithmeticOpParam op_params;
-  op_params.float_activation_max = output_activation_max;
-  op_params.float_activation_min = output_activation_min;
-
-  const bool need_broadcast =
-      nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
-  if (need_broadcast)
-  {
-    nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
-        op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
-        getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
-        getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-    return;
-  }
-
-  nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
-      op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
-      getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
-      getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void SubLayer::subInt32()
-{
-  int32_t output_activation_min = 0, output_activation_max = 0;
-  CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
-  nnfw::cker::BinaryArithmeticOpParam op_params;
-  op_params.quantized_activation_max = output_activation_max;
-  op_params.quantized_activation_min = output_activation_min;
-
-  const bool need_broadcast =
-      nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
-  if (need_broadcast)
-  {
-    nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
-        op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()),
-        getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()),
-        getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
-    return;
-  }
-
-  nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
-      op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()),
-      getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()),
-      getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
-}
-
-void SubLayer::subQuant8()
-{
-  int32_t output_activation_min, output_activation_max;
-  CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
-                                &output_activation_max);
-  nnfw::cker::BinaryArithmeticOpParam op_params;
-  op_params.quantized_activation_max = output_activation_max;
-  op_params.quantized_activation_min = output_activation_min;
-  // Parameters for scaled quantized computation
-  op_params.left_shift = 20;
-  // Zero-points of input and output tensors
-  op_params.input1_offset = -_lhs->data_offset();
-  op_params.input2_offset = -_rhs->data_offset();
-  op_params.output_offset = _output->data_offset();
-  assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255));
-  assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255));
-  assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255));
-
-  // Compute normalized scale for _lhs and _rhs values,
-  // and represent in 32-bit fixed point
-  const double norm_max_scale = 2 * std::max(_lhs->data_scale(), _rhs->data_scale());
-  const double real_lhs_scale = _lhs->data_scale() / norm_max_scale;
-  const double real_rhs_scale = _rhs->data_scale() / norm_max_scale;
-  // output scale is used to normalize final result, so we invert the scale here
-  const double real_output_scale =
-      norm_max_scale / (_output->data_scale() * (1 << op_params.left_shift));
-
-  // Represent the scales as fixed int32_t multipliers, and int32_t shifts
-  QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift);
-  QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift);
-  op_params.input2_multiplier *= -1;
-  QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift);
-
-  const bool need_broadcast =
-      nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
-  if (need_broadcast)
-  {
-    nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
-        op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
-        getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
-        getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-    return;
-  }
-
-  nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
-      op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
-      getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
-      getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void SubLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
-                         const ir::Activation activation, IPortableTensor *output)
-{
-  _lhs = lhs;
-  _rhs = rhs;
-  _activation = activation;
-  _output = output;
-}
-
-void SubLayer::run()
-{
-  if (_output->data_type() == OperandType::FLOAT32)
-  {
-    subFloat32();
-  }
-  else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    subQuant8();
-  }
-  else if (_output->data_type() == OperandType::INT32)
-  {
-    subInt32();
-  }
-  else
-  {
-    throw std::runtime_error{"Sub: unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
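
The removed subQuant8 illustrates the standard fixed-point recipe for asymmetric uint8 arithmetic: inputs get 20 bits of left-shift headroom, each real scale is encoded as an int32 multiplier plus a power-of-two shift, and the rhs multiplier is negated (op_params.input2_multiplier *= -1) so the shared add kernel performs a subtraction. A sketch of the multiplier/shift decomposition in the spirit of QuantizeMultiplier (hedged; the exact rounding in cker may differ):

    #include <cmath>
    #include <cstdint>

    // Encode real_scale as multiplier * 2^(shift - 31), multiplier in Q31.
    void QuantizeMultiplierSketch(double real_scale, int32_t *multiplier, int *shift)
    {
      if (real_scale == 0.0)
      {
        *multiplier = 0;
        *shift = 0;
        return;
      }
      const double m = std::frexp(real_scale, shift); // m in [0.5, 1)
      int64_t q = static_cast<int64_t>(std::round(m * (1LL << 31)));
      if (q == (1LL << 31)) // rounding pushed m up to exactly 1.0
      {
        q /= 2;
        ++*shift;
      }
      *multiplier = static_cast<int32_t>(q);
    }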
diff --git a/runtime/onert/backend/cpu/ops/SubLayer.h b/runtime/onert/backend/cpu/ops/SubLayer.h
deleted file mode 100644 (file)
index 86f32ca..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class SubLayer : public ::onert::exec::IFunction
-{
-public:
-  SubLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
-  {
-    // DO NOTHING
-  }
-
-public:
-  void subFloat32();
-
-  void subQuant8();
-
-  void subInt32();
-
-  void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
-                 const ir::Activation activation, IPortableTensor *output);
-
-  void run() override;
-
-private:
-  const IPortableTensor *_lhs;
-  const IPortableTensor *_rhs;
-  IPortableTensor *_output;
-
-  ir::Activation _activation{ir::Activation::NONE};
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/TanhLayer.cc b/runtime/onert/backend/cpu/ops/TanhLayer.cc
deleted file mode 100644 (file)
index 910ac1f..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TanhLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Tanh.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-TanhLayer::TanhLayer() : _input(nullptr), _output(nullptr)
-{
-  // DO NOTHING
-}
-
-void TanhLayer::PopulateLookupTable()
-{
-  const auto input_scale = static_cast<double>(_input->data_scale());
-  const auto input_zero_point = static_cast<int32_t>(_input->data_offset());
-  const auto output_scale = static_cast<double>(_output->data_scale());
-  const auto output_zero_point = static_cast<int32_t>(_output->data_offset());
-  const float inverse_scale = 1 / output_scale;
-  int32_t maxval = std::numeric_limits<uint8_t>::max();
-  int32_t minval = std::numeric_limits<uint8_t>::min();
-  for (int32_t val = minval; val <= maxval; ++val)
-  {
-    const float dequantized = input_scale * (val - input_zero_point);
-    const float transformed = std::tanh(dequantized);
-    const float rescaled = std::round(transformed * inverse_scale);
-    const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
-    _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval));
-  }
-}
-
-void TanhLayer::tanhFloat32()
-{
-  nnfw::cker::Tanh(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
-                   getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void TanhLayer::tanhQuant8()
-{
-  const int size = MatchingFlatSize(getTensorShape(_input), getTensorShape(_output));
-  const uint8_t *input_data = reinterpret_cast<const uint8_t *>(_input->buffer());
-  uint8_t *output_data = reinterpret_cast<uint8_t *>(_output->buffer());
-
-  for (int i = 0; i < size; ++i)
-  {
-    output_data[i] = _table[input_data[i]];
-  }
-}
-
-void TanhLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
-  _input = input;
-  _output = output;
-  if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    PopulateLookupTable();
-  }
-}
-
-void TanhLayer::run()
-{
-  if (_input->data_type() == OperandType::FLOAT32)
-  {
-    tanhFloat32();
-  }
-  else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
-  {
-    tanhQuant8();
-  }
-  else
-  {
-    throw std::runtime_error{"Tanh: unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
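
The removed quant8 path is a classic lookup-table activation: every possible uint8 input is dequantized, passed through std::tanh, and requantized once at configure time, so run() costs one table read per element. As a worked entry: with input scale 0.1 and zero point 128, and output scale 1/128 with zero point 128, input value 148 dequantizes to 0.1 * (148 - 128) = 2.0; tanh(2.0) ≈ 0.964; round(0.964 * 128) = 123; and 123 + 128 = 251 is stored in _table[148] (results outside [0, 255] would be clamped).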
diff --git a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc b/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc
deleted file mode 100644 (file)
index ae80845..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ZerosLikeLayer.h"
-
-#include "OperationUtils.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-ZerosLikeLayer::ZerosLikeLayer() : _input(nullptr), _output(nullptr)
-{
-  // DO NOTHING
-}
-
-void ZerosLikeLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
-  _input = input;
-  _output = output;
-}
-
-void ZerosLikeLayer::run()
-{
-  if (!HaveSameShapes(_input, _output))
-    throw std::runtime_error{"ZerosLike: input and output shape don't match."};
-
-  auto element_size = getTensorShape(_input).FlatSize();
-
-  switch (_input->data_type())
-  {
-    case OperandType::FLOAT32:
-      memset(reinterpret_cast<float *>(_output->buffer()), 0, element_size * sizeof(float));
-      break;
-    case OperandType::INT32:
-      memset(reinterpret_cast<int32_t *>(_output->buffer()), 0, element_size * sizeof(int32_t));
-      break;
-    default:
-      throw std::runtime_error{"ZerosLike: unsupported data type"};
-  }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
index c263aef..1eba295 100644 (file)
@@ -29,6 +29,7 @@ class Backend;
 class IConstantInitializer;
 class IKernelGenerator;
 class ITensorRegister;
+struct ITensorRegistry;
 struct ITensorBuilder;
 struct IOptimizer;
 
@@ -45,14 +46,15 @@ public:
 
 public:
   BackendContext(const Backend *backend, const ir::Graph *graph,
+                 std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
                  std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
                  std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
                  std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
                  std::shared_ptr<ITensorRegister> tensor_register = nullptr,
                  std::shared_ptr<IOptimizer> optimizer = nullptr)
-      : _backend{backend}, _graph{graph}, tensor_builder{tensor_builder},
-        constant_initializer{constant_initializer}, kernel_gen{kernel_gen},
-        tensor_register{tensor_register}, optimizer{optimizer}
+      : _backend{backend}, _graph{graph}, tensor_registry{tensor_registry},
+        tensor_builder{tensor_builder}, constant_initializer{constant_initializer},
+        kernel_gen{kernel_gen}, tensor_register{tensor_register}, optimizer{optimizer}
   {
   }
 
@@ -74,6 +76,7 @@ private:
   std::vector<ir::OperandIndex> _operand_list;
 
 public:
+  std::shared_ptr<ITensorRegistry> tensor_registry;
   std::shared_ptr<ITensorBuilder> tensor_builder;
   std::shared_ptr<IConstantInitializer> constant_initializer;
   std::shared_ptr<IKernelGenerator> kernel_gen;
index f322015..149acec 100644 (file)
@@ -162,14 +162,14 @@ public:
 public:
   void run()
   {
-    assert(tensor_builder().get());
+    assert(tensor_registry());
     for (const auto &it : _init_map)
     {
       const auto &ind = it.first;
       const auto &fn = it.second;
 
       const auto &model_obj = _operands.at(ind);
-      auto tensor_obj = tensor_builder()->tensorAt(ind);
+      auto tensor_obj = tensor_registry()->getNativeITensor(ind);
       assert(tensor_obj != nullptr);
       fn(model_obj, *tensor_obj);
       VERBOSE(FillOperandData) << "Fill data for operand " << ind.value() << std::endl;
@@ -189,10 +189,7 @@ public:
   void setLayout(ir::Layout layout) { _current_op_seq_layout = layout; }
 
 protected:
-  using OperationVisitor::visit;
-
-protected:
-  virtual std::shared_ptr<ITensorBuilder> tensor_builder() const = 0;
+  virtual std::shared_ptr<ITensorRegistry> tensor_registry() const = 0;
 
 public:
   virtual void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj)
index b760cda..f93ab81 100644 (file)
@@ -40,11 +40,6 @@ struct ITensorBuilder
   virtual ~ITensorBuilder(void) = default;
 
   /**
-   * @brief Returns true if this TensorBuilder support dynamic tensor
-   */
-  virtual bool supportDynamicTensor() = 0;
-
-  /**
    * @brief Register tensor information to allocate on backend
    *
    * @param ind Index
@@ -63,15 +58,6 @@ struct ITensorBuilder
    */
   virtual bool isRegistered(const ir::OperandIndex &) const = 0;
 
-  /**
-   * @brief Get tensor registry
-   *
-   * @return std::shared_ptr<backend::ITensorRegistry> tensor registry object
-   *
-   * @note   Backend should implement this when it has StaticTensorManager and DynamicTensorManager
-   */
-  virtual std::shared_ptr<backend::ITensorRegistry> tensorRegistry() = 0;
-
 public: // methods for static tensor allocation
   /**
    * @brief Let the tensor builder know first use(start of lifetime) of a tensor
@@ -104,32 +90,6 @@ public: // methods for static tensor allocation
   virtual void postFunctionPrepare() = 0;
 
   /**
-   * @brief Get the tensor object
-   *
-   * @param ind Index of the tensor
-   * @return std::shared_ptr<ITensor> The tensor object
-   */
-  virtual std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) = 0;
-
-  /**
-   * @brief Set the migrant tensor object
-   *
-   * @return true if succeeded
-   * @return false if failed or unsupported
-   */
-  virtual bool setMigrantTensor(const ir::OperandIndex &, const std::shared_ptr<IPortableTensor> &)
-  {
-    return false;
-  }
-
-  /**
-   * @brief Iterate over tensors
-   *
-   * @param fn The function to be run
-   */
-  virtual void iterate(const IterateFunction &fn) = 0;
-
-  /**
   * @brief Release the static @c ITensorManager object which was built.
   *        Before calling this, @c allocate must have been called.
    *
@@ -147,10 +107,7 @@ public: // methods for dynamic tensor allocation
   * @note   Since it is a pointer, its lifetime lasts from the creation of TensorBuilder
    *         to the end of execution
    */
-  virtual IDynamicTensorManager *dynamicTensorManager(void)
-  {
-    throw std::runtime_error("dynamicTensorManager(): NYI");
-  }
+  virtual IDynamicTensorManager *dynamicTensorManager(void) { return nullptr; }
 
   /**
   * @brief Release the dynamic @c ITensorManager object which was built.
   *        Before calling this, @c allocate must have been called.
    *
    * @return std::unique_ptr<ITensorManager> Tensor Manager object
    */
-  virtual std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void)
-  {
-    throw std::runtime_error("releaseDynamicTensorManager() for this backend is not supported");
-  }
+  virtual std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) { return nullptr; }
 };
 
 } // namespace backend
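
Note the contract change above: dynamicTensorManager() and releaseDynamicTensorManager() now return nullptr for backends without dynamic-tensor support instead of throwing. Callers probe with a null check rather than a try/catch; a hypothetical example:

    // Hypothetical caller illustrating the new nullptr contract.
    if (auto *dyn_mgr = tensor_builder->dynamicTensorManager())
    {
      // Backend manages dynamic tensors; dyn_mgr may be used here.
    }
    else
    {
      // Static-only backend; skip dynamic allocation entirely.
    }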
index 8555131..88fcb0f 100644 (file)
@@ -21,6 +21,7 @@
 
 #include "ir/Index.h"
 #include "backend/ITensor.h"
+#include "backend/IPortableTensor.h"
 
 namespace onert
 {
@@ -51,13 +52,22 @@ struct ITensorRegistry
   * @note  The returned tensor cannot outlive the dynamic tensor manager
    */
   virtual std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &) = 0;
+  /**
+   * @brief Set a migrant tensor, i.e., a tensor that comes from another backend
+   *
+   * @return true if supported
+   * @return false if not supported
+   */
+  virtual bool setMigrantTensor(const ir::OperandIndex &, const std::shared_ptr<IPortableTensor> &)
+  {
+    return false;
+  }
 };
 
 } // namespace backend
 } // namespace onert
 
 #include "ir/OperandIndexMap.h"
-#include "backend/IPortableTensor.h"
 
 namespace onert
 {
@@ -108,24 +118,23 @@ public:
     return nullptr;
   }
 
-  bool setMigrantTensor(const ir::OperandIndex &ind, const std::shared_ptr<IPortableTensor> &tensor)
+  bool setMigrantTensor(const ir::OperandIndex &ind,
+                        const std::shared_ptr<IPortableTensor> &tensor) override
   {
-    // TODO Uncomment this as two tensors for an index is not allowed.
-    //      But now it is temporarily allowed as a workaround. External one hides Managed one.
-    // auto itr = _native.find(ind);
-    // if (itr != _native.end() && itr->second != nullptr && tensor != nullptr)
-    //  throw std::runtime_error{
-    //      "Tried to set an migrant tensor but an native tensor already exists."};
+    assert(tensor != nullptr);
+    auto itr = _native.find(ind);
+    if (itr != _native.end())
+      throw std::runtime_error{"Tried to set a migrant tensor but a native tensor already exists."};
     _migrant[ind] = tensor;
     return true;
   }
 
   void setNativeTensor(const ir::OperandIndex &ind, const std::shared_ptr<T_Tensor> &tensor)
   {
+    assert(tensor != nullptr);
     auto itr = _migrant.find(ind);
-    if (itr != _migrant.end() && itr->second != nullptr && tensor != nullptr)
-      throw std::runtime_error{
-          "Tried to set a native tensor but an migrant tensor already exists."};
+    if (itr != _migrant.end())
+      throw std::runtime_error{"Tried to set a native tensor but a migrant tensor already exists."};
     _native[ind] = tensor;
   }
 
index a7e034a..3f09b7a 100644 (file)
@@ -20,6 +20,7 @@
 #include "MemoryManager.h"
 
 #include "backend/IStaticTensorManager.h"
+#include "backend/IDynamicTensorManager.h"
 #include "ir/OperandIndexMap.h"
 #include "ir/OperandInfo.h"
 #include "TensorRegistry.h"
@@ -34,7 +35,8 @@ namespace cpu_common
 class StaticTensorManager : public backend::IStaticTensorManager
 {
 public:
-  StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg);
+  StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
+                      IDynamicTensorManager *dynamic_tensor_manager);
   virtual ~StaticTensorManager() = default;
 
   void allocateConsts(void);
@@ -55,6 +57,7 @@ private:
   std::unique_ptr<MemoryManager> _nonconst_mgr;
   const std::shared_ptr<TensorRegistry> _tensors;
   ir::OperandIndexMap<bool> _as_constants;
+  IDynamicTensorManager *_dynamic_tensor_manager;
 };
 
 } // namespace cpu_common
diff --git a/runtime/onert/core/include/compiler/LoweredGraph.h b/runtime/onert/core/include/compiler/LoweredGraph.h
new file mode 100644 (file)
index 0000000..aadba68
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_LOWERED_GRAPH_H__
+#define __ONERT_IR_LOWERED_GRAPH_H__
+
+#include "ir/Graph.h"
+#include "ir/LowerInfoMap.h"
+#include "ir/OpSequences.h"
+#include "compiler/BackendResolver.h"
+#include "compiler/Compiler.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+/**
+ * @brief Class that contains lowering information of a graph.
+ *        In addition, after lowering, operands in the graph are marked "dynamic"
+ *        if an operation's output shape cannot be decided at compilation time.
+ */
+class LoweredGraph
+{
+public:
+  LoweredGraph(const ir::Graph &graph, const compiler::CompilerOptions &options);
+
+  ir::Graph &graph() { return _graph; }
+  const ir::Graph &graph() const { return _graph; }
+  const ir::LowerInfoMap *getLowerInfo() const { return &_lower_info_map; }
+  const ir::operation::LowerInfo *getLowerInfo(const ir::OpSequenceIndex &op_seq_index) const;
+  void setLowerInfo(const ir::OpSequenceIndex &op_seq_index,
+                    std::unique_ptr<ir::operation::LowerInfo> &&lower_info);
+  void removeLowerInfo(const ir::OpSequenceIndex &op_seq_index);
+  const ir::operand::LowerInfo *getLowerInfo(const ir::OperandIndex &index) const;
+  ir::operand::LowerInfo *getLowerInfo(const ir::OperandIndex &index);
+  void setLowerInfo(const ir::OperandIndex &index,
+                    std::unique_ptr<ir::operand::LowerInfo> &&lower_info);
+  void removeLowerInfo(const ir::OperandIndex &index);
+  ir::OpSequences &op_seqs() { return _op_seqs; }
+  const ir::OpSequences &op_seqs() const { return _op_seqs; }
+  void iterateTopolOpSeqs(
+      const std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> &fn) const;
+  void
+  iterateTopolOpSeqs(const std::function<void(const ir::OpSequenceIndex &, ir::OpSequence &)> &fn);
+  const backend::BackendContexts &backend_contexts() { return _backend_contexts; }
+  const backend::BackendContexts &backend_contexts() const { return _backend_contexts; }
+  std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks() { return _indexed_ranks; }
+
+private:
+  void
+  makeOpSequences(ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
+                  const compiler::CompilerOptions &options,
+                  const compiler::BackendResolver &backend_resolver);
+
+  void manipulateLowerInfo(
+      ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
+      bool is_primary);
+  void dumpLowerInfo();
+  bool mergeable(const ir::OpSequenceIndex &op_seq_index, const ir::OperationIndex &node_index,
+                 ir::Layout layout, const compiler::BackendResolver &backend_resolver);
+  ir::OpSequenceIndex appendFreshSingleOpSequence(const ir::OperationIndex &node_index,
+                                                  const ir::Operation &node);
+
+private:
+  ir::Graph _graph;
+  backend::BackendContexts _backend_contexts;
+  std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
+  ir::LowerInfoMap _lower_info_map;
+  // Passes (e.g. for Perm) can accept only a Graph, so Graph holds OpSequences as a member
+  ir::OpSequences _op_seqs;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_IR_LOWERED_GRAPH_H__
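
A usage sketch based only on the interface declared above (error handling omitted; `graph` and `options` are assumed to exist):

    onert::compiler::LoweredGraph lowered{graph, options};
    lowered.iterateTopolOpSeqs(
        [&](const onert::ir::OpSequenceIndex &idx, const onert::ir::OpSequence &seq) {
          // Lowering info tells which backend/layout this op sequence targets.
          const auto *lower_info = lowered.getLowerInfo(idx);
          (void)seq;
          (void)lower_info;
        });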
index bff68c9..b97cb5b 100644 (file)
@@ -19,7 +19,7 @@
 
 #include "ir/OperationVisitor.h"
 #include "ir/OpSequence.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
 #include "ir/Index.h"
 
 #include <memory>
@@ -41,7 +41,8 @@ class StaticShapeInferer : public ir::OperationVisitor
 public:
   StaticShapeInferer(
       const ir::SubgraphIndex &subg_idx,
-      const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<ir::LoweredGraph>> &lowered_subgs)
+      const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
+          &lowered_subgs)
       : _lowered_subgs(lowered_subgs), _operands(lowered_subgs.at(subg_idx)->graph().operands()),
         _operations(lowered_subgs.at(subg_idx)->graph().operations()),
         _return_has_dynamic_tensor(false)
@@ -57,54 +58,34 @@ public:
    * @param op_seq sequence of operations
    * @return @c true if op_seq's input or output has any dynamic tensor; @c false otherwise.
    */
-  bool infer(const ir::OpSequence &op_seq)
-  {
-    bool has_dynamic_tensor = false;
-
-    _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit()
-
-    for (const auto &operation_idx : op_seq.operations())
-    {
-      _operations.at(operation_idx).accept(*this);
-
-      has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor;
-    }
-
-    return has_dynamic_tensor;
-  }
+  bool infer(const ir::OpSequence &op_seq);
 
   void dump();
 
 private:
+  bool checkDynamicInput(const ir::Operation &op);
+  void setDynamicOutput(const ir::Operation &op);
+
+private:
   // TODO Define visitors for operations. List them in alphabetic order.
-  void visit(const ir::operation::Abs &op) override;
-  void visit(const ir::operation::Add &op) override;
   void visit(const ir::operation::ArgMax &op) override;
   void visit(const ir::operation::BatchMatMul &op) override;
+  void visit(const ir::operation::BinaryArithmetic &op) override;
   void visit(const ir::operation::BroadcastTo &op) override;
-  void visit(const ir::operation::Cast &op) override;
   void visit(const ir::operation::Comparison &op) override;
   void visit(const ir::operation::Concat &op) override;
   void visit(const ir::operation::Conv2D &op) override;
-  void visit(const ir::operation::Cos &op) override;
-  void visit(const ir::operation::Div &op) override;
-  void visit(const ir::operation::Exp &op) override;
+  void visit(const ir::operation::ElementwiseActivation &op) override;
+  void visit(const ir::operation::ElementwiseBinary &op) override;
+  void visit(const ir::operation::ElementwiseUnary &op) override;
   void visit(const ir::operation::ExpandDims &op) override;
   void visit(const ir::operation::Fill &op) override;
   void visit(const ir::operation::FullyConnected &op) override;
   void visit(const ir::operation::FusedBatchNorm &op) override;
   void visit(const ir::operation::Gather &op) override;
   void visit(const ir::operation::If &op) override;
-  void visit(const ir::operation::Log &op) override;
-  void visit(const ir::operation::LogicalNot &op) override;
-  void visit(const ir::operation::LogicalOr &op) override;
-  void visit(const ir::operation::Logistic &op) override;
   void visit(const ir::operation::L2Normalization &op) override;
   void visit(const ir::operation::MatrixBandPart &op) override;
-  void visit(const ir::operation::Max &op) override;
-  void visit(const ir::operation::Min &op) override;
-  void visit(const ir::operation::Mul &op) override;
-  void visit(const ir::operation::Neg &op) override;
   void visit(const ir::operation::OneHot &op) override;
   void visit(const ir::operation::Pack &op) override;
   void visit(const ir::operation::Pad &op) override;
@@ -113,27 +94,21 @@ private:
   void visit(const ir::operation::Range &op) override;
   void visit(const ir::operation::Reduce &op) override;
   void visit(const ir::operation::Reshape &op) override;
-  void visit(const ir::operation::Round &op) override;
-  void visit(const ir::operation::RSQRT &op) override;
   void visit(const ir::operation::ResizeBilinear &op) override;
   void visit(const ir::operation::Reverse &op) override;
   void visit(const ir::operation::Select &op) override;
   void visit(const ir::operation::Shape &op) override;
-  void visit(const ir::operation::Sin &op) override;
   void visit(const ir::operation::Slice &op) override;
   void visit(const ir::operation::Softmax &op) override;
   void visit(const ir::operation::SpaceToBatchND &op) override;
   void visit(const ir::operation::Split &op) override;
   void visit(const ir::operation::Squeeze &op) override;
   void visit(const ir::operation::StridedSlice &op) override;
-  void visit(const ir::operation::Sub &op) override;
   void visit(const ir::operation::SquaredDifference &op) override;
-  void visit(const ir::operation::Tanh &op) override;
   void visit(const ir::operation::Tile &op) override;
   void visit(const ir::operation::Transpose &op) override;
   void visit(const ir::operation::Unpack &op) override;
   void visit(const ir::operation::While &op) override;
-  void visit(const ir::operation::ZerosLike &op) override;
 
 private:
   /**
@@ -149,7 +124,8 @@ private:
   void handleSimpleUnaryOp(const ir::Operation &op, const ir::OperandIndex input_idx);
 
 private:
-  const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<ir::LoweredGraph>> &_lowered_subgs;
+  const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
+      &_lowered_subgs;
   // _operands and _operations can be changed by controlflow operation
   ir::Operands &_operands;     // operands of current subgraph
   ir::Operations &_operations; // operations of current subgraph
index bca80db..6f66596 100644 (file)
@@ -38,46 +38,34 @@ namespace exec
 class DynamicShapeInferer : public ir::OperationVisitor
 {
 public:
-  DynamicShapeInferer(const ir::Operands &operands, backend::IDynamicTensorManager *tensor_manager,
+  DynamicShapeInferer(const ir::Operands &operands,
                       const std::shared_ptr<backend::ITensorRegistry> &tensor_registry)
-      : _operands(operands), _dynamic_tensor_manager(tensor_manager),
-        _tensor_registry(tensor_registry)
+      : _operands(operands), _tensor_registry(tensor_registry)
   {
     UNUSED_RELEASE(_operands);
-    UNUSED_RELEASE(_dynamic_tensor_manager);
     UNUSED_RELEASE(_tensor_registry);
   }
 
 public:
   // TODO Define visitors for operations. List them in alphabetic order.
   // Remove TODO when any op starting from the alphabet is added
-  void visit(const ir::operation::Abs &op) override;
-  void visit(const ir::operation::Add &op) override;
   void visit(const ir::operation::ArgMax &op) override;
   void visit(const ir::operation::BatchMatMul &op) override;
+  void visit(const ir::operation::BinaryArithmetic &op) override;
   void visit(const ir::operation::BroadcastTo &op) override;
-  void visit(const ir::operation::Cast &op) override;
   void visit(const ir::operation::Comparison &op) override;
   void visit(const ir::operation::Concat &op) override;
   void visit(const ir::operation::Conv2D &op) override;
-  void visit(const ir::operation::Cos &op) override;
-  void visit(const ir::operation::Div &op) override;
-  void visit(const ir::operation::Exp &op) override;
+  void visit(const ir::operation::ElementwiseActivation &op) override;
+  void visit(const ir::operation::ElementwiseBinary &op) override;
+  void visit(const ir::operation::ElementwiseUnary &op) override;
   void visit(const ir::operation::ExpandDims &op) override;
   void visit(const ir::operation::Fill &op) override;
   void visit(const ir::operation::FullyConnected &op) override;
   void visit(const ir::operation::FusedBatchNorm &op) override;
   void visit(const ir::operation::Gather &op) override;
-  void visit(const ir::operation::Log &op) override;
-  void visit(const ir::operation::LogicalNot &op) override;
-  void visit(const ir::operation::LogicalOr &op) override;
-  void visit(const ir::operation::Logistic &op) override;
   void visit(const ir::operation::L2Normalization &op) override;
   void visit(const ir::operation::MatrixBandPart &op) override;
-  void visit(const ir::operation::Max &op) override;
-  void visit(const ir::operation::Min &op) override;
-  void visit(const ir::operation::Mul &op) override;
-  void visit(const ir::operation::Neg &op) override;
   void visit(const ir::operation::OneHot &op) override;
   void visit(const ir::operation::Pack &op) override;
   void visit(const ir::operation::Pad &op) override;
@@ -87,27 +75,21 @@ public:
   void visit(const ir::operation::Range &op) override;
   void visit(const ir::operation::Reduce &op) override;
   void visit(const ir::operation::Reshape &op) override;
-  void visit(const ir::operation::Round &op) override;
-  void visit(const ir::operation::RSQRT &op) override;
   void visit(const ir::operation::ResizeBilinear &op) override;
   void visit(const ir::operation::Reverse &op) override;
   void visit(const ir::operation::Select &op) override;
   void visit(const ir::operation::Shape &op) override;
-  void visit(const ir::operation::Sin &op) override;
   void visit(const ir::operation::Slice &op) override;
   void visit(const ir::operation::Softmax &op) override;
   void visit(const ir::operation::SpaceToBatchND &op) override;
   void visit(const ir::operation::Split &op) override;
   void visit(const ir::operation::Squeeze &op) override;
   void visit(const ir::operation::StridedSlice &op) override;
-  void visit(const ir::operation::Sub &op) override;
   void visit(const ir::operation::SquaredDifference &op) override;
-  void visit(const ir::operation::Tanh &op) override;
   void visit(const ir::operation::Tile &op) override;
   void visit(const ir::operation::Transpose &op) override;
   void visit(const ir::operation::Unpack &op) override;
   // TODO write op starting from V
-  void visit(const ir::operation::ZerosLike &op) override;
 
 private:
   /**
@@ -127,11 +109,6 @@ private:
    */
   const ir::Operands &_operands;
   /**
-   * @brief To allocate memory for output tensor if needed
-   */
-  // TODO Remove this, as it is no longer used
-  backend::IDynamicTensorManager *_dynamic_tensor_manager;
-  /**
    * @brief To get tensor object and access tensor-level info, e.g., ITensor::buffer()
    */
   std::shared_ptr<backend::ITensorRegistry> _tensor_registry;
index 46e05a2..6c8bab6 100644 (file)
@@ -80,8 +80,6 @@ struct DynAllocInfo
 {
   /// @brief index of input tensor whose memory needs to be allocated at execution time
   ir::OperandIndex ind;
-  /// @brief dynamic tensor manager that can allocate memory when input tensor is dynamic
-  backend::IDynamicTensorManager *dyn_tensor_manager;
 };
 
 using DynAllocInfoMap = std::unordered_map<std::shared_ptr<backend::ITensor>, DynAllocInfo>;
index c10c367..d1810ec 100644 (file)
@@ -62,8 +62,8 @@ struct IODescription
 {
   std::vector<std::unique_ptr<InputDesc>> inputs;
   std::vector<std::unique_ptr<OutputDesc>> outputs;
-  // Contains shape of input set by set_input_tensorinfo
-  std::unordered_map<ir::IOIndex, ir::Shape> input_shape_signature;
+  // Contains shape of input set by nnfw_set_input_tensorinfo(..)
+  std::unordered_map<ir::IOIndex, ir::Shape> dynamic_input_shapes;
 };
 
 } // namespace exec
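
The rename ties the field to its public entry point: shapes recorded here come from the nnfw C API's nnfw_set_input_tensorinfo(), which lets an application override an input shape before a run. A hedged usage sketch, assuming the nnfw_tensorinfo layout of this release:

    nnfw_tensorinfo ti;
    ti.dtype = NNFW_TYPE_TENSOR_FLOAT32;
    ti.rank = 2;
    ti.dims[0] = 1;   // new batch size
    ti.dims[1] = 128;
    // Recorded into IODescription::dynamic_input_shapes for input 0.
    nnfw_set_input_tensorinfo(session, 0, &ti);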
index fb956fe..2103e6e 100644 (file)
@@ -60,8 +60,8 @@ public:
   OperandIndex addOperand(const Shape &shape, const TypeInfo &type);
   OperationIndex addOperation(std::unique_ptr<Operation> &&node);
   void setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data);
-  void addInput(const OperandIndex &ind);
-  void addOutput(const OperandIndex &ind);
+  void addInput(const OperandIndex &ind, const std::string &name = "");
+  void addOutput(const OperandIndex &ind, const std::string &name = "");
   void finishBuilding(void);
   void removeOperand(const OperandIndex &ind) { _operands.remove(ind); }
   bool isBuildingPhase(void) const { return _phase == Phase::BUILDING; }
@@ -94,6 +94,8 @@ public:
   OperandIndexSequence &getInputs() { return _inputs; }
   const OperandIndexSequence &getOutputs() const { return _outputs; }
   OperandIndexSequence &getOutputs() { return _outputs; }
+  IOIndex getInputIndex(const std::string &name) const;
+  IOIndex getOutputIndex(const std::string &name) const;
   const Operands &operands() const { return _operands; }
   Operands &operands() { return _operands; } // TODO Remove this non-const accessor
   const Operations &operations() const { return _operations; }
@@ -108,6 +110,8 @@ private:
   Operands _operands;
   OperandIndexSequence _inputs;
   OperandIndexSequence _outputs;
+  std::unordered_map<std::string, IOIndex> _name_to_input;
+  std::unordered_map<std::string, IOIndex> _name_to_output;
   // Child subgraphs
   std::shared_ptr<Subgraphs> _subgraphs;
   // TFLite and circle's default layout is NHWC;
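
With the optional name parameters above, model inputs and outputs can be looked up by signature name instead of positional index. A hypothetical builder snippet using only those signatures (shape and type assumed defined):

    auto ind = graph.addOperand(shape, type);
    graph.addInput(ind, "image");                  // register input under a name
    ir::IOIndex io = graph.getInputIndex("image"); // later, resolve by name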
index e42db72..1d962c1 100644 (file)
@@ -40,6 +40,12 @@ struct Stride
   uint32_t horizontal;
 };
 
+struct Dilation
+{
+  uint32_t width_factor;
+  uint32_t height_factor;
+};
+
 } // namespace ir
 } // namespace onert
 
diff --git a/runtime/onert/core/include/ir/LoweredGraph.h b/runtime/onert/core/include/ir/LoweredGraph.h
deleted file mode 100644 (file)
index d6583df..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_LOWERED_GRAPH_H__
-#define __ONERT_IR_LOWERED_GRAPH_H__
-
-#include "ir/Graph.h"
-#include "ir/LowerInfoMap.h"
-#include "ir/OpSequences.h"
-#include "compiler/BackendResolver.h"
-#include "compiler/Compiler.h"
-
-namespace onert
-{
-namespace ir
-{
-
-/**
- * @brief Class that contains lowering information on graph.
- *        In addition, after lowering, operands in graph will be set to "dynamic"
- *        if the shape of output of an operation cannot be decided at compilation time.
- */
-class LoweredGraph
-{
-public:
-  LoweredGraph(const Graph &graph, const compiler::CompilerOptions &options);
-
-  Graph &graph() { return _graph; }
-  const Graph &graph() const { return _graph; }
-  const LowerInfoMap *getLowerInfo() const { return &_lower_info_map; }
-  const operation::LowerInfo *getLowerInfo(const OpSequenceIndex &op_seq_index) const;
-  void setLowerInfo(const OpSequenceIndex &op_seq_index,
-                    std::unique_ptr<operation::LowerInfo> &&lower_info);
-  void removeLowerInfo(const OpSequenceIndex &op_seq_index);
-  const operand::LowerInfo *getLowerInfo(const OperandIndex &index) const;
-  operand::LowerInfo *getLowerInfo(const OperandIndex &index);
-  void setLowerInfo(const OperandIndex &index, std::unique_ptr<operand::LowerInfo> &&lower_info);
-  void removeLowerInfo(const OperandIndex &index);
-  OpSequences &op_seqs() { return _op_seqs; }
-  const OpSequences &op_seqs() const { return _op_seqs; }
-  void iterateTopolOpSeqs(
-      const std::function<void(const OpSequenceIndex &, const OpSequence &)> &fn) const;
-  void iterateTopolOpSeqs(const std::function<void(const OpSequenceIndex &, OpSequence &)> &fn);
-  const backend::BackendContexts &backend_contexts() { return _backend_contexts; }
-  const backend::BackendContexts &backend_contexts() const { return _backend_contexts; }
-  std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks() { return _indexed_ranks; }
-
-private:
-  void makeOpSequences(OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info,
-                       const compiler::CompilerOptions &options,
-                       const compiler::BackendResolver &backend_resolver);
-
-  void
-  manipulateLowerInfo(OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info,
-                      bool is_primary);
-  void dumpLowerInfo();
-  bool mergeable(const OpSequenceIndex &op_seq_index, const OperationIndex &node_index,
-                 Layout layout, const compiler::BackendResolver &backend_resolver);
-  OpSequenceIndex appendFreshSingleOpSequence(const OperationIndex &node_index,
-                                              const Operation &node);
-
-private:
-  Graph _graph;
-  backend::BackendContexts _backend_contexts;
-  std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
-  LowerInfoMap _lower_info_map;
-  // Pass(for Perm) can accept only graph so that Graph has OpSequences as a member
-  OpSequences _op_seqs;
-};
-
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_LOWERED_GRAPH_H__
index 6ed8499..ab258f3 100644 (file)
@@ -63,13 +63,6 @@ public:
    */
   OpSequenceIndex getOperation(const OperationIndex &operation_index) const;
   /**
-   * @brief Dump OpSequences
-   *
-   * @param msg Message that will be displayed
-   * @param graph Graph that has information used for dump
-   */
-  void dump(const std::string &msg, const Operations &operations) const;
-  /**
    * @brief Remove an operation from OpSequence
    *
    * @param operation_index Operation index to be removed
@@ -84,6 +77,14 @@ private:
   mutable std::unordered_map<OperationIndex, OpSequenceIndex> _seq_indexes;
 };
 
+/**
+ * @brief Dump OpSequences
+ *
+ * @param op_seqs Operation Sequences
+ * @param operations Operation context
+ */
+void dumpOpSequences(const OpSequences &op_seqs, const Operations &operations);
+
 } // namespace ir
 } // namespace onert
 
index 30c4ff2..17bbbc2 100644 (file)
 // This file has no ifdef guard intentionally
 
 #include "ir/operation/BatchToSpaceND.h"
+#include "ir/operation/BinaryArithmetic.h"
 #include "ir/operation/BroadcastTo.h"
 #include "ir/operation/Conv2D.h"
-#include "ir/operation/MaxPool2D.h"
-#include "ir/operation/AvgPool2D.h"
+#include "ir/operation/Pool2D.h"
 #include "ir/operation/Concat.h"
 #include "ir/operation/Reshape.h"
 #include "ir/operation/Fill.h"
 #include "ir/operation/Transpose.h"
 #include "ir/operation/Permute.h"
 #include "ir/operation/Reduce.h"
-#include "ir/operation/Add.h"
-#include "ir/operation/Sub.h"
 #include "ir/operation/DepthwiseConv2D.h"
 #include "ir/operation/Slice.h"
 #include "ir/operation/StridedSlice.h"
-#include "ir/operation/Mul.h"
 #include "ir/operation/Squeeze.h"
-#include "ir/operation/Tanh.h"
-#include "ir/operation/Log.h"
-#include "ir/operation/Logistic.h"
-#include "ir/operation/Cast.h"
-#include "ir/operation/Div.h"
-#include "ir/operation/Exp.h"
+#include "ir/operation/ElementwiseActivation.h"
+#include "ir/operation/ElementwiseBinary.h"
+#include "ir/operation/ElementwiseUnary.h"
 #include "ir/operation/ExpandDims.h"
 #include "ir/operation/Comparison.h"
-#include "ir/operation/LogicalAnd.h"
-#include "ir/operation/LogicalOr.h"
-#include "ir/operation/LogicalNot.h"
 #include "ir/operation/LSTM.h"
-#include "ir/operation/RSQRT.h"
-#include "ir/operation/ReLU.h"
 #include "ir/operation/ResizeBilinear.h"
-#include "ir/operation/ReLU1.h"
-#include "ir/operation/ReLU6.h"
+#include "ir/operation/ResizeNearestNeighbor.h"
 #include "ir/operation/Reverse.h"
 #include "ir/operation/RNN.h"
-#include "ir/operation/Round.h"
-#include "ir/operation/Floor.h"
 #include "ir/operation/SpaceToBatchND.h"
 #include "ir/operation/SpaceToDepth.h"
-#include "ir/operation/L2Pool2D.h"
 #include "ir/operation/EmbeddingLookup.h"
 #include "ir/operation/L2Normalization.h"
 #include "ir/operation/HashtableLookup.h"
 #include "ir/operation/InstanceNorm.h"
 #include "ir/operation/PReLU.h"
 #include "ir/operation/TransposeConv.h"
-#include "ir/operation/SQRT.h"
 #include "ir/operation/SquaredDifference.h"
 #include "ir/operation/TopKV2.h"
 #include "ir/operation/Gather.h"
-#include "ir/operation/Neg.h"
-#include "ir/operation/Abs.h"
 #include "ir/operation/ArgMax.h"
-#include "ir/operation/Dequantize.h"
 #include "ir/operation/LocalResponseNormalization.h"
 #include "ir/operation/DepthToSpace.h"
 #include "ir/operation/Pack.h"
 #include "ir/operation/SplitV.h"
 #include "ir/operation/Unpack.h"
 #include "ir/operation/Pad.h"
-#include "ir/operation/Min.h"
-#include "ir/operation/Max.h"
 #include "ir/operation/Custom.h"
 #include "ir/operation/Einsum.h"
 #include "ir/operation/OneHot.h"
-#include "ir/operation/Cos.h"
-#include "ir/operation/Sin.h"
 #include "ir/operation/Shape.h"
 #include "ir/operation/ConvertFp32ToFp16.h"
 #include "ir/operation/ConvertFp16ToFp32.h"
 #include "ir/operation/If.h"
 #include "ir/operation/While.h"
 #include "ir/operation/Pow.h"
-#include "ir/operation/ZerosLike.h"
 #include "ir/operation/Tile.h"
 #include "ir/operation/Range.h"
+#include "ir/operation/Rank.h"
 #include "ir/operation/BCQFullyConnected.h"
 #include "ir/operation/BCQGather.h"
 #include "ir/operation/MatrixBandPart.h"
 #include "ir/operation/BatchMatMul.h"
 #include "ir/operation/FusedBatchNorm.h"
 #include "ir/operation/LogSoftmax.h"
-#include "ir/operation/Quantize.h"
 #include "ir/operation/StatelessRandomUniform.h"
index 75c6d82..ab21468 100644 (file)
 #endif
 
 // Internal Name
-OP(Add)
-OP(Sub)
 OP(BatchToSpaceND)
+OP(BinaryArithmetic)
 OP(BroadcastTo)
-OP(Cast)
 OP(Conv2D)
 OP(DepthwiseConv2D)
-OP(AvgPool2D)
-OP(MaxPool2D)
+OP(Pool2D)
 OP(Concat)
 OP(Fill)
 OP(FullyConnected)
 OP(Reduce)
 OP(Reshape)
-OP(Mul)
 OP(Softmax)
 OP(Squeeze)
 OP(Slice)
 OP(StridedSlice)
-OP(Tanh)
-OP(Logistic)
-OP(Div)
 OP(Transpose)
-OP(Exp)
+OP(ElementwiseActivation)
+OP(ElementwiseBinary)
+OP(ElementwiseUnary)
 OP(ExpandDims)
 OP(Comparison)
-OP(LogicalAnd)
-OP(LogicalOr)
-OP(LogicalNot)
 OP(LSTM)
-OP(RSQRT)
-OP(ReLU)
 OP(ResizeBilinear)
-OP(ReLU1)
-OP(ReLU6)
+OP(ResizeNearestNeighbor)
 OP(Reverse)
 OP(RNN)
-OP(Round)
-OP(Floor)
 OP(SpaceToBatchND)
 OP(SpaceToDepth)
-OP(L2Pool2D)
 OP(EmbeddingLookup)
 OP(L2Normalization)
 OP(HashtableLookup)
 OP(InstanceNorm)
 OP(PReLU)
 OP(TransposeConv)
-OP(SQRT)
 OP(SquaredDifference)
 OP(TopKV2)
 OP(Gather)
-OP(Neg)
-OP(Abs)
 OP(ArgMax)
-OP(Dequantize)
 OP(Einsum)
 OP(LocalResponseNormalization)
 OP(DepthToSpace)
@@ -86,26 +68,20 @@ OP(Unpack)
 OP(Pad)
 OP(Custom)
 OP(Permute)
-OP(Min)
-OP(Max)
 OP(OneHot)
-OP(Cos)
-OP(Sin)
 OP(Shape)
 OP(ConvertFp32ToFp16)
 OP(ConvertFp16ToFp32)
 OP(If)
 OP(While)
-OP(Log)
 OP(Pow)
-OP(ZerosLike)
 OP(Tile)
 OP(Range)
+OP(Rank)
 OP(BCQFullyConnected)
 OP(BCQGather)
 OP(MatrixBandPart)
 OP(BatchMatMul)
 OP(FusedBatchNorm)
 OP(LogSoftmax)
-OP(Quantize)
 OP(StatelessRandomUniform)
index b905391..8a7bcdb 100644 (file)
@@ -65,7 +65,8 @@ struct Padding
 // TODO Change to Padding struct's method
 const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShape &ifm_shape,
                                        const FeatureShape &ofm_shape, const Stride &stride,
-                                       uint32_t kw, uint32_t kh);
+                                       uint32_t kw, uint32_t kh, uint32_t dwf = 1,
+                                       uint32_t dhf = 1);
 
 } // namespace ir
 } // namespace onert
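
The new dwf/dhf parameters are dilation width/height factors; defaulting them to 1 keeps every existing call site valid. For SAME-style padding the quantity that matters is the effective (dilated) kernel extent, sketched here:

    // Effective kernel extent under dilation; with d == 1 this reduces to k.
    inline uint32_t effectiveFilterSize(uint32_t k, uint32_t d)
    {
      return (k - 1) * d + 1;
    }
    // e.g. a 3x3 filter with dilation factor 2 spans effectiveFilterSize(3, 2) == 5
    // input rows/columns, so SAME padding must cover 5, not 3.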
diff --git a/runtime/onert/core/include/ir/operation/Abs.h b/runtime/onert/core/include/ir/operation/Abs.h
deleted file mode 100644 (file)
index 9126c00..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_ABS_H__
-#define __ONERT_IR_OPERATION_ABS_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Abs : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT = 0
-  };
-
-public:
-  Abs(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Abs; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_ABS_H__
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_IR_OPERATION_ADD_H__
-#define __ONERT_IR_OPERATION_ADD_H__
+#ifndef __ONERT_IR_OPERATION_BINARY_ARITHMETIC_H__
+#define __ONERT_IR_OPERATION_BINARY_ARITHMETIC_H__
 
 #include "ir/Operation.h"
 #include "ir/InternalType.h"
@@ -27,7 +27,7 @@ namespace ir
 namespace operation
 {
 
-class Add : public Operation
+class BinaryArithmetic final : public Operation
 {
 public:
   enum Input
@@ -36,17 +36,28 @@ public:
     RHS
   };
 
+  enum class ArithmeticType
+  {
+    ADD,
+    SUB,
+    MUL,
+    DIV
+  };
+
   struct Param
   {
+    ArithmeticType arithmetic_type;
     Activation activation;
   };
 
 public:
-  Add(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+  BinaryArithmetic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+                   const Param &param);
 
 public:
   void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Add; }
+  std::string name() const override;
+  OpCode opcode() const final { return OpCode::BinaryArithmetic; }
 
 public:
   const Param &param() const { return _param; }
@@ -59,4 +70,4 @@ private:
 } // namespace ir
 } // namespace onert
 
-#endif // __ONERT_IR_OPERATION_ADD_H__
+#endif // __ONERT_IR_OPERATION_BINARY_ARITHMETIC_H__
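
The four per-op classes (Add, Sub, Mul, Div) collapse into one node type whose Param carries the arithmetic kind, so a backend dispatches once on the parameter instead of implementing four visitors. Illustrative dispatch, not actual backend code:

    using AT = onert::ir::operation::BinaryArithmetic::ArithmeticType;
    switch (node.param().arithmetic_type)
    {
      case AT::ADD: /* run add kernel */ break;
      case AT::SUB: /* run sub kernel */ break;
      case AT::MUL: /* run mul kernel */ break;
      case AT::DIV: /* run div kernel */ break;
    }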
index 98906ad..06c0334 100644 (file)
@@ -42,7 +42,7 @@ public:
 
 public:
   void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Cast; }
+  OpCode opcode() const final { return OpCode::BroadcastTo; }
 };
 
 } // namespace operation
diff --git a/runtime/onert/core/include/ir/operation/Cast.h b/runtime/onert/core/include/ir/operation/Cast.h
deleted file mode 100644 (file)
index 6fb8c10..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_CAST_H__
-#define __ONERT_IR_OPERATION_CAST_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Cast : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT = 0
-  };
-
-public:
-  Cast(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Cast; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_CAST_H__
index e23bf3e..d8c7b67 100644 (file)
@@ -45,6 +45,7 @@ public:
     Stride stride;
     Padding padding;
     Activation activation;
+    Dilation dilation;
   };
 
 public:
diff --git a/runtime/onert/core/include/ir/operation/Dequantize.h b/runtime/onert/core/include/ir/operation/Dequantize.h
deleted file mode 100644 (file)
index 97a08b3..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_DEQUANTIZE_H__
-#define __ONERT_IR_OPERATION_DEQUANTIZE_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Dequantize : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT = 0
-  };
-
-public:
-  Dequantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Dequantize; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_DEQUANTIZE_H__
index a3426cc..9892c24 100644 (file)
@@ -41,7 +41,7 @@ public:
 
 public:
   void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Add; }
+  OpCode opcode() const final { return OpCode::Einsum; }
 
 public:
   const Param &param() const { return _param; }
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * limitations under the License.
  */
 
-#ifndef __ONERT_IR_OPERATION_DIV_H__
-#define __ONERT_IR_OPERATION_DIV_H__
+#ifndef __ONERT_IR_OPERATION_ELEMENTWISE_ACTIVATION_H__
+#define __ONERT_IR_OPERATION_ELEMENTWISE_ACTIVATION_H__
 
 #include "ir/Operation.h"
-#include "ir/InternalType.h"
 
 namespace onert
 {
@@ -27,30 +26,46 @@ namespace ir
 namespace operation
 {
 
-class Div : public Operation
+class ElementwiseActivation : public Operation
 {
 public:
   enum Input
   {
-    LHS = 0,
-    RHS
+    INPUT = 0
+  };
+
+  enum class Type
+  {
+    ELU,
+    LOGISTIC,
+    RELU,
+    TANH,
+    LEAKY_RELU
   };
 
   struct Param
   {
-    Activation activation;
+    Type op_type;
+    float alpha;
+    float beta;
+    Param() : op_type(Type::ELU), alpha(0.0f), beta(0.0f) {}
   };
 
 public:
-  Div(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+  ElementwiseActivation(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+                        const Param &param);
 
 public:
   void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Div; }
+  std::string name() const override;
+  OpCode opcode() const final { return OpCode::ElementwiseActivation; }
 
 public:
   const Param &param() const { return _param; }
 
+public:
+  static float infinity;
+
 private:
   Param _param;
 };
@@ -59,4 +74,4 @@ private:
 } // namespace ir
 } // namespace onert
 
-#endif // __ONERT_IR_OPERATION_DIV_H__
+#endif // __ONERT_IR_OPERATION_ELEMENTWISE_ACTIVATION_H__
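
The alpha/beta fields generalize what used to be separate node types. A plausible reading of this interface (hedged; the authoritative encodings live in the op's .cc and the backends) is that the ReLU family rides on the bounds, e.g. ReLU6 as RELU with alpha = 6, ReLU1 as alpha = 1 / beta = -1, an unbounded ReLU using the static `infinity` member, and LEAKY_RELU keeping its slope in alpha:

    // Hypothetical parameter setups, inside ElementwiseActivation's scope,
    // under the reading described above.
    Param relu6; relu6.op_type = Type::RELU;       relu6.alpha = 6.0f;
    Param leaky; leaky.op_type = Type::LEAKY_RELU; leaky.alpha = 0.2f; // slope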
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * limitations under the License.
  */
 
-#ifndef __ONERT_IR_OPERATION_MUL_H__
-#define __ONERT_IR_OPERATION_MUL_H__
+#ifndef __ONERT_IR_OPERATION_ELEMENTWISEBINARY_H__
+#define __ONERT_IR_OPERATION_ELEMENTWISEBINARY_H__
 
 #include "ir/Operation.h"
-#include "ir/InternalType.h"
 
 namespace onert
 {
@@ -27,7 +26,7 @@ namespace ir
 namespace operation
 {
 
-class Mul : public Operation
+class ElementwiseBinary : public Operation
 {
 public:
   enum Input
@@ -36,17 +35,27 @@ public:
     RHS
   };
 
+  enum class ElementwiseBinaryType
+  {
+    LOGICAL_AND,
+    LOGICAL_OR,
+    MAX,
+    MIN
+  };
+
   struct Param
   {
-    Activation activation;
+    ElementwiseBinaryType op_type;
   };
 
 public:
-  Mul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+  ElementwiseBinary(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+                    const Param &param);
 
 public:
   void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Mul; }
+  std::string name() const override;
+  OpCode opcode() const final { return OpCode::ElementwiseBinary; }
 
 public:
   const Param &param() const { return _param; }
@@ -59,4 +68,4 @@ private:
 } // namespace ir
 } // namespace onert
 
-#endif // __ONERT_IR_OPERATION_MUL_H__
+#endif // __ONERT_IR_OPERATION_ELEMENTWISEBINARY_H__
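The four former binary nodes (LogicalAnd, LogicalOr, Max, Min) now differ only in the op_type field. A short sketch of the Max case, under the same placeholder-index assumptions as the ElementwiseActivation sketch above:

    void build_max_node()
    {
      OperandIndex lhs{0u}, rhs{1u}, out{2u}; // placeholders

      operation::ElementwiseBinary::Param param;
      param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::MAX; // was operation::Max

      operation::ElementwiseBinary node{OperandIndexSequence{lhs, rhs},
                                        OperandIndexSequence{out}, param};
      (void)node;
    }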
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * limitations under the License.
  */
 
-#ifndef __ONERT_IR_OPERATION_MAXPOOL2D_H__
-#define __ONERT_IR_OPERATION_MAXPOOL2D_H__
-
-#include <memory>
+#ifndef __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
+#define __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
 
 #include "ir/Operation.h"
-#include "ir/InternalType.h"
-#include "ir/Padding.h"
 
 namespace onert
 {
@@ -30,7 +26,7 @@ namespace ir
 namespace operation
 {
 
-class MaxPool2D : public Operation
+class ElementwiseUnary : public Operation
 {
 public:
   enum Input
@@ -38,22 +34,40 @@ public:
     INPUT = 0
   };
 
+  enum class Type
+  {
+    ABS,
+    CAST,
+    COS,
+    DEQUANTIZE,
+    ERF,
+    EXP,
+    FLOOR,
+    LOG,
+    LOGICAL_NOT,
+    NEG,
+    QUANTIZE,
+    ROUND,
+    RSQRT,
+    SIN,
+    SQRT,
+    SQUARE,

+    ZEROS_LIKE
+  };
+
   struct Param
   {
-    uint32_t kh;
-    uint32_t kw;
-    Stride stride;
-    Padding padding;
-    Activation activation;
+    Type op_type;
   };
 
 public:
-  MaxPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
-            const Param &param);
+  ElementwiseUnary(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+                   const Param &param);
 
 public:
   void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::MaxPool2D; }
+  std::string name() const override;
+  OpCode opcode() const final { return OpCode::ElementwiseUnary; }
 
 public:
   const Param &param() const { return _param; }
@@ -66,4 +80,4 @@ private:
 } // namespace ir
 } // namespace onert
 
-#endif // __ONERT_IR_OPERATION_MAXPOOL2D_H__
+#endif // __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__
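The unary consolidation is the broadest: seventeen Type values stand in for what were separate headers such as Exp, Floor, Log, Neg, and RSQRT. A sketch of the Exp case, again with placeholder indices:

    void build_exp_node()
    {
      OperandIndex in{0u}, out{1u}; // placeholders

      operation::ElementwiseUnary::Param param;
      param.op_type = operation::ElementwiseUnary::Type::EXP; // formerly operation::Exp

      operation::ElementwiseUnary node{OperandIndexSequence{in},
                                       OperandIndexSequence{out}, param};
      (void)node;
    }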
diff --git a/runtime/onert/core/include/ir/operation/Exp.h b/runtime/onert/core/include/ir/operation/Exp.h
deleted file mode 100644 (file)
index 2e68ff0..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_EXP_H__
-#define __ONERT_IR_OPERATION_EXP_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Exp : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT = 0
-  };
-
-public:
-  Exp(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Exp; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_EXP_H__
diff --git a/runtime/onert/core/include/ir/operation/Floor.h b/runtime/onert/core/include/ir/operation/Floor.h
deleted file mode 100644 (file)
index b34699c..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_FLOOR_H__
-#define __ONERT_IR_OPERATION_FLOOR_H__
-
-#include <memory>
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Floor : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT = 0
-  };
-
-public:
-  Floor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Floor; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_FLOOR_H__
diff --git a/runtime/onert/core/include/ir/operation/Log.h b/runtime/onert/core/include/ir/operation/Log.h
deleted file mode 100644 (file)
index a6e3ca3..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOG_H__
-#define __ONERT_IR_OPERATION_LOG_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Log : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT = 0
-  };
-
-public:
-  Log(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Log; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOG_H__
diff --git a/runtime/onert/core/include/ir/operation/LogicalAnd.h b/runtime/onert/core/include/ir/operation/LogicalAnd.h
deleted file mode 100644 (file)
index dc853b6..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOGICAL_AND_H__
-#define __ONERT_IR_OPERATION_LOGICAL_AND_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class LogicalAnd : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT0 = 0,
-    INPUT1 = 1,
-  };
-
-public:
-  LogicalAnd(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::LogicalAnd; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOGICAL_AND_H__
diff --git a/runtime/onert/core/include/ir/operation/LogicalNot.h b/runtime/onert/core/include/ir/operation/LogicalNot.h
deleted file mode 100644 (file)
index 9519f6d..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOGICAL_NOT_H__
-#define __ONERT_IR_OPERATION_LOGICAL_NOT_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class LogicalNot : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT = 0,
-  };
-
-public:
-  LogicalNot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::LogicalNot; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOGICAL_NOT_H__
diff --git a/runtime/onert/core/include/ir/operation/LogicalOr.h b/runtime/onert/core/include/ir/operation/LogicalOr.h
deleted file mode 100644 (file)
index c4b658c..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOGICAL_OR_H__
-#define __ONERT_IR_OPERATION_LOGICAL_OR_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class LogicalOr : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT0 = 0,
-    INPUT1 = 1,
-  };
-
-public:
-  LogicalOr(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::LogicalOr; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOGICAL_OR_H__
diff --git a/runtime/onert/core/include/ir/operation/Logistic.h b/runtime/onert/core/include/ir/operation/Logistic.h
deleted file mode 100644 (file)
index 5421e1c..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_LOGISTIC_H__
-#define __ONERT_IR_OPERATION_LOGISTIC_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Logistic : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT = 0
-  };
-
-public:
-  Logistic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Logistic; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_LOGISTIC_H__
diff --git a/runtime/onert/core/include/ir/operation/Max.h b/runtime/onert/core/include/ir/operation/Max.h
deleted file mode 100644 (file)
index df72d3a..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_MAX_H__
-#define __ONERT_IR_OPERATION_MAX_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Max : public Operation
-{
-public:
-  enum Input
-  {
-    LHS = 0,
-    RHS
-  };
-
-public:
-  Max(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Max; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_MAX_H__
diff --git a/runtime/onert/core/include/ir/operation/Mean.h b/runtime/onert/core/include/ir/operation/Mean.h
deleted file mode 100644 (file)
index ce2da90..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_MEAN_H__
-#define __ONERT_IR_OPERATION_MEAN_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Mean : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT,
-    AXES
-  };
-
-  struct Param
-  {
-    bool keep_dims;
-  };
-
-public:
-  Mean(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Mean; }
-
-public:
-  const Param &param() const { return _param; }
-
-private:
-  Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_MEAN_H__
diff --git a/runtime/onert/core/include/ir/operation/Min.h b/runtime/onert/core/include/ir/operation/Min.h
deleted file mode 100644 (file)
index 117301c..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_MIN_H__
-#define __ONERT_IR_OPERATION_MIN_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Min : public Operation
-{
-public:
-  enum Input
-  {
-    LHS = 0,
-    RHS
-  };
-
-public:
-  Min(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Min; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_MIN_H__
diff --git a/runtime/onert/core/include/ir/operation/Neg.h b/runtime/onert/core/include/ir/operation/Neg.h
deleted file mode 100644 (file)
index f8123c4..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_NEG_H__
-#define __ONERT_IR_OPERATION_NEG_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Neg : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT = 0
-  };
-
-public:
-  Neg(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Neg; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_NEG_H__
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_IR_OPERATION_AVGPOOL2D_H__
-#define __ONERT_IR_OPERATION_AVGPOOL2D_H__
+#ifndef __ONERT_IR_OPERATION_POOL2D_H__
+#define __ONERT_IR_OPERATION_POOL2D_H__
 
 #include <memory>
 
@@ -30,7 +30,7 @@ namespace ir
 namespace operation
 {
 
-class AvgPool2D : public Operation
+class Pool2D : public Operation
 {
 public:
   enum Input
@@ -38,23 +38,31 @@ public:
     INPUT = 0
   };
 
+  enum class PoolType
+  {
+    AVG,
+    L2,
+    MAX,
+  };
+
   struct Param
   {
+    PoolType op_type;
     uint32_t kh;
     uint32_t kw;
-
     Stride stride;
     Padding padding;
     Activation activation;
   };
 
 public:
-  AvgPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
-            const Param &param);
+  Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+         const Param &param);
 
 public:
   void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::AvgPool2D; }
+  std::string name() const override;
+  OpCode opcode() const final { return OpCode::Pool2D; }
 
 public:
   const Param &param() const { return _param; }
@@ -67,4 +75,4 @@ private:
 } // namespace ir
 } // namespace onert
 
-#endif // __ONERT_IR_OPERATION_AVGPOOL2D_H__
+#endif // __ONERT_IR_OPERATION_POOL2D_H__
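AvgPool2D, L2Pool2D, and MaxPool2D collapse the same way, with the kernel, stride, padding, and fused-activation fields carried over unchanged. A sketch of a 2x2 max pool; the Stride and Padding member names are assumptions inferred from their prior use in the per-op params, not verified signatures.

    void build_max_pool_node()
    {
      OperandIndex in{0u}, out{1u}; // placeholders

      operation::Pool2D::Param param;
      param.op_type = operation::Pool2D::PoolType::MAX; // formerly a MaxPool2D node
      param.kh = 2;
      param.kw = 2;
      param.stride.vertical = 2;               // assumed Stride member names
      param.stride.horizontal = 2;
      param.padding.type = PaddingType::VALID; // assumed Padding member name
      param.activation = Activation::NONE;

      operation::Pool2D pool{OperandIndexSequence{in}, OperandIndexSequence{out}, param};
      (void)pool;
    }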
diff --git a/runtime/onert/core/include/ir/operation/Quantize.h b/runtime/onert/core/include/ir/operation/Quantize.h
deleted file mode 100644 (file)
index 2533ce4..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_QUANTIZE_H__
-#define __ONERT_IR_OPERATION_QUANTIZE_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Quantize : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT = 0,
-  };
-
-public:
-  Quantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Quantize; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_QUANTIZE_H__
diff --git a/runtime/onert/core/include/ir/operation/RSQRT.h b/runtime/onert/core/include/ir/operation/RSQRT.h
deleted file mode 100644 (file)
index 64bb4f1..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_RSQRT_H__
-#define __ONERT_IR_OPERATION_RSQRT_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class RSQRT : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT = 0
-  };
-
-public:
-  RSQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::RSQRT; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_RSQRT_H__
  * limitations under the License.
  */
 
-#ifndef __ONERT_IR_OPERATION_COS_H__
-#define __ONERT_IR_OPERATION_COS_H__
+#ifndef __ONERT_IR_OPERATION_RANK_H__
+#define __ONERT_IR_OPERATION_RANK_H__
+
+#include <memory>
 
 #include "ir/Operation.h"
 
@@ -26,7 +28,7 @@ namespace ir
 namespace operation
 {
 
-class Cos : public Operation
+class Rank : public Operation
 {
 public:
   enum Input
@@ -35,15 +37,15 @@ public:
   };
 
 public:
-  Cos(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
+  Rank(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
 
 public:
   void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Cos; }
+  OpCode opcode() const final { return OpCode::Rank; }
 };
 
 } // namespace operation
 } // namespace ir
 } // namespace onert
 
-#endif // __ONERT_IR_OPERATION_COS_H__
+#endif // __ONERT_IR_OPERATION_RANK_H__
diff --git a/runtime/onert/core/include/ir/operation/ReLU.h b/runtime/onert/core/include/ir/operation/ReLU.h
deleted file mode 100644 (file)
index 9eb0c09..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_RELU_H__
-#define __ONERT_IR_OPERATION_RELU_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class ReLU : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT = 0
-  };
-
-public:
-  ReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::ReLU; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_RELU_H__
diff --git a/runtime/onert/core/include/ir/operation/ReLU1.h b/runtime/onert/core/include/ir/operation/ReLU1.h
deleted file mode 100644 (file)
index 134ee57..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_ReLU1_H__
-#define __ONERT_IR_OPERATION_ReLU1_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class ReLU1 : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT = 0
-  };
-
-public:
-  ReLU1(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::ReLU1; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_ReLU1_H__
diff --git a/runtime/onert/core/include/ir/operation/ReLU6.h b/runtime/onert/core/include/ir/operation/ReLU6.h
deleted file mode 100644 (file)
index e658c49..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_ReLU6_H__
-#define __ONERT_IR_OPERATION_ReLU6_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class ReLU6 : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT = 0
-  };
-
-public:
-  ReLU6(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::ReLU6; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_ReLU6_H__
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * limitations under the License.
  */
 
-#ifndef __ONERT_IR_OPERATION_L2_POOL_2D_H__
-#define __ONERT_IR_OPERATION_L2_POOL_2D_H__
+#ifndef __ONERT_IR_OPERATION_RESIZE_NEAREST_NEIGHBOR_H__
+#define __ONERT_IR_OPERATION_RESIZE_NEAREST_NEIGHBOR_H__
 
 #include <memory>
 
 #include "ir/Operation.h"
-#include "ir/InternalType.h"
-#include "ir/Padding.h"
 
 namespace onert
 {
@@ -30,7 +28,7 @@ namespace ir
 namespace operation
 {
 
-class L2Pool2D : public Operation
+class ResizeNearestNeighbor : public Operation
 {
 public:
   enum Input
@@ -40,20 +38,18 @@ public:
 
   struct Param
   {
-    Padding padding;
-    Stride stride;
-    uint32_t kw;
-    uint32_t kh;
-    Activation activation;
+    int32_t height_out;
+    int32_t width_out;
+    bool align_corners;
   };
 
 public:
-  L2Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
-           const Param &param);
+  ResizeNearestNeighbor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+                        const Param &param);
 
 public:
   void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::L2Pool2D; }
+  OpCode opcode() const final { return OpCode::ResizeNearestNeighbor; }
 
 public:
   const Param &param() const { return _param; }
@@ -66,4 +62,4 @@ private:
 } // namespace ir
 } // namespace onert
 
-#endif // __ONERT_IR_OPERATION_L2_POOL_2D_H__
+#endif // __ONERT_IR_OPERATION_RESIZE_NEAREST_NEIGHBOR_H__
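The repurposed header also swaps the pooling parameters for output-size ones. A sketch of filling the new Param; the concrete values are illustrative only.

    void fill_resize_param(onert::ir::operation::ResizeNearestNeighbor::Param &param)
    {
      param.height_out = 224; // illustrative target height
      param.width_out = 224;  // illustrative target width
      param.align_corners = false;
    }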
diff --git a/runtime/onert/core/include/ir/operation/Round.h b/runtime/onert/core/include/ir/operation/Round.h
deleted file mode 100644 (file)
index 44af0d8..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_ROUND_H__
-#define __ONERT_IR_OPERATION_ROUND_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Round : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT = 0
-  };
-
-public:
-  Round(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Round; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_ROUND_H__
diff --git a/runtime/onert/core/include/ir/operation/SQRT.h b/runtime/onert/core/include/ir/operation/SQRT.h
deleted file mode 100644 (file)
index 8563b1a..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_SQRT_H__
-#define __ONERT_IR_OPERATION_SQRT_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class SQRT : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT = 0
-  };
-
-public:
-  SQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::SQRT; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_SQRT_H__
index 400ac9d..33bf678 100644 (file)
@@ -41,7 +41,7 @@ public:
 
 public:
   void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Abs; }
+  OpCode opcode() const final { return OpCode::Select; }
 };
 
 } // namespace operation
diff --git a/runtime/onert/core/include/ir/operation/Sin.h b/runtime/onert/core/include/ir/operation/Sin.h
deleted file mode 100644 (file)
index aef44ab..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_SIN_H__
-#define __ONERT_IR_OPERATION_SIN_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Sin : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT = 0
-  };
-
-public:
-  Sin(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Sin; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_SIN_H__
diff --git a/runtime/onert/core/include/ir/operation/Sub.h b/runtime/onert/core/include/ir/operation/Sub.h
deleted file mode 100644 (file)
index 0674e6e..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_SUB_H__
-#define __ONERT_IR_OPERATION_SUB_H__
-
-#include "ir/Operation.h"
-#include "ir/InternalType.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Sub : public Operation
-{
-public:
-  enum Input
-  {
-    LHS = 0,
-    RHS
-  };
-
-  struct Param
-  {
-    Activation activation;
-  };
-
-public:
-  Sub(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Sub; }
-
-public:
-  const Param &param() const { return _param; }
-
-private:
-  Param _param;
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_SUB_H__
diff --git a/runtime/onert/core/include/ir/operation/Tanh.h b/runtime/onert/core/include/ir/operation/Tanh.h
deleted file mode 100644 (file)
index 9b8d03b..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_TANH_H__
-#define __ONERT_IR_OPERATION_TANH_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class Tanh : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT = 0
-  };
-
-public:
-  Tanh(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::Tanh; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_TANH_H__
diff --git a/runtime/onert/core/include/ir/operation/ZerosLike.h b/runtime/onert/core/include/ir/operation/ZerosLike.h
deleted file mode 100644 (file)
index 7c28518..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_OPERATION_ZEROS_LIKE_H__
-#define __ONERT_IR_OPERATION_ZEROS_LIKE_H__
-
-#include "ir/Operation.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-class ZerosLike : public Operation
-{
-public:
-  enum Input
-  {
-    INPUT = 0
-  };
-
-public:
-  ZerosLike(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs);
-
-public:
-  void accept(OperationVisitor &v) const override;
-  OpCode opcode() const final { return OpCode::ZerosLike; }
-};
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_OPERATION_ZEROS_LIKE_H__
index 1718e03..5077fad 100644 (file)
@@ -20,7 +20,7 @@
 
 //     Name                    | Type         | Default
 CONFIG(GRAPH_DOT_DUMP          , int          , "0")
-CONFIG(BACKENDS                , std::string  , "cpu;acl_cl;acl_neon")
+CONFIG(BACKENDS                , std::string  , "cpu;acl_cl;acl_neon;bcq") // FIXME Remove bcq
 CONFIG(OP_BACKEND_ALLOPS       , std::string  , "")
 CONFIG(OP_BACKEND_MAP          , std::string  , "")
 CONFIG(DISABLE_COMPILE         , bool         , "0")
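These CONFIG entries are normally overridden through environment variables of the same name; a hedged sketch of narrowing the backend list from a host process, assuming a POSIX environment and assuming the runtime reads BACKENDS when the first session is created:

    #include <cstdlib>

    void select_cpu_backend_only()
    {
      // Assumed: onert consults the BACKENDS variable at session setup,
      // so this must run before the first session is created.
      setenv("BACKENDS", "cpu", /*overwrite=*/1);
    }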
similarity index 54%
rename from runtime/onert/backend/cpu/ops/ExpLayer.h
rename to runtime/onert/core/include/util/Exceptions.h
index cd27b0e..fc3fa0f 100644 (file)
  * limitations under the License.
  */
 
-#ifndef __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__
+#ifndef __ONERT_UTIL_ONERTEXCEPTION_H__
+#define __ONERT_UTIL_ONERTEXCEPTION_H__
 
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
+#include <string>
 
 namespace onert
 {
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
 
-class ExpLayer : public ::onert::exec::IFunction
+class OnertException : public std::exception
 {
 public:
-  ExpLayer();
-
-public:
-  void expFloat32();
+  OnertException(const std::string &msg) : _msg{msg} {}
+  OnertException(const std::string &tag, const std::string &msg) : _msg{tag + " : " + msg} {}
 
-  void expQuant8();
-
-  void configure(const IPortableTensor *input, IPortableTensor *output);
-
-  void run() override;
+  const char *what() const noexcept override { return _msg.c_str(); }
 
 private:
-  const IPortableTensor *_input;
-  IPortableTensor *_output;
+  std::string _msg;
+};
+
+class InsufficientBufferSizeException : public OnertException
+{
+public:
+  InsufficientBufferSizeException(const std::string &msg)
+      : OnertException{"InsufficientBufferSize", msg}
+  {
+  }
 };
 
-} // namespace ops
-} // namespace cpu
-} // namespace backend
 } // namespace onert
 
-#endif // __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__
+#endif // __ONERT_UTIL_ONERTEXCEPTION_H__
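A usage sketch for the new hierarchy: callers that previously had to match std::runtime_error message strings can now catch the concrete type. Here apply_new_shape is a hypothetical stand-in for a call such as DynamicTensorManager::applyShape, which the diff below changes to throw InsufficientBufferSizeException.

    #include "util/Exceptions.h"
    #include <iostream>

    void apply_new_shape(); // hypothetical stand-in for applyShape()

    void run_with_resizable_output()
    {
      try
      {
        apply_new_shape();
      }
      catch (const onert::InsufficientBufferSizeException &e)
      {
        std::cerr << "grow the output buffer and retry: " << e.what() << std::endl;
      }
      catch (const onert::OnertException &e)
      {
        std::cerr << "unrecoverable: " << e.what() << std::endl;
      }
    }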
index a68c22b..1ebed48 100644 (file)
 
 #include "Utils.h"
 
-#include "ir/operation/AvgPool2D.h"
 #include "ir/operation/Concat.h"
-#include "ir/operation/MaxPool2D.h"
 #include "ir/operation/Conv2D.h"
 #include "ir/operation/DepthwiseConv2D.h"
+#include "ir/operation/Pool2D.h"
 #include "ir/operation/Reshape.h"
-#include "ir/operation/RSQRT.h"
 #include "ir/operation/StridedSlice.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
 #include "ir/Index.h"
 #include "ir/Layout.h"
 #include "ir/OperationVisitor.h"
@@ -46,8 +44,6 @@ using Shapes = std::vector<ir::Shape>;
 
 ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank);
 
-ir::Shape inferAvgPoolShape(const ir::Shape &in_shape, const ir::operation::AvgPool2D::Param &param,
-                            ir::Layout layout = ir::Layout::NHWC);
 ir::Shape inferBatchMatMulShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape,
                                 const ir::operation::BatchMatMul::Param &param);
 
@@ -74,15 +70,15 @@ ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &k
 ir::Shape inferGatherShape(const ir::Shape &input_shape, const ir::Shape &indices_shape, int axis,
                            int rank);
 
-ir::Shape inferMaxPoolShape(const ir::Shape &in_shape, const ir::operation::MaxPool2D::Param &param,
-                            ir::Layout layout = ir::Layout::NHWC);
-
 ir::Shape inferOnehotShape(const ir::Shape &input_shape, const int depth, int axis);
 
 ir::Shape inferPackShape(const ir::Shape &input_shape, int axis, int rank, int num);
 
 ir::Shape inferPadShape(const ir::Shape &in_shape, const int32_t *pad_buf, const size_t num_pads);
 
+ir::Shape inferPoolShape(const ir::Shape &in_shape, const ir::operation::Pool2D::Param &param,
+                         ir::Layout layout = ir::Layout::NHWC);
+
 template <typename T> ir::Shape inferRangeShape(T start_val, T limit_val, T delta_val);
 
 ir::Shape inferReshapeShape(const int32_t *shape_buf, const int32_t shape_num_elements,
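Shape-inference call sites change accordingly: the pool kind now travels inside the param instead of selecting between inferAvgPoolShape and inferMaxPoolShape. A sketch, assumed to live in the same namespace as these declarations and assuming ir::Shape accepts an initializer list:

    ir::Shape pooled_shape_example()
    {
      ir::operation::Pool2D::Param param;
      param.op_type = ir::operation::Pool2D::PoolType::AVG; // was inferAvgPoolShape(...)
      param.kh = param.kw = 2;
      // stride/padding/activation filled exactly as before

      ir::Shape in_shape{1, 28, 28, 8}; // illustrative NHWC input
      return inferPoolShape(in_shape, param); // layout argument defaults to NHWC
    }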
index 3c73259..670f775 100644 (file)
@@ -21,6 +21,7 @@
 #include "ConstantInitializer.h"
 #include "KernelGenerator.h"
 #include "TensorBuilder.h"
+#include "Tensor.h"
 
 #include <backend/Backend.h>
 
@@ -63,10 +64,12 @@ public:
     //   there is no such case until now, let's support it later
     // TODO Remove TensorBuilder and ConstantInitializer
     // TODO Support Consecutive controflow operation's intermediate tensor
-    auto tb = std::make_shared<TensorBuilder>();
+    auto tr = std::make_shared<TensorRegistry>();
+    auto tb = std::make_shared<TensorBuilder>(tr);
+    context->tensor_registry = tr;
     context->tensor_builder = tb;
-    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
-    context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb);
+    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+    context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb->dynamicTensorManager(), tr);
     context->tensor_register = nullptr;
     context->optimizer = nullptr;
     return context;
index 35cc783..e21a8f3 100644 (file)
@@ -17,7 +17,7 @@
 #ifndef __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
 #define __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__
 
-#include "TensorBuilder.h"
+#include "TensorRegistry.h"
 
 #include <backend/IConstantInitializer.h>
 #include <ir/Operands.h>
@@ -33,16 +33,16 @@ class ConstantInitializer : public IConstantInitializer
 {
 public:
   ConstantInitializer(const ir::Operands &operands,
-                      const std::shared_ptr<TensorBuilder> &tensor_builder)
-      : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+                      const std::shared_ptr<ITensorRegistry> &tensor_reg)
+      : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
   {
   }
 
 private:
-  std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
+  std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
 
 private:
-  std::shared_ptr<TensorBuilder> _tensor_builder;
+  std::shared_ptr<ITensorRegistry> _tensor_reg;
 };
 
 } // namespace controlflow
index e538f3f..1288e4c 100644 (file)
@@ -17,6 +17,8 @@
 #include "DynamicTensorManager.h"
 
 #include "util/logging.h"
+#include "util/Exceptions.h"
+#include "ir/DataType.h"
 
 namespace onert
 {
@@ -25,10 +27,8 @@ namespace backend
 namespace controlflow
 {
 
-DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg,
-                                           const std::shared_ptr<UserTensorRegistry> &user_reg)
-    : _dynamic_mem_mgr{new cpu_common::DynamicMemoryManager()}, _tensors{reg},
-      _user_tensors{user_reg}
+DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry> &tensors)
+    : _dynamic_mem_mgr{new cpu_common::DynamicMemoryManager()}, _tensors{tensors}
 {
   // DO NOTHING
 }
@@ -36,20 +36,20 @@ DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<cpu_common::Ten
 void DynamicTensorManager::applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape)
 {
   // NOTE Handle user tensors first
-  auto user_tensor = _user_tensors->getNativeTensor(ind);
+  auto user_tensor = _tensors->getNativeUserTensor(ind);
   if (user_tensor)
   {
     // User tensors cannot be reallocated.
     auto buffer_size = user_tensor->total_size();
     auto new_size = new_shape.num_elements() * sizeOfDataType(user_tensor->data_type());
     if (buffer_size < new_size)
-      throw std::runtime_error{"ExecutorBase: output buffer size is less than output tensor size"};
+      throw InsufficientBufferSizeException{"Output buffer size is less than output tensor size"};
     user_tensor->setShape(new_shape);
     return;
   }
 
-  // NOTE Then handle native tensors
-  auto tensor = _tensors->getNativeTensor(ind);
+  // NOTE Then handle own tensors
+  auto tensor = _tensors->getNativeOwnTensor(ind);
   assert(tensor);
 
   bool previously_dynamic = tensor->is_dynamic();
@@ -102,24 +102,13 @@ void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind,
                                        const ir::OperandInfo &tensor_info,
                                        ir::Layout backend_layout)
 {
-  assert(_tensors->getNativeTensor(ind) == nullptr);
   auto tensor = std::make_shared<cpu_common::Tensor>(tensor_info, backend_layout, this);
-  _tensors->setNativeTensor(ind, tensor);
+  _tensors->setNativeOwnTensor(ind, tensor);
 }
 
 void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind)
 {
-  auto find = _dealloc_tensor_map.find(op_ind);
-  if (find != _dealloc_tensor_map.end())
-  {
-    auto &input_set = find->second;
-    input_set.emplace(operand_ind);
-  }
-  else
-  {
-    _dealloc_tensor_map.emplace(
-        std::make_pair(op_ind, std::unordered_set<ir::OperandIndex>{operand_ind}));
-  }
+  _dealloc_tensor_map[op_ind].emplace(operand_ind);
 }
 
 void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind)
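The one-line planDealloc relies on std::unordered_map::operator[] default-constructing the mapped set on first access, which is exactly what the deleted find/emplace branches did by hand. The idiom in isolation:

    #include <unordered_map>
    #include <unordered_set>

    int main()
    {
      std::unordered_map<int, std::unordered_set<int>> dealloc_map;
      dealloc_map[42].emplace(7); // key absent: empty set created, then 7 inserted
      dealloc_map[42].emplace(8); // key present: 8 joins the existing set
      return 0;
    }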
index 446427d..dbe388b 100644 (file)
 #ifndef __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__
 #define __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__
 
-#include "UserTensorRegistry.h"
+#include "TensorRegistry.h"
+#include "Tensor.h"
 
 #include <backend/IDynamicTensorManager.h>
 #include <backend/cpu_common/MemoryManager.h>
-#include <backend/cpu_common/TensorRegistry.h>
 #include <ir/OperandInfo.h>
 #include <ir/Operation.h>
 #include <ir/Index.h>
@@ -33,16 +33,13 @@ namespace backend
 namespace controlflow
 {
 
-// TODO Find optimized algorithm to manage memory.
-
 /**
  * @brief Class to manage dynamic tensor and its memory
  */
 class DynamicTensorManager : public backend::IDynamicTensorManager
 {
 public:
-  DynamicTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg,
-                       const std::shared_ptr<UserTensorRegistry> &user_reg);
+  DynamicTensorManager(const std::shared_ptr<TensorRegistry> &tensors);
 
   virtual ~DynamicTensorManager() = default;
 
@@ -61,9 +58,7 @@ private:
    * @todo  DynamicMemoryManager is not optimized. Optimized one is needed
    */
   std::shared_ptr<cpu_common::DynamicMemoryManager> _dynamic_mem_mgr;
-  // TODO Refactoring : Merge two TensorRegistries into one
-  const std::shared_ptr<cpu_common::TensorRegistry> _tensors;
-  const std::shared_ptr<UserTensorRegistry> _user_tensors;
+  const std::shared_ptr<TensorRegistry> _tensors;
 
   // contains list of dynamic tensor index, which can be deallocated after running operation
   // note: this map could contain static tensor index too. Careful use is required.
index eb83b7d..de5a6a5 100644 (file)
@@ -31,24 +31,24 @@ namespace backend
 namespace controlflow
 {
 
-KernelGenerator::KernelGenerator(const ir::Graph &graph,
-                                 const std::shared_ptr<TensorBuilder> &tensor_builder)
-    : _graph{graph}, _tensor_builder{tensor_builder}, _tensor_builder_set{}, _executor_map{nullptr}
+KernelGenerator::KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager,
+                                 const std::shared_ptr<TensorRegistry> &tensor_reg)
+    : _graph{graph}, _dyn_tensor_manager{dyn_tensor_manager}, _tensor_reg{tensor_reg},
+      _tensor_registries{}, _executor_map{nullptr}
 {
   UNUSED_RELEASE(_graph);
-  UNUSED_RELEASE(_tensor_builder_set);
+  UNUSED_RELEASE(_tensor_registries);
   UNUSED_RELEASE(_executor_map);
 }
 
 void KernelGenerator::visit(const ir::OpSequence &op_seq)
 {
   assert(!_return_fn_seq);
-  assert(_tensor_builder->dynamicTensorManager());
-  assert(_tensor_builder->tensorRegistry());
+  assert(_dyn_tensor_manager);
+  assert(_tensor_reg);
 
-  auto dyn_tensor_manager = _tensor_builder->dynamicTensorManager();
-  auto dyn_shape_inferer = std::make_unique<exec::DynamicShapeInferer>(
-      _graph.operands(), dyn_tensor_manager, _tensor_builder->tensorRegistry());
+  auto dyn_shape_inferer =
+      std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg);
 
   _return_fn_seq = std::make_unique<exec::FunctionSequence>();
 
@@ -58,8 +58,8 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
     dyn_ctx->op_seq = &op_seq;
     dyn_ctx->operations = &_graph.operations();
     dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
-    dyn_ctx->tensor_registry = _tensor_builder->tensorRegistry();
-    dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
+    dyn_ctx->tensor_registry = _tensor_reg;
+    dyn_ctx->dynamic_tensor_manager = _dyn_tensor_manager;
 
     _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
   }
@@ -93,12 +93,7 @@ void KernelGenerator::visit(const ir::operation::If &node)
     auto output_tensor = getTensor(output_index);
 
     output_tensors.emplace_back(output_tensor);
-    const auto output_tensor_builder = getTensorBuilder(output_index);
-    if (output_tensor_builder->supportDynamicTensor())
-    {
-      auto output_dyn_manager = output_tensor_builder->dynamicTensorManager();
-      outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index, output_dyn_manager};
-    }
+    outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index};
   }
 
   // IfLayer just set ExecutorMap instead of then and else executor to avoid complexity of
@@ -121,14 +116,7 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
   std::vector<std::shared_ptr<ITensor>> output_tensors{getTensor(output_index)};
   std::vector<std::shared_ptr<ITensor>> input_tensors{getTensor(input_index)};
   std::unordered_map<std::shared_ptr<ITensor>, exec::DynAllocInfo> outputs_dyn_alloc_info;
-  const auto output_tensor_builder = getTensorBuilder(output_index);
-  VERBOSE(PERMUTE_FIND_TB) << output_index << " -> " << output_tensor_builder.get() << std::endl;
-  assert(output_tensor_builder != nullptr);
-  if (output_tensor_builder->supportDynamicTensor())
-  {
-    outputs_dyn_alloc_info[output_tensors.at(0)] =
-        exec::DynAllocInfo{output_index, output_tensor_builder->dynamicTensorManager()};
-  }
+  outputs_dyn_alloc_info[output_tensors.at(0)] = exec::DynAllocInfo{output_index};
 
   auto fn =
       std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, outputs_dyn_alloc_info);
@@ -159,12 +147,7 @@ void KernelGenerator::visit(const ir::operation::While &node)
 
     output_tensors.emplace_back(output_tensor);
 
-    const auto output_tensor_builder = getTensorBuilder(output_index);
-    if (output_tensor_builder->supportDynamicTensor())
-    {
-      auto output_dyn_manager = output_tensor_builder->dynamicTensorManager();
-      outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index, output_dyn_manager};
-    }
+    outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index};
   }
 
   // WhileLayer just set ExecutorMap instead of cond and body executor to avoid complexity of
@@ -178,34 +161,7 @@ void KernelGenerator::visit(const ir::operation::While &node)
 
 std::shared_ptr<backend::ITensor> KernelGenerator::getTensor(const ir::OperandIndex &index)
 {
-  std::shared_ptr<backend::ITensor> ret;
-  for (auto tensor_builder : _tensor_builder_set)
-  {
-    auto tensor = tensor_builder->tensorAt(index);
-    if (tensor)
-    {
-      ret = tensor;
-      break;
-    }
-  }
-  assert(ret != nullptr);
-  return ret;
-}
-
-std::shared_ptr<backend::ITensorBuilder>
-KernelGenerator::getTensorBuilder(const ir::OperandIndex &index)
-{
-  std::shared_ptr<backend::ITensorBuilder> ret;
-  for (auto tensor_builder : _tensor_builder_set)
-  {
-    auto reg = tensor_builder->tensorRegistry();
-    auto tensor = reg ? reg->getNativeITensor(index) : tensor_builder->tensorAt(index);
-    if (tensor)
-    {
-      ret = tensor_builder;
-      break;
-    }
-  }
+  std::shared_ptr<backend::ITensor> ret = _tensor_registries.getITensor(index);
   assert(ret != nullptr);
   return ret;
 }
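The loop over tensor builders moves behind compiler::TensorRegistries, whose getITensor does the cross-backend search once, centrally. A hedged sketch of what that combined lookup plausibly looks like; the container name is an assumption, not the actual implementation.

    // Sketch only: one ITensorRegistry per backend, searched in order.
    std::shared_ptr<backend::ITensor> getITensor(const ir::OperandIndex &index) const
    {
      for (const auto &reg : _registries) // assumed container of backend registries
      {
        if (auto tensor = reg->getITensor(index))
          return tensor;
      }
      return nullptr;
    }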
index 1fc7793..b84a810 100644 (file)
@@ -22,9 +22,8 @@
 #include <exec/IExecutor.h>
 #include <ir/Graph.h>
 #include "TensorBuilder.h"
-#include "compiler/TensorBuilders.h"
-
-#include "compiler/TensorBuilders.h"
+#include "compiler/TensorRegistries.h"
+#include "TensorRegistry.h"
 
 namespace onert
 {
@@ -36,11 +35,12 @@ namespace controlflow
 class KernelGenerator : public IKernelGenerator
 {
 public:
-  KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder);
+  KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager,
+                  const std::shared_ptr<TensorRegistry> &tensor_reg);
 
-  void setTensorBuilderSet(const compiler::TensorBuilders &tensor_builder_set)
+  void setTensorRegistries(const compiler::TensorRegistries &tensor_registries)
   {
-    _tensor_builder_set = tensor_builder_set;
+    _tensor_registries = tensor_registries;
   }
   void setExecutorMap(const std::shared_ptr<exec::ExecutorMap> &executor_map)
   {
@@ -57,12 +57,12 @@ public:
 
 private:
   std::shared_ptr<backend::ITensor> getTensor(const ir::OperandIndex &index);
-  std::shared_ptr<backend::ITensorBuilder> getTensorBuilder(const ir::OperandIndex &index);
 
 private:
   const ir::Graph &_graph;
-  std::shared_ptr<TensorBuilder> _tensor_builder;
-  compiler::TensorBuilders _tensor_builder_set;
+  IDynamicTensorManager *_dyn_tensor_manager;
+  std::shared_ptr<TensorRegistry> _tensor_reg;
+  compiler::TensorRegistries _tensor_registries;
   exec::ExecutorMap *_executor_map;
 };
 
  * limitations under the License.
  */
 
-#include "ir/operation/Log.h"
+#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
+#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
 
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
+#include <backend/cpu_common/Tensor.h>
 
 namespace onert
 {
-namespace ir
+namespace backend
 {
-namespace operation
+namespace controlflow
 {
 
-void Log::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Log::Log(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
+using Tensor = cpu_common::Tensor;
 
-} // namespace operation
-} // namespace ir
+} // namespace controlflow
+} // namespace backend
 } // namespace onert
+
+#endif // __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__
index 5bddb91..e5c3f5f 100644 (file)
@@ -27,10 +27,10 @@ namespace backend
 namespace controlflow
 {
 
-TensorBuilder::TensorBuilder()
-    : _tensor_reg{new cpu_common::TensorRegistry()}, _user_tensor_reg{new UserTensorRegistry()},
-      _static_tensor_mgr{new cpu_common::StaticTensorManager(_tensor_reg)},
-      _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg, _user_tensor_reg)}
+TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg)
+    : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)},
+      _static_tensor_mgr{
+          new cpu_common::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())}
 {
   /* empty */
 }
@@ -54,10 +54,13 @@ void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::Op
 
 void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
 {
-  assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
+  // TODO Enhance the way of checking user tensors
+  if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
+    return;
+
   const auto tensor_info = _tensor_info_map.at(ind);
 
-  if (!at(ind)->is_dynamic())
+  if (!nativeOwnTensorAt(ind)->is_dynamic())
   {
     const auto size = tensor_info.total_size();
     _static_tensor_mgr->claimPlan(ind, size);
@@ -66,7 +69,11 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
 
 void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
 {
-  if (!at(ind)->is_dynamic())
+  // TODO Enhance the way of checking user tensors
+  if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors
+    return;
+
+  if (!nativeOwnTensorAt(ind)->is_dynamic())
   {
     _static_tensor_mgr->releasePlan(ind);
   }
@@ -74,6 +81,11 @@ void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
 
 bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
 {
+  // User tensors are not registered in _tensor_info_map, but objects for them do exist
+  // in the tensor registry.
+  // TODO Enhance the way of checking user tensors
+  if (_tensor_reg->getITensor(ind))
+    return true;
   return _tensor_info_map.find(ind) != _tensor_info_map.end();
 }
 
@@ -89,25 +101,9 @@ void TensorBuilder::allocate()
   //      This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
 }
 
-std::shared_ptr<ITensor> TensorBuilder::tensorAt(const ir::OperandIndex &ind)
-{
-  // NOTE Find from User Tensor Registry first
-  // FIXME There may be both user tensor and native tensor for a `ind` which is a waste
-  auto user_tensor = _user_tensor_reg->getITensor(ind);
-  auto tensor = _tensor_reg->getITensor(ind);
-  if (user_tensor)
-  {
-    return user_tensor;
-  }
-  else
-    return tensor;
-}
-
-void TensorBuilder::iterate(const IterateFunction &fn) { _static_tensor_mgr->iterate(fn); }
-
-std::shared_ptr<cpu_common::Tensor> TensorBuilder::at(const ir::OperandIndex &ind)
+std::shared_ptr<cpu_common::Tensor> TensorBuilder::nativeOwnTensorAt(const ir::OperandIndex &ind)
 {
-  return _tensor_reg->getNativeTensor(ind);
+  return _tensor_reg->getNativeOwnTensor(ind);
 }
 
 std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void)
@@ -120,10 +116,10 @@ std::unique_ptr<ITensorManager> TensorBuilder::releaseDynamicTensorManager(void)
   return std::move(_dynamic_tensor_mgr);
 }
 
-void TensorBuilder::setUserTensor(const ir::OperandIndex &ind,
-                                  const std::shared_ptr<UserTensor> &tensor)
+void TensorBuilder::setNativeUserTensor(const ir::OperandIndex &ind,
+                                        const std::shared_ptr<UserTensor> &tensor)
 {
-  _user_tensor_reg->setNativeTensor(ind, tensor);
+  _tensor_reg->setNativeUserTensor(ind, tensor);
 }
 
 } // namespace controlflow
index 9f2bb37..2f2a2c4 100644 (file)
@@ -39,9 +39,7 @@ namespace controlflow
 class TensorBuilder : public ITensorBuilder
 {
 public:
-  TensorBuilder();
-
-  bool supportDynamicTensor() override { return true; }
+  TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg);
 
   /**
    * @brief     Register tensor information to allocate on CPU backend
@@ -61,15 +59,6 @@ public:
   void allocate() override;
   void postFunctionPrepare() override { /* DO NOTHING */}
 
-  /**
-   * @brief Get tensor with a specific OperandIndex
-   *
-   * @return shared_ptr<ITensor> if a tensor with given OperandIndex exists. nullptr otherwise.
-   */
-  std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override;
-
-  void iterate(const IterateFunction &fn) override;
-
   std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
 
   IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); }
@@ -82,16 +71,13 @@ public:
    *        If not, program will crash with assert or exception.
    * @return shared_ptr<operand::Tensor>
    */
-  std::shared_ptr<cpu_common::Tensor> at(const ir::OperandIndex &ind);
-  void setUserTensor(const ir::OperandIndex &ind, const std::shared_ptr<UserTensor> &tensor);
-
-  std::shared_ptr<ITensorRegistry> tensorRegistry() override { return _tensor_reg; }
+  std::shared_ptr<cpu_common::Tensor> nativeOwnTensorAt(const ir::OperandIndex &ind);
+  void setNativeUserTensor(const ir::OperandIndex &ind, const std::shared_ptr<UserTensor> &tensor);
 
 private:
-  const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
-  const std::shared_ptr<UserTensorRegistry> _user_tensor_reg;
-  std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr;
+  const std::shared_ptr<TensorRegistry> _tensor_reg;
   std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr;
+  std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr;
   ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
   ir::OperandIndexMap<ir::Layout> _tensor_layout_map;
 };
diff --git a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h
new file mode 100644 (file)
index 0000000..678c5b7
--- /dev/null
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
+#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
+
+#include "backend/cpu_common/TensorRegistry.h"
+#include "backend/ITensorRegistry.h"
+#include "Tensor.h"
+#include "UserTensor.h"
+#include <assert.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace controlflow
+{
+
+/**
+ * @brief Tensor registry class for controlflow backend
+ *
+ * This class contains three types of tensors: two kinds of native tensors (tensors that are
+ * managed by this backend) and migrant tensors.
+ *
+ * - NativeUserTensor - @c UserTensor managed by this backend, whose buffer is user-given
+ * - NativeOwnTensor  - @c cpu_common::Tensor managed by this backend (in @c _base_reg)
+ * - MigrantTensor    - @c IPortableTensor managed by other backends (in @c _base_reg)
+ *
+ * @note @c _base_reg is used in implementation to reuse @c cpu_common::StaticTensorManager
+ *
+ */
+class TensorRegistry : public ITensorRegistry
+{
+public:
+  TensorRegistry() : _base_reg{new cpu_common::TensorRegistry} {}
+
+  std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
+  {
+    auto base_tensor = _base_reg->getITensor(ind);
+    if (base_tensor)
+      return base_tensor;
+    return getNativeUserTensor(ind);
+  }
+
+  std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override
+  {
+    auto base_tensor = _base_reg->getNativeITensor(ind);
+    if (base_tensor)
+      return base_tensor;
+    return getNativeUserTensor(ind);
+  }
+
+  std::shared_ptr<IPortableTensor> getPortableTensor(const ir::OperandIndex &ind)
+  {
+    auto base_tensor = _base_reg->getPortableTensor(ind);
+    if (base_tensor)
+      return base_tensor;
+    return getNativeUserTensor(ind);
+  }
+
+  std::shared_ptr<IPortableTensor> getNativeTensor(const ir::OperandIndex &ind)
+  {
+    auto base_tensor = _base_reg->getNativeTensor(ind);
+    if (base_tensor)
+      return base_tensor;
+    return getNativeUserTensor(ind);
+  }
+
+  std::shared_ptr<Tensor> getNativeOwnTensor(const ir::OperandIndex &ind)
+  {
+    return _base_reg->getNativeTensor(ind);
+  }
+
+  std::shared_ptr<UserTensor> getNativeUserTensor(const ir::OperandIndex &ind)
+  {
+    auto tensor = _native_user_tensors.find(ind);
+    if (tensor != _native_user_tensors.end())
+      return tensor->second;
+    return nullptr;
+  }
+
+  bool setMigrantTensor(const ir::OperandIndex &ind,
+                        const std::shared_ptr<IPortableTensor> &tensor) override
+  {
+    assert(tensor);
+    assert(!getITensor(ind)); // No tensor should be registered for `ind` yet
+    _base_reg->setMigrantTensor(ind, tensor);
+    return true;
+  }
+
+  void setNativeOwnTensor(ir::OperandIndex ind, const std::shared_ptr<Tensor> &tensor)
+  {
+    assert(tensor);
+    assert(!getITensor(ind)); // No tensor should be registered for `ind` yet
+    _base_reg->setNativeTensor(ind, tensor);
+  }
+
+  void setNativeUserTensor(ir::OperandIndex ind, const std::shared_ptr<UserTensor> &tensor)
+  {
+    assert(tensor);
+    assert(!getITensor(ind)); // No tensor should be registered for `ind` yet
+    _native_user_tensors[ind] = tensor;
+  }
+
+  const ir::OperandIndexMap<std::shared_ptr<UserTensor>> &native_user_tensors()
+  {
+    return _native_user_tensors;
+  }
+  std::shared_ptr<cpu_common::TensorRegistry> base_reg() { return _base_reg; }
+
+private:
+  std::shared_ptr<cpu_common::TensorRegistry> _base_reg;
+  ir::OperandIndexMap<std::shared_ptr<UserTensor>> _native_user_tensors;
+};
+
+} // namespace controlflow
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__
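
All of the getters above share one precedence rule: consult @c _base_reg (native own and migrant tensors) first, then fall back to the user-tensor map. A self-contained sketch of that fallback chain with simplified stand-in types, not the real classes:

#include <memory>
#include <unordered_map>

struct Tensor {}; // stands in for both cpu_common::Tensor and UserTensor
using Index = int;
using Map = std::unordered_map<Index, std::shared_ptr<Tensor>>;

static std::shared_ptr<Tensor> find(const Map &m, Index ind)
{
  auto it = m.find(ind);
  return it != m.end() ? it->second : nullptr;
}

struct RegistrySketch
{
  std::shared_ptr<Tensor> getITensor(Index ind) const
  {
    if (auto t = find(_base, ind)) // native own + migrant tensors win
      return t;
    return find(_native_user, ind); // user-given buffers are the fallback
  }

  Map _base;        // plays the role of _base_reg
  Map _native_user; // plays the role of _native_user_tensors
};
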
index 3c095b3..e8f1ea6 100644 (file)
@@ -55,7 +55,11 @@ void PermuteLayer::run()
       try
       {
         const auto dst_index = _dst_dyn_alloc_info_map.at(dst_tensor).ind;
-        _dst_dyn_alloc_info_map.at(dst_tensor).dyn_tensor_manager->applyShape(dst_index, new_shape);
+        auto dyn_tensor_manager = dst_tensor->dynamic_tensor_manager();
+        if (!dyn_tensor_manager)
+          throw std::runtime_error{
+              "Error: PermuteLayer: output's TensorManager does not support dynamic tensor"};
+        dyn_tensor_manager->applyShape(dst_index, new_shape);
         assert(dst_tensor->buffer() != nullptr);
       }
       catch (const std::out_of_range &e)
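
With DynAllocInfo reduced to an index, PermuteLayer now asks the destination tensor itself for a dynamic tensor manager and fails loudly when the owning backend cannot reshape at run time. A sketch of that pattern (the interfaces below are trimmed-down assumptions, not the full onert API):

#include <stdexcept>
#include <vector>

using Shape = std::vector<int>;

struct IDynamicTensorManager
{
  virtual ~IDynamicTensorManager() = default;
  virtual void applyShape(int operand_index, const Shape &new_shape) = 0;
};

struct ITensor
{
  virtual ~ITensor() = default;
  // nullptr means the owning backend cannot resize this tensor at run time
  virtual IDynamicTensorManager *dynamic_tensor_manager() = 0;
};

void applyShapeOrThrow(ITensor &dst, int dst_index, const Shape &new_shape)
{
  auto *mgr = dst.dynamic_tensor_manager();
  if (mgr == nullptr)
    throw std::runtime_error{"output's TensorManager does not support dynamic tensor"};
  mgr->applyShape(dst_index, new_shape);
}
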
index cb27d75..f7ce3d0 100644 (file)
@@ -95,17 +95,7 @@ void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind,
 
 void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind)
 {
-  auto find = _dealloc_tensor_map.find(op_ind);
-  if (find != _dealloc_tensor_map.end())
-  {
-    auto &input_set = find->second;
-    input_set.emplace(operand_ind);
-  }
-  else
-  {
-    _dealloc_tensor_map.emplace(
-        std::make_pair(op_ind, std::unordered_set<ir::OperandIndex>{operand_ind}));
-  }
+  _dealloc_tensor_map[op_ind].emplace(operand_ind);
 }
 
 void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind)
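
The rewritten planDealloc leans on std::unordered_map::operator[], which value-initializes a missing entry, so the explicit find/emplace branches are unnecessary. A runnable demonstration of the idiom:

#include <cassert>
#include <unordered_map>
#include <unordered_set>

int main()
{
  std::unordered_map<int, std::unordered_set<int>> dealloc_tensor_map;

  // operator[] default-constructs an empty set for a new key,
  // so both the "first insert" and "append" cases take the same path.
  dealloc_tensor_map[7].emplace(1);
  dealloc_tensor_map[7].emplace(2);
  dealloc_tensor_map[9].emplace(3);

  assert(dealloc_tensor_map.at(7).size() == 2);
  assert(dealloc_tensor_map.at(9).count(3) == 1);
  return 0;
}
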
index 820cad3..440f70c 100644 (file)
@@ -26,8 +26,10 @@ namespace backend
 namespace cpu_common
 {
 
-StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg)
-    : _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg}
+StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg,
+                                         IDynamicTensorManager *dynamic_tensor_manager)
+    : _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg},
+      _dynamic_tensor_manager{dynamic_tensor_manager}
 {
   // DO NOTHING
 }
@@ -78,7 +80,7 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
                                       bool as_const)
 {
   assert(!_tensors->getNativeTensor(ind));
-  auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, nullptr);
+  auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, _dynamic_tensor_manager);
   _tensors->setNativeTensor(ind, tensor);
   _as_constants[ind] = as_const;
 }
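
Passing _dynamic_tensor_manager instead of nullptr means every statically built tensor can later hand out the manager that is allowed to reshape it. A reduced sketch of this constructor injection (simplified types; the real Tensor carries much more state):

#include <memory>

struct IDynamicTensorManager; // forward declaration is enough for storage

struct Tensor
{
  Tensor(int total_size, IDynamicTensorManager *dyn_mgr)
      : _total_size{total_size}, _dynamic_tensor_manager{dyn_mgr}
  {
  }
  // Returns nullptr only when the backend opted out of dynamic tensors.
  IDynamicTensorManager *dynamic_tensor_manager() const { return _dynamic_tensor_manager; }

  int _total_size;
  IDynamicTensorManager *_dynamic_tensor_manager; // not owned
};

std::shared_ptr<Tensor> buildTensor(int size, IDynamicTensorManager *dyn_mgr)
{
  return std::make_shared<Tensor>(size, dyn_mgr); // was: nullptr before this change
}
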
index 33b428a..93dbbc3 100644 (file)
@@ -134,6 +134,12 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
         backend::controlflow::Config::ID;
   }
 
+  // FIXME This is a workaround for bcq operations, should remove it
+  {
+    _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
+    _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
+  }
+
   {
     VERBOSE(Compiler) << std::boolalpha;
     VERBOSE(Compiler) << "==== Compiler Options ====" << std::endl;
@@ -181,14 +187,14 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
   auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options.graph_dump_level);
 
   // Lower: Assign backend
-  std::unordered_map<ir::SubgraphIndex, std::unique_ptr<ir::LoweredGraph>> lowered_subgs;
+  std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs;
   _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
     _options.is_primary_subgraph = (index == ir::SubgraphIndex{0});
     onert::dumper::dot::DotDumper dot_dumper(subg, dump_level);
     dot_dumper.dump(nnfw::misc::str("before_lower_subg-", index.value()));
 
     // Lower: Assign backend
-    lowered_subgs[index] = std::make_unique<ir::LoweredGraph>(subg, _options);
+    lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, _options);
 
     // Check backend(s) for subgraph support FP16
     bool backends_support_fp16 = true;
index 82afd9e..062c6c9 100644 (file)
@@ -25,6 +25,7 @@
 #include "compiler/ExecutionBuilder.h"
 #include "exec/ExecTime.h"
 #include "compiler/Linear.h"
+#include "compiler/TensorBuilders.h"
 #include "backend/IConstantInitializer.h"
 #include "backend/IKernelGenerator.h"
 #include "backend/IOptimizer.h"
@@ -64,6 +65,23 @@ private:
   std::shared_ptr<backend::IConfig> _config;
 };
 
+// TODO Think of a better way to manage TensorManagers
+backend::TensorManagerSet createTensorManagerSet(const compiler::TensorBuilders &tensor_builders)
+{
+  backend::TensorManagerSet tensor_mgrs;
+  for (auto &tensor_builder : tensor_builders)
+  {
+    auto s_tensor_manager = tensor_builder->releaseStaticTensorManager();
+    if (s_tensor_manager != nullptr)
+      tensor_mgrs.insert(std::move(s_tensor_manager));
+
+    auto d_tensor_manager = tensor_builder->releaseDynamicTensorManager();
+    if (d_tensor_manager != nullptr)
+      tensor_mgrs.insert(std::move(d_tensor_manager));
+  }
+  return tensor_mgrs;
+}
+
 } // namespace
 } // namespace onert
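
createTensorManagerSet drains each tensor builder: the release*TensorManager() calls hand over unique_ptr ownership, and the returned set keeps the managers alive for the executor's lifetime. A reduced model of that transfer, assuming TensorManagerSet is a set of unique_ptr (which the std::move inserts suggest):

#include <memory>
#include <unordered_set>
#include <vector>

struct ITensorManager { virtual ~ITensorManager() = default; };

struct Builder
{
  std::unique_ptr<ITensorManager> releaseStaticTensorManager() { return std::move(_static_mgr); }
  std::unique_ptr<ITensorManager> releaseDynamicTensorManager() { return std::move(_dyn_mgr); }
  std::unique_ptr<ITensorManager> _static_mgr{new ITensorManager};
  std::unique_ptr<ITensorManager> _dyn_mgr; // may be absent
};

std::unordered_set<std::unique_ptr<ITensorManager>> collect(std::vector<Builder> &builders)
{
  std::unordered_set<std::unique_ptr<ITensorManager>> mgrs;
  for (auto &b : builders)
  {
    if (auto s = b.releaseStaticTensorManager())
      mgrs.insert(std::move(s)); // the builder no longer owns the manager
    if (auto d = b.releaseDynamicTensorManager())
      mgrs.insert(std::move(d));
  }
  return mgrs;
}
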
 
@@ -87,14 +105,14 @@ ExecutorFactory::ExecutorFactory()
                                std::placeholders::_3, true);
 }
 
-exec::IExecutor *ExecutorFactory::create(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                                          const compiler::CompilerOptions &options,
                                          const std::shared_ptr<exec::ExecutorMap> &executor_map)
 {
   return _map.at(options.executor)(std::move(lowered_graph), options, executor_map);
 }
 
-void ExecutorFactory::initializeBackendContext(ir::LoweredGraph *lowered_graph)
+void ExecutorFactory::initializeBackendContext(compiler::LoweredGraph *lowered_graph)
 {
   struct Entry
   {
@@ -132,7 +150,7 @@ void ExecutorFactory::initializeBackendContext(ir::LoweredGraph *lowered_graph)
   }
 }
 
-void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph,
+void ExecutorFactory::runTensorRegistration(compiler::LoweredGraph *lowered_graph,
                                             const std::vector<ir::OpSequenceIndex> &order)
 {
   for (const auto index : order)
@@ -141,6 +159,8 @@ void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph,
     const auto backend = lowered_graph->getLowerInfo(index)->backend();
     const auto tensor_register = lowered_graph->backend_contexts().at(backend)->tensor_register;
     auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder;
+    auto model_io = lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs();
+
     if (tensor_register)
     {
       // Custom registration
@@ -154,7 +174,7 @@ void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph,
         const auto &op = lowered_graph->graph().operations().at(op_idx);
         for (const auto &index : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs())
         {
-          if (!tensor_builder->isRegistered(index))
+          if (!tensor_builder->isRegistered(index) && !model_io.contains(index))
           {
             const auto &operand_lower_info =
                 lowered_graph->getLowerInfo(index)->def_factors().getOnlyElement();
@@ -181,15 +201,28 @@ void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph,
 }
 
 std::vector<std::shared_ptr<backend::ITensor>>
-ExecutorFactory::initializeModelIOTensors(ir::LoweredGraph &lowered_graph,
+ExecutorFactory::initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
                                           const ir::OperandIndexSequence &indices)
 {
   std::vector<std::shared_ptr<backend::ITensor>> ret;
 
-  TensorBuilders tensor_builders{lowered_graph.backend_contexts(), false};
-  std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder =
-      tensor_builders.getControlflowTensorBuilder();
+  // TODO Store controlflow backend in BackendContext
+  std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder;
+  std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg;
+  for (const auto &e : lowered_graph.backend_contexts())
+  {
+    auto backend = e.first;
+    auto &context = e.second;
+    if (backend->config()->id() == backend::controlflow::Config::ID)
+    {
+      cf_tensor_builder =
+          std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>(context->tensor_builder);
+      cf_tensor_reg =
+          std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry);
+    }
+  }
   assert(cf_tensor_builder);
+  assert(cf_tensor_reg);
 
   for (auto ind : indices)
   {
@@ -200,15 +233,16 @@ ExecutorFactory::initializeModelIOTensors(ir::LoweredGraph &lowered_graph,
         cf_tensor_builder->dynamicTensorManager());
 
     // Add tensor to controlflow TensorRegistry.
-    cf_tensor_builder->setUserTensor(ind, tensor);
+    cf_tensor_reg->setNativeUserTensor(ind, tensor);
     ret.push_back(tensor);
   }
   return ret;
 }
 
-void ExecutorFactory::prepareExternalTensors(ir::LoweredGraph &lowered_graph,
-                                             TensorBuilders &tensor_builders)
+void ExecutorFactory::prepareExternalTensors(compiler::LoweredGraph &lowered_graph)
 {
+  TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true};
+
   lowered_graph.op_seqs().iterate(
       [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) {
         auto lower_info = lowered_graph.getLowerInfo(op_seq_index);
@@ -219,20 +253,20 @@ void ExecutorFactory::prepareExternalTensors(ir::LoweredGraph &lowered_graph,
           // If an OpSequence input/output tensor does not have its own tensor object,
           // it must be using an external tensor, so find the tensor in the other backends'
           // tensor registries and set it as a migrant tensor in this backend if portable
-          if (!backend_ctx->tensor_builder->tensorAt(ind))
+          if (!backend_ctx->tensor_registry->getITensor(ind))
           {
-            auto tensor = tensor_builders.getITensor(ind);
-            assert(tensor); // The tensor must have been created in one of TensorBuilders
+            auto tensor = tensor_regs.getITensor(ind);
+            assert(tensor); // The tensor must have been registered
             auto ptensor = std::dynamic_pointer_cast<backend::IPortableTensor>(tensor);
             if (ptensor)
-              backend_ctx->tensor_builder->setMigrantTensor(ind, ptensor);
+              backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor);
           }
         }
       });
 }
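
prepareExternalTensors now works purely on registries: when a backend never registered an operand that one of its op sequences touches, the tensor is located in some other backend's registry and recorded locally as a migrant tensor if portable. A condensed sketch with simplified registries:

#include <memory>
#include <unordered_map>
#include <vector>

struct IPortableTensor {};
using Index = int;
using Registry = std::unordered_map<Index, std::shared_ptr<IPortableTensor>>;

void shareExternalTensor(Registry &local, const std::vector<Registry *> &all, Index ind)
{
  if (local.count(ind))
    return; // the backend already has its own (or a migrant) tensor

  for (auto *reg : all)
  {
    auto it = reg->find(ind);
    if (it != reg->end())
    {
      local.emplace(ind, it->second); // register as a migrant tensor
      return;
    }
  }
  // In the real code this case is an assert: the tensor must exist somewhere.
}
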
 
 exec::IExecutor *
-ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                                       const compiler::CompilerOptions &options,
                                       const std::shared_ptr<exec::ExecutorMap> &executor_map)
 {
@@ -277,13 +311,14 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_
   Linear::planTensors(*lowered_graph, order);
 
   TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
+  TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
 
   for (auto &tensor_builder : tensor_builders)
   {
     tensor_builder->prepare();
   }
 
-  prepareExternalTensors(*lowered_graph, tensor_builders);
+  prepareExternalTensors(*lowered_graph);
 
   ExecutionBuilder builder;
 
@@ -296,7 +331,7 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_
     auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get());
     if (cf_kernel_gen != nullptr)
     {
-      cf_kernel_gen->setTensorBuilderSet(tensor_builders);
+      cf_kernel_gen->setTensorRegistries(tensor_regs);
       cf_kernel_gen->setExecutorMap(executor_map);
     }
     auto fn_seq = kernel_gen->generate(op_seq);
@@ -335,9 +370,10 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_
     });
   }
 
-  auto exec =
-      new exec::LinearExecutor{std::move(lowered_graph), input_tensors,       output_tensors,
-                               tensor_builders,          std::move(code_map), order};
+  backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
+  auto exec = new exec::LinearExecutor{
+      std::move(lowered_graph), input_tensors,       output_tensors, tensor_regs,
+      std::move(tensor_mgrs),   std::move(code_map), order};
 
   if (!options.trace_filepath.empty())
   {
@@ -350,7 +386,7 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_
 }
 
 exec::IExecutor *ExecutorFactory::createDataflowExecutor(
-    std::unique_ptr<ir::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
+    std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
     const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
 {
   const auto &backend_contexts = lowered_graph->backend_contexts();
@@ -369,6 +405,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
   }
 
   TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
+  TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};
 
   // To make tensors never be deallocated, this is a workaround to use static memory planner
   for (auto &tensor_builder : tensor_builders)
@@ -387,7 +424,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
     tensor_builder->prepare();
   }
 
-  prepareExternalTensors(*lowered_graph, tensor_builders);
+  prepareExternalTensors(*lowered_graph);
 
   ExecutionBuilder builder;
 
@@ -401,7 +438,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
     if (cf_kernel_gen != nullptr)
     {
       assert(cf_kernel_gen != nullptr);
-      cf_kernel_gen->setTensorBuilderSet(tensor_builders);
+      cf_kernel_gen->setTensorRegistries(tensor_regs);
       cf_kernel_gen->setExecutorMap(executor_map);
     }
     auto fn_seq = kernel_gen->generate(op_seq);
@@ -440,17 +477,20 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
     });
   }
 
+  backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
+
   exec::ExecutorBase *exec = nullptr;
   if (parallel)
   {
-    exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors, output_tensors,
-                                      tensor_builders, std::move(code_map)};
+    exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors,
+                                      output_tensors,           tensor_regs,
+                                      std::move(tensor_mgrs),   std::move(code_map)};
   }
   else
   {
-    auto dataflow_exec =
-        new exec::DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors,
-                                   tensor_builders, std::move(code_map)};
+    auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), input_tensors,
+                                                    output_tensors,           tensor_regs,
+                                                    std::move(tensor_mgrs),   std::move(code_map)};
     if (options.he_profiling_mode)
     {
       std::vector<const backend::Backend *> backends;
index 418e5a7..b8893c0 100644 (file)
@@ -21,8 +21,8 @@
 
 #include "backend/ITensor.h"
 #include "exec/IExecutor.h"
-#include "ir/LoweredGraph.h"
-#include "TensorBuilders.h"
+#include "compiler/LoweredGraph.h"
+#include "TensorRegistries.h"
 
 namespace onert
 {
@@ -35,7 +35,7 @@ public:
   static ExecutorFactory &get();
 
 public:
-  exec::IExecutor *create(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+  exec::IExecutor *create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                           const compiler::CompilerOptions &options,
                           const std::shared_ptr<exec::ExecutorMap> &executor_map);
 
@@ -43,28 +43,27 @@ private:
   ExecutorFactory();
 
 private:
-  static void initializeBackendContext(ir::LoweredGraph *lowered_graph);
-  static void runTensorRegistration(ir::LoweredGraph *lowered_graph,
+  static void initializeBackendContext(compiler::LoweredGraph *lowered_graph);
+  static void runTensorRegistration(compiler::LoweredGraph *lowered_graph,
                                     const std::vector<ir::OpSequenceIndex> &order);
   static std::vector<std::shared_ptr<backend::ITensor>>
-  initializeModelIOTensors(ir::LoweredGraph &lowered_graph,
+  initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
                            const ir::OperandIndexSequence &indices);
-  static void prepareExternalTensors(ir::LoweredGraph &lowered_graph,
-                                     TensorBuilders &tensor_builders);
+  static void prepareExternalTensors(compiler::LoweredGraph &lowered_graph);
   static exec::IExecutor *
-  createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+  createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                        const compiler::CompilerOptions &options,
                        const std::shared_ptr<exec::ExecutorMap> &executor_map);
   static exec::IExecutor *
-  createDataflowExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+  createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                          const compiler::CompilerOptions &options,
                          const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel);
 
 private:
-  std::unordered_map<
-      std::string, std::function<exec::IExecutor *(
-                       std::unique_ptr<ir::LoweredGraph>, const compiler::CompilerOptions &options,
-                       const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
+  std::unordered_map<std::string, std::function<exec::IExecutor *(
+                                      std::unique_ptr<compiler::LoweredGraph>,
+                                      const compiler::CompilerOptions &options,
+                                      const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
       _map;
 };
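
_map implements a name-to-constructor dispatch: the constructor registers one std::function per executor kind and create() forwards to _map.at(options.executor). The same pattern stripped to its core:

#include <functional>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>

struct IExecutor { virtual ~IExecutor() = default; };
struct LinearExecutor : IExecutor {};
struct DataflowExecutor : IExecutor {};

class FactorySketch
{
public:
  FactorySketch()
  {
    _map["Linear"] = [] { return std::unique_ptr<IExecutor>(new LinearExecutor); };
    _map["Dataflow"] = [] { return std::unique_ptr<IExecutor>(new DataflowExecutor); };
  }
  std::unique_ptr<IExecutor> create(const std::string &name)
  {
    auto it = _map.find(name);
    if (it == _map.end())
      throw std::runtime_error{"unknown executor: " + name};
    return it->second();
  }

private:
  std::unordered_map<std::string, std::function<std::unique_ptr<IExecutor>()>> _map;
};
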
 
index 5c4b84e..23a6a25 100644 (file)
@@ -44,7 +44,7 @@ namespace onert
 namespace compiler
 {
 
-Fp32ToFp16Converter::Fp32ToFp16Converter(ir::LoweredGraph &lowered_graph)
+Fp32ToFp16Converter::Fp32ToFp16Converter(compiler::LoweredGraph &lowered_graph)
     : _lowered_graph{lowered_graph}
 {
   VERBOSE(Fp32ToFp16Converter) << "Fp16 Enable on" << std::endl;
index 5dbf744..eeecb98 100644 (file)
@@ -17,7 +17,7 @@
 #ifndef __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
 #define __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
 
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
 
 namespace onert
 {
@@ -28,7 +28,7 @@ namespace compiler
 class Fp32ToFp16Converter
 {
 public:
-  Fp32ToFp16Converter(ir::LoweredGraph &lowered_graph);
+  Fp32ToFp16Converter(compiler::LoweredGraph &lowered_graph);
 
 public:
   void run();
@@ -89,7 +89,7 @@ private:
   void convertOperandsOfOpSequence(ir::OpSequence &op_seq);
 
 private:
-  ir::LoweredGraph &_lowered_graph;
+  compiler::LoweredGraph &_lowered_graph;
   OpSeqIndexList _list_fp32_to_fp16;
   OpSeqIndexList _list_fp16_to_fp32;
 };
index de9b4fb..5653b09 100644 (file)
@@ -54,42 +54,10 @@ static bool isQuant(const ir::Graph &graph, const ir::Operation &node)
   return false;
 }
 
-static bool isWorkaroundSkip(const ir::Graph &graph, const backend::Backend *backend,
-                             const ir::Operation &node, bool quant)
+static bool isWorkaroundSkip(const ir::Graph &, const backend::Backend *, const ir::Operation &,
+                             bool)
 {
-  /* TODO: this is workaround, come up with better solution if have.
-      Adding exception in stage doesn't help. Because if there is a record for add without
-      broadcast, scheduling will select it since it doesn't distinguish broadcast and
-      non-broadcast like it does for quant non-quantized*/
-  if (backend->config()->id() == "cpu" &&
-      (node.opcode() == ir::OpCode::Add || node.opcode() == ir::OpCode::Sub ||
-       node.opcode() == ir::OpCode::Mul))
-  {
-    const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
-    const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-    /*Broadcasting isn't supported on CPU: no way to differ the existing exec_time record with and
-     * without broadcasting*/
-    if (!(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
-    {
-      return true;
-    }
-  }
-  /* TODO: this is workaround, come up with better solution if have.
-          Adding exception in stage doesn't help. Because if there is a record for Mul without
-          broadcast, scheduling will select it since it doesn't distinguish broadcast and
-          non-broadcast like it does for quant non-quantized*/
-  else if (backend->config()->id() == "acl_neon" && node.opcode() == ir::OpCode::Mul)
-  {
-    const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
-    const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
-    // Nontrivial broadcasting isn't supported yet
-    if (quant ||
-        !(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
-    {
-      return true;
-    }
-  }
+  // Currently, no workaround is needed
   return false;
 }
 
index d8ceca9..b9cee58 100644 (file)
@@ -59,6 +59,8 @@ public:
   {
     for (auto &entry : backend_contexts)
     {
+      if (entry.first->config()->id() == backend::controlflow::Config::ID)
+        continue;
       _all_backends.push_back(entry.first);
     }
     _backend_resolver = std::make_unique<compiler::BackendResolver>();
index 493ca1e..49a9895 100644 (file)
@@ -29,7 +29,7 @@ namespace onert
 namespace compiler
 {
 
-std::vector<ir::OpSequenceIndex> Linear::linearize(const ir::LoweredGraph &lowered_graph)
+std::vector<ir::OpSequenceIndex> Linear::linearize(const compiler::LoweredGraph &lowered_graph)
 {
   std::vector<ir::OpSequenceIndex> order;
   lowered_graph.iterateTopolOpSeqs(
@@ -39,7 +39,7 @@ std::vector<ir::OpSequenceIndex> Linear::linearize(const ir::LoweredGraph &lower
   return order;
 }
 
-void Linear::dump(const ir::LoweredGraph &lowered_graph,
+void Linear::dump(const compiler::LoweredGraph &lowered_graph,
                   const std::vector<ir::OpSequenceIndex> &order)
 {
   {
@@ -62,7 +62,7 @@ void Linear::dump(const ir::LoweredGraph &lowered_graph,
   }
 }
 
-void Linear::planTensors(const ir::LoweredGraph &lowered_graph,
+void Linear::planTensors(const compiler::LoweredGraph &lowered_graph,
                          const std::vector<ir::OpSequenceIndex> &order)
 {
   const auto &graph = lowered_graph.graph();
@@ -180,11 +180,9 @@ void Linear::planTensors(const ir::LoweredGraph &lowered_graph,
           tensor_builder_map[ind]->notifyLastUse(ind);
 
           // plan for deallocation of dynamic tensor
-          if (tensor_builder_map[ind]->supportDynamicTensor())
-          {
-            assert(tensor_builder_map[ind]->dynamicTensorManager());
-            tensor_builder_map[ind]->dynamicTensorManager()->planDealloc(op_idx, ind);
-          }
+          auto dyn_tensor_manager = tensor_builder_map[ind]->dynamicTensorManager();
+          if (dyn_tensor_manager)
+            dyn_tensor_manager->planDealloc(op_idx, ind);
         }
       }
     }
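
The supportDynamicTensor() capability flag is gone; a null dynamicTensorManager() now encodes "not supported", so the capability check and the use site cannot drift apart. In miniature:

struct IDynamicTensorManager
{
  void planDealloc(int, int) { /* record the (operation, operand) pair */ }
};

struct TensorBuilderSketch
{
  // After the change: one source of truth. nullptr == "no dynamic tensors".
  IDynamicTensorManager *dynamicTensorManager() { return _dyn_mgr; }
  IDynamicTensorManager *_dyn_mgr = nullptr;
};

void planDeallocIfSupported(TensorBuilderSketch &tb, int op_idx, int operand_idx)
{
  if (auto *mgr = tb.dynamicTensorManager())
    mgr->planDealloc(op_idx, operand_idx);
}
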
index faeff77..1e24cf9 100644 (file)
@@ -23,7 +23,7 @@
 #include "ir/OpSequences.h"
 #include "ir/Index.h"
 #include "backend/ITensorBuilder.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
 
 namespace onert
 {
@@ -41,10 +41,10 @@ namespace compiler
 class Linear
 {
 public:
-  static std::vector<ir::OpSequenceIndex> linearize(const ir::LoweredGraph &lowered_graph);
-  static void dump(const ir::LoweredGraph &lowered_graph,
+  static std::vector<ir::OpSequenceIndex> linearize(const compiler::LoweredGraph &lowered_graph);
+  static void dump(const compiler::LoweredGraph &lowered_graph,
                    const std::vector<ir::OpSequenceIndex> &order);
-  static void planTensors(const ir::LoweredGraph &lowered_graph,
+  static void planTensors(const compiler::LoweredGraph &lowered_graph,
                           const std::vector<ir::OpSequenceIndex> &order);
 };
 
similarity index 73%
rename from runtime/onert/core/src/ir/LoweredGraph.cc
rename to runtime/onert/core/src/compiler/LoweredGraph.cc
index 8aedfbd..1489a18 100644 (file)
  * limitations under the License.
  */
 
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
 
 #include <assert.h>
 #include <sstream>
 #include "util/logging.h"
-#include "pass/ConstantInsertionPass.h"
-#include "pass/ConstantLoweringPass.h"
-#include "pass/PermutationOperationPass.h"
-#include "pass/PermutationInsertionPass.h"
-#include "pass/PermutationEliminationPass.h"
+#include "compiler/pass/ConstantInsertionPass.h"
+#include "compiler/pass/ConstantLoweringPass.h"
+#include "compiler/pass/PermutationOperationPass.h"
+#include "compiler/pass/PermutationInsertionPass.h"
+#include "compiler/pass/PermutationEliminationPass.h"
 #include "ir/GraphIterator.h"
-#include "verifier/Verifier.h"
+#include "ir/verifier/Verifier.h"
 #include "backend/Backend.h"
 #include "backend/IConfig.h"
 #include "compiler/BackendResolver.h"
 
 namespace onert
 {
-namespace ir
+namespace compiler
 {
 
-LoweredGraph::LoweredGraph(const Graph &graph, const compiler::CompilerOptions &options)
-    : _graph{graph}
+LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph}
 {
   bool linear_executor = (options.executor == "Linear");
 
   // Build backend contexts
-  auto &backend_manager = compiler::BackendManager::get();
+  auto &backend_manager = BackendManager::get();
 
   // Always create Controlflow backend context
   auto cf_backend = backend_manager.getControlflow();
@@ -73,36 +72,37 @@ LoweredGraph::LoweredGraph(const Graph &graph, const compiler::CompilerOptions &
 
   // TODO Move "schedule" phase out of here
   // Schedule
-  std::unique_ptr<compiler::BackendResolver> backend_resolver;
+  std::unique_ptr<BackendResolver> backend_resolver;
   if (options.he_scheduler)
   {
-    auto scheduler = compiler::HEScheduler(_backend_contexts, options);
+    auto scheduler = HEScheduler(_backend_contexts, options);
     backend_resolver = scheduler.schedule(_graph);
     _indexed_ranks = scheduler.getIndexedRanks();
   }
   else
   {
-    auto scheduler = compiler::ManualScheduler(_backend_contexts, options);
+    auto scheduler = ManualScheduler(_backend_contexts, options);
     backend_resolver = scheduler.schedule(_graph);
   }
 
   {
     // operand::LowerInfo holder
-    OperandIndexMap<std::unique_ptr<operand::LowerInfo>> operands_lower_info;
+    ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> operands_lower_info;
 
-    _graph.operands().iterate([&](const OperandIndex &index, const Operand &) {
-      operands_lower_info[index] = std::make_unique<operand::LowerInfo>();
+    _graph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+      operands_lower_info[index] = std::make_unique<ir::operand::LowerInfo>();
     });
 
     // Make op_seqs while checking whether a node can be merged into a op_seq.
     makeOpSequences(operands_lower_info, options, *backend_resolver);
 
-    _op_seqs.iterate([&](const OpSequenceIndex &, OpSequence &op_seq) {
+    _op_seqs.iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
       assert(op_seq.operations().size() > 0);
       std::reverse(std::begin(op_seq.operations()), std::end(op_seq.operations()));
     });
 
-    _op_seqs.dump("merged and sorted operations without permutation", _graph.operations());
+    VERBOSE(OpSequences) << "dump without permutation" << std::endl;
+    dumpOpSequences(_op_seqs, _graph.operations());
 
     pass::ConstantInsertionPass ci_pass(*this);
     ci_pass.run();
@@ -127,17 +127,19 @@ LoweredGraph::LoweredGraph(const Graph &graph, const compiler::CompilerOptions &
     pass::PermutationEliminationPass pe_pass(*this);
     pe_pass.run();
 
-    _op_seqs.dump("merged and sorted operations with permutation", _graph.operations());
+    VERBOSE(OpSequences) << "dump with permutation" << std::endl;
+    dumpOpSequences(_op_seqs, _graph.operations());
   }
 
   // Graph verifications
   {
-    assert(verifier::DAGChecker().verify(_graph));
-    assert(verifier::EdgeConsistencyChecker().verify(_graph));
+    assert(ir::verifier::DAGChecker().verify(_graph));
+    assert(ir::verifier::EdgeConsistencyChecker().verify(_graph));
   }
 }
 
-const operation::LowerInfo *LoweredGraph::getLowerInfo(const OpSequenceIndex &op_seq_index) const
+const ir::operation::LowerInfo *
+LoweredGraph::getLowerInfo(const ir::OpSequenceIndex &op_seq_index) const
 {
   auto itr = _lower_info_map.op_seq.find(op_seq_index);
   if (itr == _lower_info_map.op_seq.end())
@@ -145,13 +147,13 @@ const operation::LowerInfo *LoweredGraph::getLowerInfo(const OpSequenceIndex &op
   return itr->second.get();
 }
 
-void LoweredGraph::setLowerInfo(const OpSequenceIndex &op_seq_index,
-                                std::unique_ptr<operation::LowerInfo> &&lower_info)
+void LoweredGraph::setLowerInfo(const ir::OpSequenceIndex &op_seq_index,
+                                std::unique_ptr<ir::operation::LowerInfo> &&lower_info)
 {
   _lower_info_map.op_seq.insert(std::make_pair(op_seq_index, std::move(lower_info)));
 }
 
-void LoweredGraph::removeLowerInfo(const OpSequenceIndex &op_seq_index)
+void LoweredGraph::removeLowerInfo(const ir::OpSequenceIndex &op_seq_index)
 {
   auto &op_seq_lower_info = _lower_info_map.op_seq;
   assert(op_seq_lower_info.find(op_seq_index) != op_seq_lower_info.end());
@@ -165,7 +167,7 @@ void LoweredGraph::removeLowerInfo(const OpSequenceIndex &op_seq_index)
   }
 }
 
-const operand::LowerInfo *LoweredGraph::getLowerInfo(const OperandIndex &index) const
+const ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index) const
 {
   auto itr = _lower_info_map.operand.find(index);
   if (itr == _lower_info_map.operand.end())
@@ -173,7 +175,7 @@ const operand::LowerInfo *LoweredGraph::getLowerInfo(const OperandIndex &index)
   return itr->second.get();
 }
 
-operand::LowerInfo *LoweredGraph::getLowerInfo(const OperandIndex &index)
+ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index)
 {
   auto itr = _lower_info_map.operand.find(index);
   if (itr == _lower_info_map.operand.end())
@@ -181,25 +183,26 @@ operand::LowerInfo *LoweredGraph::getLowerInfo(const OperandIndex &index)
   return itr->second.get();
 }
 
-void LoweredGraph::setLowerInfo(const OperandIndex &index,
-                                std::unique_ptr<operand::LowerInfo> &&lower_info)
+void LoweredGraph::setLowerInfo(const ir::OperandIndex &index,
+                                std::unique_ptr<ir::operand::LowerInfo> &&lower_info)
 {
   _lower_info_map.operand.insert(std::make_pair(index, std::move(lower_info)));
 }
 
-void LoweredGraph::removeLowerInfo(const OperandIndex &index)
+void LoweredGraph::removeLowerInfo(const ir::OperandIndex &index)
 {
   _lower_info_map.operand.erase(index);
 }
 
 void LoweredGraph::iterateTopolOpSeqs(
-    const std::function<void(const OpSequenceIndex &, const OpSequence &)> &fn) const
+    const std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> &fn) const
 {
-  // Topological Sorting for OpSequences
-  std::vector<OpSequenceIndex> topol_sorted;
-  PostDfsIterator<true>{}.iterateOpSeqs(
-      *this,
-      [&](const OpSequenceIndex &index, const OpSequence &) { topol_sorted.emplace_back(index); });
+  // Topological Sorting for OpSequences
+  std::vector<ir::OpSequenceIndex> topol_sorted;
+  ir::PostDfsIterator<true>{}.iterateOpSeqs(
+      *this, [&](const ir::OpSequenceIndex &index, const ir::OpSequence &) {
+        topol_sorted.emplace_back(index);
+      });
   std::reverse(topol_sorted.begin(), topol_sorted.end());
   for (const auto op_seq_idx : topol_sorted)
   {
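
Both iterateTopolOpSeqs overloads derive a topological order the same way: collect op sequences in post-DFS order, then reverse, which guarantees every producer is visited before its consumers. The same trick on a plain adjacency list:

#include <algorithm>
#include <functional>
#include <vector>

// Returns a topological order of a DAG: the reverse of a DFS post-order.
std::vector<int> topoOrder(const std::vector<std::vector<int>> &adj)
{
  std::vector<int> post;
  std::vector<bool> seen(adj.size(), false);

  // Recursion keeps the sketch short; an explicit stack avoids deep recursion.
  std::function<void(int)> dfs = [&](int v) {
    seen[v] = true;
    for (int w : adj[v])
      if (!seen[w])
        dfs(w);
    post.push_back(v); // post-order: a node is emitted after all its successors
  };
  for (int v = 0; v < static_cast<int>(adj.size()); ++v)
    if (!seen[v])
      dfs(v);

  std::reverse(post.begin(), post.end()); // reversed post-order == topological order
  return post;
}
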
@@ -209,12 +212,14 @@ void LoweredGraph::iterateTopolOpSeqs(
 }
 
 void LoweredGraph::iterateTopolOpSeqs(
-    const std::function<void(const OpSequenceIndex &, OpSequence &)> &fn)
+    const std::function<void(const ir::OpSequenceIndex &, ir::OpSequence &)> &fn)
 {
-  // Topological Sorting for OpSequences
-  std::vector<OpSequenceIndex> topol_sorted;
-  PostDfsIterator<false>{}.iterateOpSeqs(
-      *this, [&](const OpSequenceIndex &index, OpSequence &) { topol_sorted.emplace_back(index); });
+  // Topological Sorting for OpSequences
+  std::vector<ir::OpSequenceIndex> topol_sorted;
+  ir::PostDfsIterator<false>{}.iterateOpSeqs(
+      *this, [&](const ir::OpSequenceIndex &index, ir::OpSequence &) {
+        topol_sorted.emplace_back(index);
+      });
   std::reverse(topol_sorted.begin(), topol_sorted.end());
   for (const auto op_seq_idx : topol_sorted)
   {
@@ -223,12 +228,12 @@ void LoweredGraph::iterateTopolOpSeqs(
   }
 }
 
-OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const OperationIndex &node_index,
-                                                          const Operation &node)
+ir::OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const ir::OperationIndex &node_index,
+                                                              const ir::Operation &node)
 {
   // Create a fresh op_seq with one operation, and append it to op_seqs
   // Create a fresh op_seq
-  auto op_seq = std::make_unique<OpSequence>(_graph.layout());
+  auto op_seq = std::make_unique<ir::OpSequence>(_graph.layout());
 
   // Add an operation
   op_seq->appendOperation(node_index);
@@ -241,21 +246,21 @@ OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const OperationIndex &
 }
 
 void LoweredGraph::makeOpSequences(
-    OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info,
-    const compiler::CompilerOptions &options, const compiler::BackendResolver &backend_resolver)
+    ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
+    const CompilerOptions &options, const BackendResolver &backend_resolver)
 {
   // if SUBG_MAX_NODE == 0, no limit on nodes of a op_seq
   const int op_seq_max_node = options.op_seq_max_node;
   assert(op_seq_max_node >= 0);
 
   bool is_profiling = options.he_profiling_mode;
-  OpSequence *op_seq = nullptr;
-  OpSequenceIndex op_seq_index;
+  ir::OpSequence *op_seq = nullptr;
+  ir::OpSequenceIndex op_seq_index;
 
   // NOTE: The method below appends nodes while creating a new op_seq when needed. If there are
   // better ways, happy to update this code.
-  PostDfsConstIterator{}.iterate(
-      _graph, [&](const OperationIndex &node_index, const Operation &node) {
+  ir::PostDfsConstIterator{}.iterate(
+      _graph, [&](const ir::OperationIndex &node_index, const ir::Operation &node) {
         // LowerInfo for in/output operands
         auto backend = backend_resolver.getBackend(node_index);
 
@@ -269,12 +274,12 @@ void LoweredGraph::makeOpSequences(
         for (auto operand : node.getInputs() | ir::Remove::UNDEFINED)
         {
           auto &&lower_info = operands_lower_info.at(operand);
-          lower_info->addUsePermuteFactor(operand::PermuteFactor{backend, backend_layout});
+          lower_info->addUsePermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
         }
         for (auto operand : node.getOutputs())
         {
           auto &&lower_info = operands_lower_info.at(operand);
-          lower_info->addDefPermuteFactor(operand::PermuteFactor{backend, backend_layout});
+          lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
         }
 
         bool new_op_seq = (op_seq == nullptr ||
@@ -288,9 +293,9 @@ void LoweredGraph::makeOpSequences(
         {
           auto new_op_seq_index = appendFreshSingleOpSequence(node_index, node);
 
-          // OpSequence LowerInfo
+          // ir::OpSequence LowerInfo
           setLowerInfo(new_op_seq_index,
-                       std::make_unique<operation::LowerInfo>(backend, backend_layout));
+                       std::make_unique<ir::operation::LowerInfo>(backend, backend_layout));
 
           op_seq_index = new_op_seq_index;
           op_seq = &(_op_seqs.at(new_op_seq_index));
@@ -318,16 +323,17 @@ void LoweredGraph::makeOpSequences(
 }
 
 void LoweredGraph::manipulateLowerInfo(
-    OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info, bool is_primary)
+    ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
+    bool is_primary)
 {
-  const auto controlflow_backend = compiler::BackendManager::get().getControlflow();
+  const auto controlflow_backend = BackendManager::get().getControlflow();
 
   // TODO Rather than handling primary graph specially,
   //      let the permute inserted and remove it later
   if (is_primary)
   {
     // TODO Rather than using NHWC Get frontend layout of this node from IR
-    auto factor = operand::PermuteFactor{controlflow_backend, Layout::NHWC};
+    auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC};
     for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED)
     {
       auto &&lower_info = operands_lower_info.at(index);
@@ -355,9 +361,9 @@ void LoweredGraph::manipulateLowerInfo(
       else
       {
         // In case of that an operand is Graph's input and not input or output of any operation
-        lower_info->addDefPermuteFactor(operand::PermuteFactor{
+        lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{
             controlflow_backend,
-            Layout::NHWC // TODO Get frontend layout of this node from IR
+            ir::Layout::NHWC // TODO Get frontend layout of this node from IR
         });
       }
     }
@@ -368,15 +374,15 @@ void LoweredGraph::manipulateLowerInfo(
     if (lower_info->def_factors().size() == 0)
     {
       // In case of that an operand is Graph's output and not input or output of any operation
-      lower_info->addDefPermuteFactor(operand::PermuteFactor{
+      lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{
           controlflow_backend,
-          Layout::NHWC // TODO Get frontend layout of this node from IR
+          ir::Layout::NHWC // TODO Get frontend layout of this node from IR
       });
     }
   }
 
   // Set LowerInfo for each operand from the operand::LowerInfo holder
-  _graph.operands().iterate([&](const OperandIndex &index, Operand &) {
+  _graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &) {
     setLowerInfo(index, std::move(operands_lower_info[index]));
   });
 }
@@ -388,11 +394,11 @@ void LoweredGraph::dumpLowerInfo()
 
   std::map<uint32_t, std::string> dumps;
 
-  _graph.operands().iterate([&](const OperandIndex &index, Operand &object) {
+  _graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &object) {
     std::stringstream sstream;
     if (!getLowerInfo(index)->def_factors().empty() || !getLowerInfo(index)->use_factors().empty())
     {
-      auto factors_to_string = [](const operand::PermuteFactorSet &factors) {
+      auto factors_to_string = [](const ir::operand::PermuteFactorSet &factors) {
         std::string str;
         for (auto factor : factors)
         {
@@ -403,7 +409,7 @@ void LoweredGraph::dumpLowerInfo()
         return "{ " + str + "}";
       };
 
-      auto operation_index_to_string = [](const OperationIndexSet &operations) {
+      auto operation_index_to_string = [](const ir::OperationIndexSet &operations) {
         std::string str;
         for (auto op : operations)
         {
@@ -427,8 +433,8 @@ void LoweredGraph::dumpLowerInfo()
         sstream << (shape.dim(i)) << " ";
       }
       sstream << "}" << std::endl;
-      sstream << "  - Def Operations  : " << def_ops << std::endl;
-      sstream << "  - Use Operations  : " << use_ops << std::endl;
+      sstream << "  - Def ir::Operations  : " << def_ops << std::endl;
+      sstream << "  - Use ir::Operations  : " << use_ops << std::endl;
       sstream << "  - Lower Info" << std::endl;
       sstream << "    - Def Backends    : " << def_layouts << std::endl;
       sstream << "    - Use Backends    : " << use_layouts << std::endl;
@@ -445,8 +451,9 @@ void LoweredGraph::dumpLowerInfo()
   }
 }
 
-bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const OperationIndex &node_index,
-                             Layout layout, const compiler::BackendResolver &backend_resolver)
+bool LoweredGraph::mergeable(const ir::OpSequenceIndex &op_seq_index,
+                             const ir::OperationIndex &node_index, ir::Layout layout,
+                             const BackendResolver &backend_resolver)
 {
   // Are they mergeable?
   // 1. the same backend id and layout?
@@ -470,10 +477,10 @@ bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const Operatio
 
   // Branched?
   {
-    std::unordered_set<OperationIndex> branched_set;
+    std::unordered_set<ir::OperationIndex> branched_set;
 
     // Check for branching up
-    for (const auto &input : op_seq.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED)
+    for (const auto &input : op_seq.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
     {
       const auto &input_obj = _graph.operands().at(input);
       auto def = input_obj.getDef();
@@ -489,7 +496,7 @@ bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const Operatio
     branched_set.clear();
 
     // Check for branching down
-    for (const auto &output : node.getOutputs() | Remove::DUPLICATED)
+    for (const auto &output : node.getOutputs() | ir::Remove::DUPLICATED)
     {
       // TODO Fix this workaround for the case of model outputs that are used by another operation
       //      This is needed since the branching is decided by operation, but for model outputs,
@@ -516,7 +523,7 @@ bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const Operatio
     const auto &node_outputs = node.getOutputs();
 
     // op_seq's operations are in order so that we just check the first and the last
-    std::vector<OperationIndex> op_seq_ops{op_seq.operations()[0]};
+    std::vector<ir::OperationIndex> op_seq_ops{op_seq.operations()[0]};
     if (op_seq.operations().size() > 1)
       op_seq_ops.emplace_back(op_seq.operations()[op_seq.operations().size() - 1]);
 
@@ -556,5 +563,5 @@ bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const Operatio
   return false;
 }
 
-} // namespace ir
+} // namespace compiler
 } // namespace onert
index 1d591ae..ed49ee5 100644 (file)
@@ -40,7 +40,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
   const auto &manual_options = _options.manual_scheduler_options;
   auto backend_resolver = std::make_unique<compiler::BackendResolver>();
 
-  // This fallback will be used for unavailable backends
+  // This fallback will be used in case `backend_for_all` is unavailable
   auto fallback = [&]() -> const backend::Backend * {
     for (auto backend_id : _options.backend_list)
     {
@@ -50,7 +50,8 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
     }
     return nullptr;
   }();
-  assert(fallback != nullptr); // There must be at least one fallback
+  if (fallback == nullptr)
+    throw std::runtime_error{"No loaded backends available."};
 
   // 1. Backend for All operations
   const backend::Backend *backend_all = resolveBackend(manual_options.backend_for_all, fallback);
@@ -110,7 +111,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
 const backend::Backend *ManualScheduler::resolveBackend(const std::string &id,
                                                         const backend::Backend *fallback)
 {
-  // Ensure if the backend is available in the backend
+  // Check whether the backend is available in the current backend contexts
   const backend::Backend *backend = BackendManager::get().get(id);
   if (!backend || _backend_contexts.find(backend) == _backend_contexts.end())
   {
index 4449631..f7f659e 100644 (file)
@@ -68,19 +68,6 @@ void OperationValidator::operator()()
       [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
 }
 
-void OperationValidator::visit(const ir::operation::Abs &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::AvgPool2D &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  if (_ctx.at(ofm_index).info().isDynamic())
-    return;
-
-  const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
-  OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
-}
-
 void OperationValidator::visit(const ir::operation::BatchMatMul &node)
 {
   const auto lhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::LHS));
@@ -125,17 +112,6 @@ void OperationValidator::visit(const ir::operation::BatchToSpaceND &node)
   OP_REQUIRES(input_shape.C == output_shape.C);
 }
 
-void OperationValidator::visit(const ir::operation::Cast &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  if (_ctx.at(output_index).info().isDynamic())
-    return;
-
-  const auto input_index{node.getInputs().at(0)};
-
-  OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
-
 void OperationValidator::visit(const ir::operation::Comparison &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -177,6 +153,17 @@ void OperationValidator::visit(const ir::operation::InstanceNorm &node)
   OP_REQUIRES(_ctx.at(beta_index).shape().rank() == 1);
 }
 
+void OperationValidator::visit(const ir::operation::Pool2D &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  if (_ctx.at(ofm_index).info().isDynamic())
+    return;
+
+  const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
+
+  OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
+}
+
 void OperationValidator::visit(const ir::operation::Permute &node)
 {
   VERBOSE(Permute) << "Configure Permute operation" << std::endl;
@@ -298,8 +285,6 @@ void OperationValidator::visit(const ir::operation::RNN &node)
               num_units == _ctx.at(hidden_state_out_index).shape().dim(1));
 }
 
-void OperationValidator::visit(const ir::operation::Round &node) { checkUnaryOp(node); }
-
 void OperationValidator::visit(const ir::operation::SpaceToBatchND &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
@@ -353,6 +338,51 @@ void OperationValidator::visit(const ir::operation::SpaceToDepth &node)
   OP_REQUIRES(input_shape.C * block_size * block_size == output_shape.C);
 }
 
+void OperationValidator::visit(const ir::operation::ElementwiseActivation &node)
+{
+  checkUnaryOp(node);
+}
+
+void OperationValidator::visit(const ir::operation::ElementwiseBinary &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
+
+  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
+  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
+}
+
+void OperationValidator::visit(const ir::operation::ElementwiseUnary &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
+
+  OP_REQUIRES(node.getInputs().size() == 1);
+  OP_REQUIRES(node.getOutputs().size() == 1);
+
+  // Check if I/O types match
+  if (node.param().op_type == ir::operation::ElementwiseUnary::Type::DEQUANTIZE)
+  {
+    OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
+    OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::FLOAT32);
+  }
+  else if (node.param().op_type == ir::operation::ElementwiseUnary::Type::QUANTIZE)
+  {
+    OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::FLOAT32);
+    OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
+  }
+  else if (node.param().op_type != ir::operation::ElementwiseUnary::Type::CAST)
+  {
+    OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
+  }
+
+  if (_ctx.at(output_index).info().isDynamic())
+    return;
+
+  OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+}
+
 void OperationValidator::visit(const ir::operation::EmbeddingLookup &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -389,8 +419,6 @@ void OperationValidator::visit(const ir::operation::EmbeddingLookup &node)
   }
 }
 
-void OperationValidator::visit(const ir::operation::Exp &node) { checkUnaryOp(node); }
-
 void OperationValidator::visit(const ir::operation::ExpandDims &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -405,8 +433,6 @@ void OperationValidator::visit(const ir::operation::ExpandDims &node)
   OP_REQUIRES(_ctx.at(axis_index).shape().rank() <= 1);
 }
 
-void OperationValidator::visit(const ir::operation::Floor &node) { checkUnaryOp(node); }
-
 void OperationValidator::visit(const ir::operation::HashtableLookup &node)
 {
   const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
@@ -495,21 +521,6 @@ void OperationValidator::visit(const ir::operation::Gather &node)
   OP_REQUIRES(ofm_shape.rank() <= 4);
 }
 
-void OperationValidator::visit(const ir::operation::Dequantize &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-
-  const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
-
-  OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
-  OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::FLOAT32);
-
-  if (_ctx.at(output_index).info().isDynamic())
-    return;
-  OP_REQUIRES(_ctx.at(input_index).shape().rank() <= 4);
-  OP_REQUIRES(_ctx.at(input_index).shape() == _ctx.at(output_index).shape());
-}
-
 void OperationValidator::visit(const ir::operation::DepthToSpace &node)
 {
   // param check
@@ -822,30 +833,6 @@ void OperationValidator::visit(const ir::operation::Pad &node)
   OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
 }
 
-void OperationValidator::visit(const ir::operation::Min &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  // This validator does not check shape. So checking isDynamic() is skipped.
-
-  const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
-  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
-  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
-void OperationValidator::visit(const ir::operation::Max &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  // This validator does not check shape. So checking isDynamic() is skipped.
-
-  const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
-  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
-  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
 void OperationValidator::visit(const ir::operation::Select &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -899,12 +886,6 @@ void OperationValidator::visit(const ir::operation::Split &node)
   OP_REQUIRES(_ctx.at(input_index).shape().dim(axis) % num_splits == 0);
 }
 
-void OperationValidator::visit(const ir::operation::Cos &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::Sin &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::RSQRT &node) { checkUnaryOp(node); }
-
 void OperationValidator::visit(const ir::operation::Shape &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -961,12 +942,6 @@ void OperationValidator::visit(const ir::operation::While &node)
   // TODO Add to validate with subgraphs
 }
 
-void OperationValidator::visit(const ir::operation::Neg &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::Log &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::LogicalNot &node) { checkUnaryOp(node); }
-
 void OperationValidator::visit(const ir::operation::SquaredDifference &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -1027,16 +1002,6 @@ void OperationValidator::visit(const ir::operation::Tile &node)
   OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
 }
 
-void OperationValidator::visit(const ir::operation::LogicalOr &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(0)};
-  const auto rhs_index{node.getInputs().at(1)};
-
-  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
-  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
 void OperationValidator::visit(const ir::operation::Range &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -1084,24 +1049,5 @@ void OperationValidator::visit(const ir::operation::LogSoftmax &node)
   OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
 }
 
-void OperationValidator::visit(const ir::operation::Quantize &node)
-{
-  VERBOSE(Quantize) << "Configure Quantize operation" << std::endl;
-
-  OP_REQUIRES(node.getInputs().size() == 1);
-  OP_REQUIRES(node.getOutputs().size() == 1);
-
-  const auto input_index{node.getInputs().at(0)};
-  const auto output_index{node.getOutputs().at(0)};
-
-  OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::FLOAT32);
-
-  if (_ctx.at(output_index).info().isDynamic())
-    return;
-
-  OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
-
-  OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
-}
 } // namespace compiler
 } // namespace onert
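
The new `ElementwiseUnary` visitor above folds the deleted `Dequantize`, `Quantize`, and `Cast` validators into a single type rule: DEQUANTIZE forces quant8-to-float, QUANTIZE the reverse, CAST accepts any pair, and every other unary op must preserve the element type. A self-contained sketch of that dispatch; the enums and the `require` helper are stand-ins, not the onert types.

#include <stdexcept>

// Stand-in enums; the real code uses ir::DataType and
// ir::operation::ElementwiseUnary::Type.
enum class DataType { FLOAT32, QUANT_UINT8_ASYMM, INT32 };
enum class UnaryType { ABS, CAST, DEQUANTIZE, QUANTIZE, NEG };

// Mirrors the consolidated I/O type rule of the ElementwiseUnary visitor.
void checkUnaryTypes(UnaryType op, DataType in, DataType out)
{
  auto require = [](bool cond) {
    if (!cond)
      throw std::runtime_error{"type check failed"};
  };

  if (op == UnaryType::DEQUANTIZE)
  {
    require(in == DataType::QUANT_UINT8_ASYMM);
    require(out == DataType::FLOAT32);
  }
  else if (op == UnaryType::QUANTIZE)
  {
    require(in == DataType::FLOAT32);
    require(out == DataType::QUANT_UINT8_ASYMM);
  }
  else if (op != UnaryType::CAST)
  {
    require(in == out); // all remaining unary ops preserve the element type
  }
}
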
index b27e686..deb6357 100644
@@ -44,58 +44,45 @@ public:
   void operator()();
 
 public:
-  void visit(const ir::operation::Abs &node) override;
-  void visit(const ir::operation::AvgPool2D &node) override;
   void visit(const ir::operation::BatchMatMul &node) override;
   void visit(const ir::operation::BatchToSpaceND &node) override;
-  void visit(const ir::operation::Cast &node) override;
   void visit(const ir::operation::Comparison &node) override;
   void visit(const ir::operation::Softmax &node) override;
   void visit(const ir::operation::InstanceNorm &node) override;
   void visit(const ir::operation::Permute &node) override;
+  void visit(const ir::operation::Pool2D &node) override;
   void visit(const ir::operation::Reduce &node) override;
   void visit(const ir::operation::Transpose &node) override;
   void visit(const ir::operation::RNN &node) override;
-  void visit(const ir::operation::Round &node) override;
   void visit(const ir::operation::SpaceToBatchND &node) override;
   void visit(const ir::operation::SpaceToDepth &node) override;
+  void visit(const ir::operation::ElementwiseActivation &node) override;
+  void visit(const ir::operation::ElementwiseBinary &node) override;
+  void visit(const ir::operation::ElementwiseUnary &node) override;
   void visit(const ir::operation::EmbeddingLookup &node) override;
-  void visit(const ir::operation::Exp &node) override;
   void visit(const ir::operation::ExpandDims &node) override;
-  void visit(const ir::operation::Floor &node) override;
   void visit(const ir::operation::HashtableLookup &node) override;
   void visit(const ir::operation::TransposeConv &node) override;
   void visit(const ir::operation::Gather &node) override;
-  void visit(const ir::operation::Dequantize &node) override;
   void visit(const ir::operation::DepthToSpace &node) override;
   void visit(const ir::operation::Pack &node) override;
   void visit(const ir::operation::LSTM &node) override;
   void visit(const ir::operation::L2Normalization &node) override;
   void visit(const ir::operation::Unpack &node) override;
   void visit(const ir::operation::Pad &node) override;
-  void visit(const ir::operation::Min &node) override;
-  void visit(const ir::operation::Max &node) override;
   void visit(const ir::operation::Select &node) override;
   void visit(const ir::operation::StridedSlice &node) override;
   void visit(const ir::operation::Split &node) override;
-  void visit(const ir::operation::Cos &node) override;
-  void visit(const ir::operation::Sin &node) override;
-  void visit(const ir::operation::RSQRT &node) override;
   void visit(const ir::operation::Shape &node) override;
   void visit(const ir::operation::ResizeBilinear &node) override;
   void visit(const ir::operation::Reverse &node) override;
   void visit(const ir::operation::If &node) override;
   void visit(const ir::operation::While &node) override;
-  void visit(const ir::operation::Neg &node) override;
-  void visit(const ir::operation::Log &node) override;
-  void visit(const ir::operation::LogicalNot &node) override;
   void visit(const ir::operation::SquaredDifference &node) override;
   void visit(const ir::operation::Tile &node) override;
-  void visit(const ir::operation::LogicalOr &node) override;
   void visit(const ir::operation::Range &node) override;
   void visit(const ir::operation::MatrixBandPart &node) override;
   void visit(const ir::operation::LogSoftmax &node) override;
-  void visit(const ir::operation::Quantize &node) override;
 
 private:
   void checkUnaryOp(const ir::Operation &node);
index 76c1edc..4eba1ff 100644
@@ -25,6 +25,64 @@ namespace onert
 namespace compiler
 {
 
+bool StaticShapeInferer::infer(const ir::OpSequence &op_seq)
+{
+  bool has_dynamic_tensor = false;
+
+  for (const auto &operation_idx : op_seq.operations())
+  {
+    auto &op = _operations.at(operation_idx);
+    auto opcode = op.opcode();
+
+    _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit()
+
+    // If: shape inference is also needed for the then/else subgraphs
+    // While: shape inference is also needed for the cond/body subgraphs
+    if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
+    {
+      op.accept(*this);
+    }
+    else
+    {
+      _return_has_dynamic_tensor = checkDynamicInput(op);
+
+      if (_return_has_dynamic_tensor)
+      {
+        setDynamicOutput(op);
+      }
+      else
+      {
+        op.accept(*this);
+      }
+    }
+
+    has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor;
+  }
+
+  return has_dynamic_tensor;
+}
+
+bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op)
+{
+  for (auto input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED)
+  {
+    if (_operands.at(input_idx).info().isDynamic())
+    {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+void StaticShapeInferer::setDynamicOutput(const ir::Operation &op)
+{
+  for (auto output_idx : op.getOutputs())
+  {
+    _operands.at(output_idx).info().setDynamic();
+  }
+}
+
 void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
                                                   const ir::OperandIndex lhs_idx,
                                                   const ir::OperandIndex rhs_idx)
@@ -35,13 +93,6 @@ void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
 
-  if (lhs.info().isDynamic() || rhs.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   // re-sizing output shape
   ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs.info().shape(), rhs.info().shape());
   output.info().shape(new_shape);
@@ -56,14 +107,6 @@ void StaticShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
 
-  // if input is dynamic, output also becomes dynamic
-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   // re-sizing output shape
   ir::Shape new_shape = input.info().shape();
   output.info().shape(new_shape);
@@ -99,17 +142,6 @@ void StaticShapeInferer::dump()
   }
 }
 
-void StaticShapeInferer::visit(const ir::operation::Abs &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Abs::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Add &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Add::Input::LHS),
-                           op.getInputs().at(ir::operation::Add::Input::RHS));
-}
-
 void StaticShapeInferer::visit(const ir::operation::ArgMax &op)
 {
   const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
@@ -118,15 +150,6 @@ void StaticShapeInferer::visit(const ir::operation::ArgMax &op)
   // get mutable output operand
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
-
-  // if input is dynamic, output also becomes dynamic
-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   const auto rank = input.info().shape().rank();
   const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
 
@@ -145,35 +168,22 @@ void StaticShapeInferer::visit(const ir::operation::BatchMatMul &op)
   const auto lhs = _operands.at(lhs_index);
   const auto rhs = _operands.at(rhs_index);
   auto &output = _operands.at(output_index);
-
-  if (lhs.info().isDynamic() || rhs.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   auto new_shape = shape_inference::inferBatchMatMulShape(lhs.shape(), rhs.shape(), op.param());
   output.info().shape(new_shape);
 }
 
-void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
+void StaticShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
 {
-  const auto input_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::INPUT)};
-  const auto &input = _operands.at(input_idx);
+  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS),
+                           op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS));
+}
 
+void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
+{
   // get mutable output operand
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
 
-  // if input is dynamic, output also becomes dynamic.
-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   const auto shape_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::SHAPE)};
   const auto &shape = _operands.at(shape_idx);
 
@@ -192,11 +202,6 @@ void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
   output.info().shape(new_shape);
 }
 
-void StaticShapeInferer::visit(const ir::operation::Cast &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cast::Input::INPUT));
-}
-
 void StaticShapeInferer::visit(const ir::operation::Comparison &op)
 {
   handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0),
@@ -215,14 +220,6 @@ void StaticShapeInferer::visit(const ir::operation::Concat &op)
   {
     const auto input_idx{op.getInputs().at(i)};
     const auto &input = _operands.at(input_idx);
-
-    if (input.info().isDynamic())
-    {
-      output.info().setDynamic();
-      _return_has_dynamic_tensor = true;
-      return;
-    }
-
     input_shapes.emplace_back(input.shape());
   }
 
@@ -241,33 +238,26 @@ void StaticShapeInferer::visit(const ir::operation::Conv2D &op)
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
 
-  if (input.info().isDynamic() || ker.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   // re-sizing output shape
   ir::Shape new_shape =
       shape_inference::inferConv2DShape(input.info().shape(), ker.info().shape(), op.param());
   output.info().shape(new_shape);
 }
 
-void StaticShapeInferer::visit(const ir::operation::Cos &op)
+void StaticShapeInferer::visit(const ir::operation::ElementwiseActivation &op)
 {
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cos::Input::INPUT));
+  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT));
 }
 
-void StaticShapeInferer::visit(const ir::operation::Div &op)
+void StaticShapeInferer::visit(const ir::operation::ElementwiseBinary &op)
 {
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Div::Input::LHS),
-                           op.getInputs().at(ir::operation::Div::Input::RHS));
+  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS),
+                           op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS));
 }
 
-void StaticShapeInferer::visit(const ir::operation::Exp &op)
+void StaticShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
 {
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Exp::Input::INPUT));
+  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT));
 }
 
 void StaticShapeInferer::visit(const ir::operation::ExpandDims &op)
@@ -279,13 +269,6 @@ void StaticShapeInferer::visit(const ir::operation::ExpandDims &op)
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
 
-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   if (!axis.isConstant())
   {
     output.info().setDynamic();
@@ -310,13 +293,6 @@ void StaticShapeInferer::visit(const ir::operation::Fill &op)
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
 
-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   if (!input.isConstant())
   {
     output.info().setDynamic();
@@ -345,15 +321,6 @@ void StaticShapeInferer::visit(const ir::operation::FullyConnected &op)
   // get mutable output operand
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
-
-  // if input or ker is dynamic, output also becomes dynamic
-  if (input.info().isDynamic() || ker.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   // re-sizing output shape
   ir::Shape new_shape =
       shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape());
@@ -376,15 +343,6 @@ void StaticShapeInferer::visit(const ir::operation::Gather &op)
 
   const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)};
   const auto &indices = _operands.at(indices_idx);
-
-  // if input is dynamic, output also becomes dynamic
-  if (input.info().isDynamic() || indices.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   const auto rank = input.info().shape().rank();
   const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
 
@@ -476,27 +434,6 @@ void StaticShapeInferer::visit(const ir::operation::If &op)
   }
 }
 
-void StaticShapeInferer::visit(const ir::operation::Log &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Log::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::LogicalNot &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::LogicalNot::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::LogicalOr &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::LogicalOr::Input::INPUT0),
-                           op.getInputs().at(ir::operation::LogicalOr::Input::INPUT1));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Logistic &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Logistic::Input::INPUT));
-}
-
 void StaticShapeInferer::visit(const ir::operation::L2Normalization &op)
 {
   handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::Input::INPUT));
@@ -507,29 +444,6 @@ void StaticShapeInferer::visit(const ir::operation::MatrixBandPart &op)
   handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT));
 }
 
-void StaticShapeInferer::visit(const ir::operation::Max &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Max::Input::LHS),
-                           op.getInputs().at(ir::operation::Max::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Min &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Min::Input::LHS),
-                           op.getInputs().at(ir::operation::Min::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Mul &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Mul::Input::LHS),
-                           op.getInputs().at(ir::operation::Mul::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Neg &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Neg::Input::INPUT));
-}
-
 void StaticShapeInferer::visit(const ir::operation::OneHot &op)
 {
   const auto indice_idx{op.getInputs().at(ir::operation::OneHot::Input::INDICES)};
@@ -542,7 +456,7 @@ void StaticShapeInferer::visit(const ir::operation::OneHot &op)
   auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
 
-  if (indice.info().isDynamic() || depth.info().isDynamic() || !depth.isConstant())
+  if (!depth.isConstant())
   {
     output.info().setDynamic();
     _return_has_dynamic_tensor = true;
@@ -558,18 +472,6 @@ void StaticShapeInferer::visit(const ir::operation::OneHot &op)
 
 void StaticShapeInferer::visit(const ir::operation::Pack &op)
 {
-  bool is_any_of_inputs_dynamic = [&]() -> bool {
-    for (uint32_t i = 0; i < op.getInputs().size(); ++i)
-    {
-      const auto &input = _operands.at(op.getInputs().at(i));
-      if (input.info().isDynamic())
-      {
-        return true;
-      }
-    }
-    return false;
-  }();
-
   const auto input_idx{op.getInputs().at(0)};
   const auto &input = _operands.at(input_idx);
 
@@ -577,14 +479,6 @@ void StaticShapeInferer::visit(const ir::operation::Pack &op)
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
 
-  // if input is dynamic, output also becomes dynamic
-  if (is_any_of_inputs_dynamic)
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   const auto rank = input.shape().rank() + 1;
   const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
   const auto num = op.param().num;
@@ -608,14 +502,6 @@ void StaticShapeInferer::visit(const ir::operation::Pad &op)
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
 
-  // if input is dynamic or pad is dynamic, output also becomes dynamic
-  if (input.info().isDynamic() || pad.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   // if pad is not constant, output also becomes dynamic
   if (!pad.isConstant())
   {
@@ -638,13 +524,6 @@ void StaticShapeInferer::visit(const ir::operation::Permute &op)
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
 
-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   // re-sizing output shape
   // Permute is a special operation whose input/output layouts may differ on the backend.
   // However, that is not applied here, so input/output keep the frontend layout, because
@@ -672,13 +551,6 @@ void StaticShapeInferer::visit(const ir::operation::Range &op)
   // get mutable output operand
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
-  // if any input is dynamic, output also becomes dynamic
-  if (start_op.info().isDynamic() || limit_op.info().isDynamic() || delta_op.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
 
   ir::Shape new_shape;
   if (start_op.isConstant() && limit_op.isConstant() && delta_op.isConstant())
@@ -716,14 +588,6 @@ void StaticShapeInferer::visit(const ir::operation::Reduce &op)
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
 
-  // if input is dynamic, output also becomes dynamic
-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   std::vector<int32_t> axes_vec;
   for (size_t i = 0; i < axes.shape().num_elements(); ++i)
   {
@@ -761,14 +625,6 @@ void StaticShapeInferer::visit(const ir::operation::Reshape &op)
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
 
-  // if input is dynamic, output also becomes dynamic
-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   // New shape is given by second input tensor
   if (op.getInputs().size() == 2)
   {
@@ -827,14 +683,6 @@ void StaticShapeInferer::visit(const ir::operation::ResizeBilinear &op)
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
 
-  // if input is dynamic, output also becomes dynamic
-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   // Shape inferencing logic based on Params
   ir::Shape new_shape = shape_inference::inferResizeBilinearShape(
       input.shape(), op.param().height_out, op.param().width_out);
@@ -852,16 +700,6 @@ void StaticShapeInferer::visit(const ir::operation::Reverse &op)
   handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::Input::INPUT));
 }
 
-void StaticShapeInferer::visit(const ir::operation::Round &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Round::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::RSQRT &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::RSQRT::Input::INPUT));
-}
-
 void StaticShapeInferer::visit(const ir::operation::Select &op)
 {
   const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::CONDITION)};
@@ -876,14 +714,6 @@ void StaticShapeInferer::visit(const ir::operation::Select &op)
   auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
 
-  if (input_cond.info().isDynamic() || input_true.info().isDynamic() ||
-      input_false.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   // Select output shape
   ir::Shape new_shape = shape_inference::inferSelectShape(
       input_cond.info().shape(), input_true.info().shape(), input_false.info().shape());
@@ -899,14 +729,6 @@ void StaticShapeInferer::visit(const ir::operation::Shape &op)
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
 
-  // if input is dynamic, output also becomes dynamic
-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   // re-sizing output shape
   ir::Shape output_shape;
   output_shape.append(input.info().shape().rank());
@@ -914,11 +736,6 @@ void StaticShapeInferer::visit(const ir::operation::Shape &op)
   output.info().shape(output_shape);
 }
 
-void StaticShapeInferer::visit(const ir::operation::Sin &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Sin::Input::INPUT));
-}
-
 void StaticShapeInferer::visit(const ir::operation::Slice &op)
 {
   const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)};
@@ -930,13 +747,6 @@ void StaticShapeInferer::visit(const ir::operation::Slice &op)
   const auto output_index = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_index);
 
-  if (input.info().isDynamic() || begins.info().isDynamic() || sizes.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   // Whether input is constant or not does not affect whether output is dynamic or not
   if (!(begins.isConstant() && sizes.isConstant()))
   {
@@ -970,13 +780,6 @@ void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op)
   const auto &block_shape = _operands.at(block_shape_idx);
   const auto &padding = _operands.at(padding_idx);
 
-  if (input.info().isDynamic() || block_shape.info().isDynamic() || padding.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   // Whether input is constant or not does not affect whether output is dynamic or not
   if (!(block_shape.isConstant() && padding.isConstant()))
   {
@@ -1006,18 +809,6 @@ void StaticShapeInferer::visit(const ir::operation::Split &op)
   const auto axis = op.param().axis;
   const auto num_splits = op.param().num_splits;
 
-  if (input.info().isDynamic())
-  {
-    for (int out_tensor_idx = 0; out_tensor_idx < num_splits; out_tensor_idx++)
-    {
-      const auto output_idx = op.getOutputs().at(out_tensor_idx);
-      ir::Operand &output = _operands.at(output_idx);
-      output.info().setDynamic();
-    }
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   const auto rank = input.info().shape().rank();
   auto axis_resolved = axis < 0 ? axis + rank : axis;
 
@@ -1072,14 +863,6 @@ void StaticShapeInferer::visit(const ir::operation::StridedSlice &op)
   const auto output_index = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_index);
 
-  if (input.info().isDynamic() || starts.info().isDynamic() || ends.info().isDynamic() ||
-      strides.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   if (!(starts.isConstant() && ends.isConstant() && strides.isConstant()))
   {
     output.info().setDynamic();
@@ -1104,17 +887,6 @@ void StaticShapeInferer::visit(const ir::operation::StridedSlice &op)
   output.info().shape(new_shape);
 }
 
-void StaticShapeInferer::visit(const ir::operation::Sub &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Sub::Input::LHS),
-                           op.getInputs().at(ir::operation::Sub::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Tanh &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Tanh::Input::INPUT));
-}
-
 void StaticShapeInferer::visit(const ir::operation::Tile &op)
 {
   const auto input_idx{op.getInputs().at(ir::operation::Tile::Input::INPUT)};
@@ -1126,13 +898,6 @@ void StaticShapeInferer::visit(const ir::operation::Tile &op)
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
 
-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   if (!multiplier.isConstant())
   {
     output.info().setDynamic();
@@ -1158,13 +923,7 @@ void StaticShapeInferer::visit(const ir::operation::Transpose &op)
   ir::Operand &output = _operands.at(output_idx);
   const auto perm{op.param().perm};
   // const auto rank{op.param().rank};
-  // if input is dynamic, output also becomes dynamic
-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
+
   // set output shape, based on input and params
   ir::Shape new_shape = shape_inference::inferTransposeShape(input.info().shape(), perm);
   output.info().shape(new_shape);
@@ -1175,20 +934,6 @@ void StaticShapeInferer::visit(const ir::operation::Unpack &op)
   const auto input_idx{op.getInputs().at(0)};
   const auto &input = _operands.at(input_idx);
   const auto num = op.param().num;
-
-  // if input is dynamic, output also becomes dynamic
-  if (input.info().isDynamic())
-  {
-    for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
-    {
-      const auto output_idx = op.getOutputs().at(out_tensor_idx);
-      ir::Operand &output = _operands.at(output_idx);
-      output.info().setDynamic();
-    }
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   const auto rank = input.shape().rank();
   const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
 
@@ -1346,11 +1091,6 @@ void StaticShapeInferer::visit(const ir::operation::While &op)
   }
 }
 
-void StaticShapeInferer::visit(const ir::operation::ZerosLike &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ZerosLike::Input::INPUT));
-}
-
 } // namespace compiler
 
 } // namespace onert
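
The StaticShapeInferer hunks above hoist the repeated dynamic-input checks out of the individual visitors: `infer()` now scans each operation's inputs once via `checkDynamicInput()` and, if any is dynamic, marks every output dynamic via `setDynamicOutput()` instead of visiting, while If/While are always visited so they can recurse into their subgraphs. A condensed sketch of that control flow over stand-in types; note the real visitors can still flag dynamic outputs themselves (e.g. OneHot with a non-constant depth).

#include <functional>
#include <vector>

// Stand-in for one operation in an OpSequence.
struct Op
{
  std::vector<bool> input_is_dynamic;       // stand-in for operand dynamic flags
  bool is_control_flow = false;             // If / While
  std::function<void()> infer_shape;        // per-op visit()
  std::function<void()> set_outputs_dynamic;
};

// Mirrors the hoisted check in StaticShapeInferer::infer(): individual
// visitors no longer test their inputs for dynamic-ness themselves.
bool inferSequence(std::vector<Op> &ops)
{
  bool has_dynamic_tensor = false;
  for (auto &op : ops)
  {
    bool dynamic = false;
    if (op.is_control_flow)
    {
      op.infer_shape(); // If/While handle their subgraphs inside visit()
    }
    else
    {
      for (bool d : op.input_is_dynamic)
        dynamic = dynamic || d;
      if (dynamic)
        op.set_outputs_dynamic();
      else
        op.infer_shape();
    }
    has_dynamic_tensor = has_dynamic_tensor || dynamic;
  }
  return has_dynamic_tensor;
}
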
index c0a1ebc..3b0360b 100644
@@ -67,17 +67,6 @@ public:
     return _cf_tensor_builder;
   }
 
-  std::shared_ptr<backend::ITensor> getITensor(ir::OperandIndex ind)
-  {
-    for (auto &tensor_builder : _tensor_builders)
-    {
-      auto tensor = tensor_builder->tensorAt(ind);
-      if (tensor)
-        return tensor;
-    }
-    return nullptr;
-  }
-
 private:
   std::unordered_set<std::shared_ptr<backend::ITensorBuilder>> _tensor_builders;
   std::shared_ptr<backend::controlflow::TensorBuilder> _cf_tensor_builder;
diff --git a/runtime/onert/core/src/compiler/TensorRegistries.h b/runtime/onert/core/src/compiler/TensorRegistries.h
new file mode 100644
index 0000000..8be87b0
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_TENSOR_REGISTRIES_H__
+#define __ONERT_COMPILER_TENSOR_REGISTRIES_H__
+
+#include <unordered_set>
+#include <memory>
+#include "backend/BackendContext.h"
+#include "backend/Backend.h"
+#include "backend/controlflow/Config.h"
+#include "backend/controlflow/TensorBuilder.h"
+#include "backend/controlflow/TensorRegistry.h"
+
+namespace onert
+{
+namespace compiler
+{
+
+class TensorRegistries
+{
+public:
+  TensorRegistries() = default;
+
+  TensorRegistries(const onert::backend::BackendContexts &backend_contexts,
+                   bool include_controlflow)
+  {
+    for (const auto &e : backend_contexts)
+    {
+      auto tensor_reg = e.second->tensor_registry;
+      if (e.first->config()->id() == backend::controlflow::Config::ID)
+      {
+        _cf_tensor_reg =
+            std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(tensor_reg);
+        if (include_controlflow)
+          _tensor_regs.insert(tensor_reg);
+      }
+      else
+      {
+        _tensor_regs.insert(tensor_reg);
+      }
+    }
+  }
+
+  std::unordered_set<std::shared_ptr<onert::backend::ITensorRegistry>>::const_iterator begin() const
+  {
+    return _tensor_regs.cbegin();
+  }
+  std::unordered_set<std::shared_ptr<onert::backend::ITensorRegistry>>::const_iterator end() const
+  {
+    return _tensor_regs.cend();
+  }
+
+  std::shared_ptr<backend::controlflow::TensorRegistry> getControlflowTensorRegistry() const
+  {
+    return _cf_tensor_reg;
+  }
+
+  std::shared_ptr<backend::ITensor> getITensor(ir::OperandIndex ind) const
+  {
+    for (auto &tensor_reg : _tensor_regs)
+    {
+      auto tensor = tensor_reg->getITensor(ind);
+      if (tensor)
+        return tensor;
+    }
+    return nullptr;
+  }
+
+private:
+  std::unordered_set<std::shared_ptr<backend::ITensorRegistry>> _tensor_regs;
+  std::shared_ptr<backend::controlflow::TensorRegistry> _cf_tensor_reg;
+};
+
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_TENSOR_REGISTRIES_H__
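
`TensorRegistries::getITensor()` above polls each backend's registry and returns the first hit, replacing the similar helper deleted from the class shown just before this file. A reduced sketch of that linear probe; `ITensor`, `ITensorRegistry`, and the `int` index are stand-ins, and it assumes (as the runtime appears to) that at most one registry owns a given operand index.

#include <memory>
#include <unordered_set>

struct ITensor
{
};

struct ITensorRegistry
{
  virtual ~ITensorRegistry() = default;
  virtual std::shared_ptr<ITensor> getITensor(int index) const = 0;
};

// Mirrors TensorRegistries::getITensor(): ask each backend's registry in
// turn and return the first hit; with at most one owner per index, the
// iteration order of the set does not matter.
std::shared_ptr<ITensor>
findTensor(const std::unordered_set<std::shared_ptr<ITensorRegistry>> &regs, int index)
{
  for (const auto &reg : regs)
    if (auto tensor = reg->getITensor(index))
      return tensor;
  return nullptr;
}
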
 
 namespace onert
 {
-namespace ir
+namespace compiler
 {
 namespace pass
 {
 
-void ConstantInsertionPass::callback(const OperationIndex &node_index, Operation &node)
+void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::Operation &node)
 {
   const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index);
   const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index);
   const auto backend = op_seq_lower_info->backend();
   const auto layout = op_seq_lower_info->layout();
-  const auto factor = operand::PermuteFactor{backend, layout};
+  const auto factor = ir::operand::PermuteFactor{backend, layout};
 
-  for (const auto input : node.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED)
+  for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
   {
     auto &object = _graph.operands().at(input);
 
@@ -47,7 +47,7 @@ void ConstantInsertionPass::callback(const OperationIndex &node_index, Operation
         auto new_object = object;
         new_object.unsetDef();
         // TODO Remove const_cast
-        const_cast<OperationIndexSet &>(new_object.getUses()).clear();
+        const_cast<ir::OperationIndexSet &>(new_object.getUses()).clear();
         const auto new_index = _graph.operands().emplace(new_object);
         _replace_operands_map[key] = new_index;
       }
@@ -89,5 +89,5 @@ void ConstantInsertionPass::callback(const OperationIndex &node_index, Operation
 }
 
 } // namespace pass
-} // namespace ir
+} // namespace compiler
 } // namespace onert
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__
-#define __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__
+#define __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__
 
 #include <ir/operand/PermuteFactor.h>
 #include <ir/Index.h>
@@ -25,7 +25,7 @@
 
 namespace onert
 {
-namespace ir
+namespace compiler
 {
 namespace pass
 {
@@ -39,13 +39,13 @@ public:
   std::string id() final { return "ConstantInsertionPass"; }
 
 public:
-  void callback(const OperationIndex &index, Operation &node) final;
+  void callback(const ir::OperationIndex &index, ir::Operation &node) final;
 
 private:
   struct ReplaceKey
   {
-    OperandIndex index;
-    operand::PermuteFactor factor;
+    ir::OperandIndex index;
+    ir::operand::PermuteFactor factor;
 
     bool operator==(const ReplaceKey &other) const
     {
@@ -61,15 +61,16 @@ private:
     std::size_t operator()(const ReplaceKey &key) const noexcept
     {
       using std::hash;
-      return hash<OperandIndex>()(key.index) ^ (hash<operand::PermuteFactor>()(key.factor) << 1);
+      return hash<ir::OperandIndex>()(key.index) ^
+             (hash<ir::operand::PermuteFactor>()(key.factor) << 1);
     }
   };
 
-  std::unordered_map<ReplaceKey, OperandIndex, KeyHasher> _replace_operands_map;
+  std::unordered_map<ReplaceKey, ir::OperandIndex, KeyHasher> _replace_operands_map;
 };
 
 } // namespace pass
-} // namespace ir
+} // namespace compiler
 } // namespace onert
 
-#endif // __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__
+#endif // __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__
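
The `KeyHasher` above combines the two member hashes with XOR and a one-bit shift, so swapping the two fields usually changes the result. A self-contained sketch with stand-in member types (`int` for `ir::OperandIndex`, `std::string` for the permute factor):

#include <cstddef>
#include <functional>
#include <string>
#include <unordered_map>

// Stand-ins: `int` for ir::OperandIndex, `std::string` for the PermuteFactor.
struct ReplaceKey
{
  int index;
  std::string factor;

  bool operator==(const ReplaceKey &other) const
  {
    return index == other.index && factor == other.factor;
  }
};

// Same combiner as the pass: XOR of the member hashes, with one shifted so
// the combined value depends on both fields and their positions.
struct KeyHasher
{
  std::size_t operator()(const ReplaceKey &key) const noexcept
  {
    return std::hash<int>()(key.index) ^ (std::hash<std::string>()(key.factor) << 1);
  }
};

// One cloned constant operand per (operand, permute factor) pair.
std::unordered_map<ReplaceKey, int, KeyHasher> replace_operands_map;
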
 
 namespace onert
 {
-namespace ir
+namespace compiler
 {
 namespace pass
 {
 
-void ConstantLoweringPass::callback(const OperationIndex &node_index, Operation &node)
+void ConstantLoweringPass::callback(const ir::OperationIndex &node_index, ir::Operation &node)
 {
   const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index);
   const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index);
   const auto backend = op_seq_lower_info->backend();
   const auto layout = op_seq_lower_info->layout();
-  const auto factor = operand::PermuteFactor{backend, layout};
+  const auto factor = ir::operand::PermuteFactor{backend, layout};
 
   // Currently this runtime does not support an operation whose output is a constant
-  for (const auto input : node.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED)
+  for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
   {
     auto &object = _graph.operands().at(input);
     if (object.isConstant())
     {
       // All constant operands are already assigned to each backend by ConstantInsertionPass, so a
       // constant has `def` and `use` with the same PermuteFactor
-      _lowered_graph.setLowerInfo(input, std::make_unique<operand::LowerInfo>());
+      _lowered_graph.setLowerInfo(input, std::make_unique<ir::operand::LowerInfo>());
       _lowered_graph.getLowerInfo(input)->addDefPermuteFactor(factor);
       _lowered_graph.getLowerInfo(input)->addUsePermuteFactor(factor);
     }
@@ -52,5 +52,5 @@ void ConstantLoweringPass::callback(const OperationIndex &node_index, Operation
 }
 
 } // namespace pass
-} // namespace ir
+} // namespace compiler
 } // namespace onert
  * limitations under the License.
  */
 
-#ifndef __ONERT_GRAPH_PASS_CONSTANT_LOWERING_PASS_H__
-#define __ONERT_GRAPH_PASS_CONSTANT_LOWERING_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_CONSTANT_LOWERING_PASS_H__
+#define __ONERT_COMPILER_PASS_CONSTANT_LOWERING_PASS_H__
 
 #include <ir/Index.h>
 #include "LoweredOperationPass.h"
 
 namespace onert
 {
-namespace ir
+namespace compiler
 {
 namespace pass
 {
@@ -36,11 +36,11 @@ public:
   std::string id() final { return "ConstantLoweringPass"; }
 
 public:
-  void callback(const OperationIndex &index, Operation &node) final;
+  void callback(const ir::OperationIndex &index, ir::Operation &node) final;
 };
 
 } // namespace pass
-} // namespace ir
+} // namespace compiler
 } // namespace onert
 
-#endif // __ONERT_GRAPH_PASS_CONSTANT_LOWERING_PASS_H__
+#endif // __ONERT_COMPILER_PASS_CONSTANT_LOWERING_PASS_H__
 #define __ONERT_IR_PASS_LOWERED_OPERAND_PASS_H__
 
 #include "OperandPass.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
 
 namespace onert
 {
-namespace ir
+namespace compiler
 {
 namespace pass
 {
@@ -30,7 +30,7 @@ namespace pass
 class LoweredOperandPass : public OperandPass
 {
 public:
-  LoweredOperandPass(ir::LoweredGraph &lowered_graph)
+  LoweredOperandPass(compiler::LoweredGraph &lowered_graph)
       : OperandPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
   {
     // DO NOTHING
@@ -39,14 +39,14 @@ public:
   virtual ~LoweredOperandPass() = default;
 
   std::string id() override = 0;
-  void callback(const OperandIndex &i, Operand &o) override = 0;
+  void callback(const ir::OperandIndex &i, ir::Operand &o) override = 0;
 
 protected:
-  ir::LoweredGraph &_lowered_graph;
+  compiler::LoweredGraph &_lowered_graph;
 };
 
 } // namespace pass
-} // namespace ir
+} // namespace compiler
 } // namespace onert
 
 #endif // __ONERT_IR_PASS_LOWERED_OPERAND_PASS_H__
 #define __ONERT_IR_PASS_LOWERED_OPERATION_PASS_H__
 
 #include "OperationPass.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
 
 namespace onert
 {
-namespace ir
+namespace compiler
 {
 namespace pass
 {
@@ -30,7 +30,7 @@ namespace pass
 class LoweredOperationPass : public OperationPass
 {
 public:
-  LoweredOperationPass(ir::LoweredGraph &lowered_graph)
+  LoweredOperationPass(LoweredGraph &lowered_graph)
       : OperationPass{lowered_graph.graph()}, _lowered_graph{lowered_graph}
   {
     // DO NOTHING
@@ -39,14 +39,14 @@ public:
   virtual ~LoweredOperationPass() = default;
 
   std::string id() override = 0;
-  void callback(const OperationIndex &i, Operation &o) override = 0;
+  void callback(const ir::OperationIndex &i, ir::Operation &o) override = 0;
 
 protected:
-  ir::LoweredGraph &_lowered_graph;
+  LoweredGraph &_lowered_graph;
 };
 
 } // namespace pass
-} // namespace ir
+} // namespace compiler
 } // namespace onert
 
 #endif // __ONERT_IR_PASS_LOWERED_OPERATION_PASS_H__
@@ -20,7 +20,7 @@
 
 namespace onert
 {
-namespace ir
+namespace compiler
 {
 namespace pass
 {
@@ -28,9 +28,9 @@ namespace pass
 void OperandPass::run()
 {
   _graph.operands().iterate(
-      [&](const OperandIndex &index, Operand &object) { callback(index, object); });
+      [&](const ir::OperandIndex &index, ir::Operand &object) { callback(index, object); });
 }
 
 } // namespace pass
-} // namespace ir
+} // namespace compiler
 } // namespace onert
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_GRAPH_PASS_OPERAND_PASS_H__
-#define __ONERT_GRAPH_PASS_OPERAND_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_OPERAND_PASS_H__
+#define __ONERT_COMPILER_PASS_OPERAND_PASS_H__
 
 #include "Pass.h"
 #include "ir/Index.h"
@@ -30,7 +30,7 @@ class Operand;
 
 namespace onert
 {
-namespace ir
+namespace compiler
 {
 namespace pass
 {
@@ -44,11 +44,11 @@ public:
 public:
   std::string id() override = 0;
   void run() override final;
-  virtual void callback(const OperandIndex &i, Operand &o) = 0;
+  virtual void callback(const ir::OperandIndex &i, ir::Operand &o) = 0;
 };
 
 } // namespace pass
-} // namespace ir
+} // namespace compiler
 } // namespace onert
 
-#endif // __ONERT_GRAPH_PASS_OPERAND_PASS_H__
+#endif // __ONERT_COMPILER_PASS_OPERAND_PASS_H__
@@ -22,7 +22,7 @@
 
 namespace onert
 {
-namespace ir
+namespace compiler
 {
 namespace pass
 {
@@ -30,9 +30,9 @@ namespace pass
 void OperationPass::run()
 {
   _graph.operations().iterate(
-      [&](const OperationIndex &index, Operation &node) { callback(index, node); });
+      [&](const ir::OperationIndex &index, ir::Operation &node) { callback(index, node); });
 }
 
 } // namespace pass
-} // namespace ir
+} // namespace compiler
 } // namespace onert
@@ -19,8 +19,8 @@
  * @brief This file contains OperationPass class
  */
 
-#ifndef __ONERT_GRAPH_PASS_OPERATION_PASS_H__
-#define __ONERT_GRAPH_PASS_OPERATION_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_OPERATION_PASS_H__
+#define __ONERT_COMPILER_PASS_OPERATION_PASS_H__
 
 #include "Pass.h"
 #include "ir/Index.h"
@@ -35,7 +35,7 @@ class Operation;
 
 namespace onert
 {
-namespace ir
+namespace compiler
 {
 namespace pass
 {
@@ -62,7 +62,7 @@ public:
    * @param index is the index of a node in graph
    * @param node is the node in graph
    */
-  virtual void callback(const OperationIndex &index, Operation &node) = 0;
+  virtual void callback(const ir::OperationIndex &index, ir::Operation &node) = 0;
 
   /**
    * @brief Run the pass
@@ -71,7 +71,7 @@ public:
 };
 
 } // namespace pass
-} // namespace ir
+} // namespace compiler
 } // namespace onert
 
-#endif // __ONERT_GRAPH_PASS_OPERATION_PASS_H__
+#endif // __ONERT_COMPILER_PASS_OPERATION_PASS_H__
similarity index 78%
rename from runtime/onert/core/src/ir/pass/Pass.h
rename to runtime/onert/core/src/compiler/pass/Pass.h
index 1c6628f..3f356c3 100644
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_GRAPH_PASS_PASS_H__
-#define __ONERT_GRAPH_PASS_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_PASS_H__
+#define __ONERT_COMPILER_PASS_PASS_H__
 
 #include <string>
 
@@ -24,12 +24,12 @@ namespace onert
 namespace ir
 {
 class Graph;
 } // namespace ir
 } // namespace onert
 
 namespace onert
 {
-namespace ir
+namespace compiler
 {
 namespace pass
 {
@@ -37,7 +37,7 @@ namespace pass
 class Pass
 {
 public:
-  Pass(Graph &graph) : _graph{graph} {}
+  Pass(ir::Graph &graph) : _graph{graph} {}
   virtual ~Pass() = default;
 
 public:
@@ -45,11 +45,11 @@ public:
   virtual void run() = 0;
 
 protected:
-  Graph &_graph;
+  ir::Graph &_graph;
 };
 
 } // namespace pass
-} // namespace ir
+} // namespace compiler
 } // namespace onert
 
-#endif // __ONERT_GRAPH_PASS_PASS_H__
+#endif // __ONERT_COMPILER_PASS_PASS_H__
 
 namespace onert
 {
-namespace ir
+namespace compiler
 {
 namespace pass
 {
 
-void PermutationEliminationPass::callback(const OperationIndex &ind, Operation &node)
+void PermutationEliminationPass::callback(const ir::OperationIndex &ind, ir::Operation &node)
 {
   _op_ind = ind;
   node.accept(*this);
 }
 
-void PermutationEliminationPass::visit(const operation::Permute &node)
+void PermutationEliminationPass::visit(const ir::operation::Permute &node)
 {
   auto in_operand = node.getInputs().at(0);
   auto out_operand = node.getOutputs().at(0);
 
-  // Check if two tensors are both portable
-  // TODO Make this general, this is just a workaround to check two tensors are portable
+  // Check if two tensors are both portable; if not, we can't eliminate the node
   {
     auto in_def_factor = _lowered_graph.getLowerInfo(in_operand)->def_factors().getOnlyElement();
     auto out_def_factor = _lowered_graph.getLowerInfo(out_operand)->def_factors().getOnlyElement();
 
-    auto in_backend_id = in_def_factor.backend()->config()->id();
-    auto out_backend_id = out_def_factor.backend()->config()->id();
+    auto in_config = in_def_factor.backend()->config();
+    auto out_config = out_def_factor.backend()->config();
 
-    // TODO Fix this workaround that removes only Permute between cpu and controlflow backend.
-    //      This should be general.
-    if (!((in_backend_id == backend::controlflow::Config::ID && out_backend_id == "cpu") ||
-          (in_backend_id == "cpu" && out_backend_id == backend::controlflow::Config::ID)))
+    // FIXME Supporting dynamic tensors does not necessarily mean the tensors are portable.
+    //       We may need another config option to check whether each side uses `IPortableTensor`.
+    if (!(in_config->supportDynamicTensor() && out_config->supportDynamicTensor()))
       return;
   }
 
@@ -65,7 +63,7 @@ void PermutationEliminationPass::visit(const operation::Permute &node)
       if (!op_seq.getOutputs().contains(in_operand))
         return;
 
       // Update OpSequence/Operation edges and Operand edges
       op_seq.replaceOutputs(in_operand, out_operand);
       for (auto op : op_seq.operations())
       {
@@ -106,8 +104,8 @@ void PermutationEliminationPass::visit(const operation::Permute &node)
     });
 
     VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl;
     VERBOSE(removePermute) << "  - Input (removed) Operand : " << in_operand << std::endl;
     VERBOSE(removePermute) << "  - Output(kept)    Operand : " << out_operand << std::endl;
   }
   else
   {
@@ -145,11 +143,11 @@ void PermutationEliminationPass::visit(const operation::Permute &node)
     }
 
     VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl;
     VERBOSE(removePermute) << "  - Input (kept)    Operand : " << in_operand << std::endl;
     VERBOSE(removePermute) << "  - Output(removed) Operand : " << out_operand << std::endl;
   }
 }
 
 } // namespace pass
-} // namespace ir
+} // namespace compiler
 } // namespace onert
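
With this change the pass gates elimination on dynamic-tensor support instead of the old cpu/controlflow whitelist; as the FIXME in the hunk concedes, that is only a proxy for both tensors being `IPortableTensor`s. A minimal sketch of the rewritten guard over a stand-in `Config`:

// Stand-in for backend::IConfig; dynamic-tensor support is the only part
// the guard consults.
struct Config
{
  bool supports_dynamic_tensor;
};

// Mirrors the rewritten check in PermutationEliminationPass::visit(Permute):
// the Permute node is eliminable only when both defining backends advertise
// dynamic tensor support (a proxy for "both tensors are portable").
bool eliminable(const Config &in_config, const Config &out_config)
{
  return in_config.supports_dynamic_tensor && out_config.supports_dynamic_tensor;
}
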
  * limitations under the License.
  */
 
-#ifndef __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
-#define __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_PERMUTATION_ELIMINATION_PASS_H__
+#define __ONERT_COMPILER_PASS_PERMUTATION_ELIMINATION_PASS_H__
 
 #include "ir/OperationVisitor.h"
 #include "LoweredOperationPass.h"
 
 namespace onert
 {
-namespace ir
+namespace compiler
 {
 namespace pass
 {
@@ -40,7 +40,7 @@ namespace pass
  * @note This is an optimization pass which means that everything should work fine even if this pass
  *       was skipped.
  */
-class PermutationEliminationPass : public LoweredOperationPass, public OperationVisitor
+class PermutationEliminationPass : public LoweredOperationPass, public ir::OperationVisitor
 {
 public:
   using LoweredOperationPass::LoweredOperationPass;
@@ -49,17 +49,17 @@ public:
   std::string id() final { return "PermutationEliminationPass"; }
 
 public:
-  void callback(const OperationIndex &i, Operation &n) final;
+  void callback(const ir::OperationIndex &i, ir::Operation &n) final;
 
 private:
-  void visit(const operation::Permute &) final;
+  void visit(const ir::operation::Permute &) final;
 
 private:
   ir::OperationIndex _op_ind;
 };
 
 } // namespace pass
-} // namespace ir
+} // namespace compiler
 } // namespace onert
 
-#endif // __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__
+#endif // __ONERT_COMPILER_PASS_PERMUTATION_ELIMINATION_PASS_H__
 
 namespace onert
 {
-namespace ir
+namespace compiler
 {
 namespace pass
 {
 
-void PermutationInsertionPass::callback(const OperandIndex &index, Operand &object)
+void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Operand &object)
 {
   auto &&operand_li = _lowered_graph.getLowerInfo(index);
   assert(operand_li);
@@ -48,10 +48,10 @@ void PermutationInsertionPass::callback(const OperandIndex &index, Operand &obje
     return;
   }
 
-  std::list<OperationIndex> permute_indexes;
+  std::list<ir::OperationIndex> permute_indexes;
 
   // Build a map for all necessary types of operands
-  std::unordered_map<operand::PermuteFactor, OperandIndex> factor_to_index;
+  std::unordered_map<ir::operand::PermuteFactor, ir::OperandIndex> factor_to_index;
   {
     assert(operand_li->def_factors().size() == 1);
     for (auto factor : operand_li->def_factors())
@@ -72,7 +72,7 @@ void PermutationInsertionPass::callback(const OperandIndex &index, Operand &obje
 
   // Update operations' input that uses this operand
   {
-    std::list<OperationIndex> remove_list;
+    std::list<ir::OperationIndex> remove_list;
 
     auto uses = object.getUses();
     for (auto use : uses)
@@ -121,8 +121,8 @@ void PermutationInsertionPass::callback(const OperandIndex &index, Operand &obje
   }
 }
 
-OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &operand_index,
-                                                       const operand::PermuteFactor &factor)
+ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandIndex &operand_index,
+                                                           const ir::operand::PermuteFactor &factor)
 {
   assert(!_graph.isBuildingPhase());
 
@@ -143,14 +143,14 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera
   auto output_backend = factor.backend();
   // NOTE Permute may not have specific layout because the layout of input and output may be
   // different.
-  const auto permute_node_layout = Layout::UNKNOWN;
+  const auto permute_node_layout = ir::Layout::UNKNOWN;
   // NOTE If one backend supports several layouts, it must support the Permute operation
   const backend::Backend *permute_node_backend = compiler::BackendManager::get().getControlflow();
   if (input_backend == output_backend)
   {
     permute_node_backend = input_backend;
   }
-  const operand::PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout};
+  const ir::operand::PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout};
 
   // Update LowerInfo of input operand
   auto operand_lower_info = _lowered_graph.getLowerInfo(operand_index);
@@ -158,7 +158,7 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera
   operand_lower_info->addUsePermuteFactor(permute_node_factor);
 
   // Update LowerInfo of output operand
-  auto out_operand_li = std::make_unique<operand::LowerInfo>();
+  auto out_operand_li = std::make_unique<ir::operand::LowerInfo>();
 
   // The input and output factors of all nodes except Permute will be the same, so tensor
   // allocators allocate memory using only the def permutation factor information for now.
@@ -170,13 +170,13 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera
   // Insert permute operation to the graph
   const auto input_layout = input_factor.layout();
   const auto output_layout = factor.layout();
-  using Permute = operation::Permute;
+  using Permute = ir::operation::Permute;
   const auto permute_type = [&]() {
-    if (input_layout == Layout::NHWC && output_layout == Layout::NCHW)
+    if (input_layout == ir::Layout::NHWC && output_layout == ir::Layout::NCHW)
     {
       return Permute::Type::NHWC_TO_NCHW;
     }
-    else if (input_layout == Layout::NCHW && output_layout == Layout::NHWC)
+    else if (input_layout == ir::Layout::NCHW && output_layout == ir::Layout::NHWC)
     {
       return Permute::Type::NCHW_TO_NHWC;
     }
@@ -200,7 +200,7 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera
     auto &op_seq = _lowered_graph.op_seqs().at(op_seq_index);
     op_seq.setInputs(node.getInputs());
     op_seq.setOutputs(node.getOutputs());
-    _lowered_graph.setLowerInfo(op_seq_index, std::make_unique<operation::LowerInfo>(
+    _lowered_graph.setLowerInfo(op_seq_index, std::make_unique<ir::operation::LowerInfo>(
                                                   permute_node_backend, permute_node_layout));
   }
 
@@ -212,5 +212,5 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera
   return node_index;
 }
 } // namespace pass
-} // namespace ir
+} // namespace compiler
 } // namespace onert
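
The permute_type lambda in insertPermute above derives the Permute flavor from the input/output layout pair alone. A compilable sketch of that mapping; the enum values come from the hunk, while COPY as the fallback for matching or unknown layouts is an assumption consistent with the lambda's default branch not shown here:

// Standalone sketch of the layout-pair mapping used by insertPermute.
enum class Layout { NHWC, NCHW, UNKNOWN };
enum class PermuteType { NHWC_TO_NCHW, NCHW_TO_NHWC, COPY };

PermuteType permuteTypeFor(Layout input_layout, Layout output_layout)
{
  if (input_layout == Layout::NHWC && output_layout == Layout::NCHW)
    return PermuteType::NHWC_TO_NCHW;
  if (input_layout == Layout::NCHW && output_layout == Layout::NHWC)
    return PermuteType::NCHW_TO_NHWC;
  return PermuteType::COPY; // same or unknown layouts: element-wise copy
}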
  * limitations under the License.
  */
 
-#ifndef __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
-#define __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
+#ifndef __ONERT_COMPILER_PASS_PERMUTATION_INSERTION_PASS_H__
+#define __ONERT_COMPILER_PASS_PERMUTATION_INSERTION_PASS_H__
 
 #include "LoweredOperandPass.h"
 #include "compiler/BackendManager.h"
-#include "ir/Operand.h" //for OperationIndex
+#include "ir/Operand.h"
 #include "ir/operand/PermuteFactor.h"
 
 namespace onert
 {
-namespace ir
+namespace compiler
 {
 namespace pass
 {
@@ -36,7 +36,7 @@ public:
 
 public:
   std::string id() override { return "PermutationInsertionPass"; }
-  void callback(const OperandIndex &index, Operand &object) override;
+  void callback(const ir::OperandIndex &index, ir::Operand &object) override;
 
 private:
   /**
@@ -45,14 +45,14 @@ private:
    * @param operand_index is the target operand index for the insertion
    * @param factor is the output operand's backend type and layout
    *
-   * @return OperationIndex
+   * @return ir::OperationIndex
    */
-  OperationIndex insertPermute(const OperandIndex &operand_index,
-                               const operand::PermuteFactor &factor);
+  ir::OperationIndex insertPermute(const ir::OperandIndex &operand_index,
+                                   const ir::operand::PermuteFactor &factor);
 };
 
 } // namespace pass
-} // namespace ir
+} // namespace compiler
 } // namespace onert
 
-#endif // __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__
+#endif // __ONERT_COMPILER_PASS_PERMUTATION_INSERTION_PASS_H__
 
 namespace onert
 {
-namespace ir
+namespace compiler
 {
 namespace pass
 {
 
+using namespace ir;
+
 void PermutationOperationPass::callback(const OperationIndex &, Operation &node)
 {
   node.accept(*this);
@@ -70,7 +72,7 @@ void PermutationOperationPass::applyExpandRanks(const Operation &node)
                                  "operand used in more than one node");
       // TODO remove const_cast later. For example, _ctx may need to be a non const variable or
       //      a node to extend shape may be inserted in front of this operation
-      const_cast<ir::Shape &>(operand.shape()).extendRank(expanded_rank);
+      const_cast<Shape &>(operand.shape()).extendRank(expanded_rank);
     }
   }
 }
@@ -134,7 +136,7 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node)
       const auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index);
       _lowered_graph.setLowerInfo(
           next_op_seq_index,
-          std::make_unique<operation::LowerInfo>(op_seq_li->backend(), op_seq_li->layout()));
+          std::make_unique<ir::operation::LowerInfo>(op_seq_li->backend(), op_seq_li->layout()));
     }
   }
 
@@ -164,8 +166,8 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node)
     auto &new_op_seq = _lowered_graph.op_seqs().at(new_op_seq_index);
     new_op_seq.setInputs(node.getInputs());
     new_op_seq.setOutputs(node.getOutputs());
-    _lowered_graph.setLowerInfo(new_op_seq_index,
-                                std::make_unique<operation::LowerInfo>(backend, frontend_layout));
+    _lowered_graph.setLowerInfo(
+        new_op_seq_index, std::make_unique<ir::operation::LowerInfo>(backend, frontend_layout));
   }
 
   // Change PermuteFactors of operands of target node
@@ -175,7 +177,7 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node)
     const auto backend = op_seq_li->backend();
     const operand::PermuteFactor removed_factor{backend, backend_layout};
     const operand::PermuteFactor new_factor{backend, frontend_layout};
-    for (const auto &input : node.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED)
+    for (const auto &input : node.getInputs() | Remove::DUPLICATED | Remove::UNDEFINED)
     {
       bool canRemove = true;
       for (const auto &use : _graph.operands().at(input).getUses())
@@ -227,17 +229,31 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node)
   }
 }
 
-void PermutationOperationPass::visit(const operation::Add &node) { applyExpandRanks(node); }
+void PermutationOperationPass::visit(const ir::operation::BinaryArithmetic &node)
+{
+  applyExpandRanks(node);
+}
 
-void PermutationOperationPass::visit(const operation::Concat &node) { applyExpandRanks(node); }
+void PermutationOperationPass::visit(const ir::operation::Concat &node) { applyExpandRanks(node); }
 
-void PermutationOperationPass::visit(const operation::Comparison &node) { applyExpandRanks(node); }
+void PermutationOperationPass::visit(const ir::operation::Comparison &node)
+{
+  applyExpandRanks(node);
+}
 
-void PermutationOperationPass::visit(const operation::Div &node) { applyExpandRanks(node); }
+void PermutationOperationPass::visit(const ir::operation::ElementwiseBinary &node)
+{
+  applyExpandRanks(node);
+}
 
-void PermutationOperationPass::visit(const operation::FullyConnected &node)
+void PermutationOperationPass::visit(const ir::operation::ElementwiseUnary &node)
 {
-  const auto &input_ind = node.getInputs().at(operation::FullyConnected::Input::INPUT);
+  applyExpandRanks(node);
+}
+
+void PermutationOperationPass::visit(const ir::operation::FullyConnected &node)
+{
+  const auto &input_ind = node.getInputs().at(ir::operation::FullyConnected::Input::INPUT);
   const auto &input_obj = _graph.operands().at(input_ind);
   const auto &input_shape = input_obj.shape();
 
@@ -247,9 +263,9 @@ void PermutationOperationPass::visit(const operation::FullyConnected &node)
   }
 }
 
-void PermutationOperationPass::visit(const operation::Gather &node)
+void PermutationOperationPass::visit(const ir::operation::Gather &node)
 {
-  const auto &input_ind = node.getInputs().at(operation::Gather::Input::INPUT);
+  const auto &input_ind = node.getInputs().at(ir::operation::Gather::Input::INPUT);
   const auto &input_obj = _graph.operands().at(input_ind);
   const auto &input_shape = input_obj.shape();
 
@@ -263,21 +279,9 @@ void PermutationOperationPass::visit(const operation::Gather &node)
   }
 }
 
-void PermutationOperationPass::visit(const operation::LogicalAnd &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::LogicalNot &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::LogicalOr &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::Max &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::Min &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::Mul &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::Pack &node)
+void PermutationOperationPass::visit(const ir::operation::Pack &node)
 {
-  const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT);
+  const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT);
   const auto &input_obj = _graph.operands().at(input_ind);
   const auto &input_shape = input_obj.shape();
 
@@ -291,11 +295,11 @@ void PermutationOperationPass::visit(const operation::Pack &node)
   }
 }
 
-void PermutationOperationPass::visit(const operation::PReLU &node) { applyExpandRanks(node); }
+void PermutationOperationPass::visit(const ir::operation::PReLU &node) { applyExpandRanks(node); }
 
-void PermutationOperationPass::visit(const operation::Reshape &node)
+void PermutationOperationPass::visit(const ir::operation::Reshape &node)
 {
-  const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT);
+  const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT);
   const auto &input_obj = _graph.operands().at(input_ind);
   const auto &input_shape = input_obj.shape();
 
@@ -309,16 +313,14 @@ void PermutationOperationPass::visit(const operation::Reshape &node)
   }
 }
 
-void PermutationOperationPass::visit(const operation::SquaredDifference &node)
+void PermutationOperationPass::visit(const ir::operation::SquaredDifference &node)
 {
   applyExpandRanks(node);
 }
 
-void PermutationOperationPass::visit(const operation::Sub &node) { applyExpandRanks(node); }
-
-void PermutationOperationPass::visit(const operation::Unpack &node)
+void PermutationOperationPass::visit(const ir::operation::Unpack &node)
 {
-  const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT);
+  const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT);
   const auto &input_obj = _graph.operands().at(input_ind);
   const auto &input_shape = input_obj.shape();
 
@@ -333,5 +335,5 @@ void PermutationOperationPass::visit(const operation::Unpack &node)
 }
 
 } // namespace pass
-} // namespace ir
+} // namespace compiler
 } // namespace onert
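
Most visit overloads above funnel into applyExpandRanks, which leans on ir::Shape::extendRank to pad operand shapes up to the expanded rank (see the extendRank call in the first hunk). A self-contained sketch of that rank-extension rule, using a stand-in Shape type and assuming the padding is done with leading 1s:

// Prepending size-1 dimensions keeps the element count and broadcasting
// semantics of the operand intact.
#include <cassert>
#include <cstdint>
#include <vector>

struct Shape
{
  std::vector<int32_t> dims;

  void extendRank(size_t to_rank)
  {
    assert(dims.size() <= to_rank);
    dims.insert(dims.begin(), to_rank - dims.size(), 1); // prepend 1s
  }
};

int main()
{
  Shape s{{3, 4}};
  s.extendRank(4); // s.dims is now {1, 1, 3, 4}
  assert(s.dims.size() == 4 && s.dims[0] == 1 && s.dims[2] == 3);
}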
diff --git a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h
new file mode 100644
index 0000000..2dd76b9
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_PASS_PERMUTATION_OPERATION_PASS_H__
+#define __ONERT_COMPILER_PASS_PERMUTATION_OPERATION_PASS_H__
+
+#include "ir/OperationVisitor.h"
+#include "LoweredOperationPass.h"
+
+namespace onert
+{
+namespace compiler
+{
+namespace pass
+{
+
+class PermutationOperationPass : public LoweredOperationPass, public ir::OperationVisitor
+{
+public:
+  using LoweredOperationPass::LoweredOperationPass;
+
+public:
+  std::string id() final { return "PermutationOperationPass"; }
+
+public:
+  void callback(const ir::OperationIndex &i, ir::Operation &n) final;
+
+public:
+  void visit(const ir::operation::BinaryArithmetic &) final;
+  void visit(const ir::operation::Comparison &) final;
+  void visit(const ir::operation::Concat &) final;
+  void visit(const ir::operation::ElementwiseBinary &) final;
+  void visit(const ir::operation::ElementwiseUnary &) final;
+  void visit(const ir::operation::Pack &) final;
+  void visit(const ir::operation::PReLU &) final;
+  void visit(const ir::operation::SquaredDifference &) final;
+  void visit(const ir::operation::Unpack &) final;
+  void visit(const ir::operation::FullyConnected &) final;
+  void visit(const ir::operation::Gather &) final;
+  void visit(const ir::operation::Reshape &) final;
+
+private:
+  void applyExpandRanks(const ir::Operation &);
+  void changeToKeepLayout(const ir::Operation &);
+};
+
+} // namespace pass
+} // namespace compiler
+} // namespace onert
+
+#endif // __ONERT_COMPILER_PASS_PERMUTATION_OPERATION_PASS_H__
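
The new header pairs LoweredOperationPass with ir::OperationVisitor: callback() forwards to node.accept(*this), which double-dispatches to the matching visit overload. A minimal compilable sketch of that wiring; every type here is a simplified stand-in, not the real onert class:

// Double-dispatch sketch of the visitor wiring.
#include <iostream>

struct Reshape; // one concrete operation

struct Visitor
{
  virtual ~Visitor() = default;
  virtual void visit(const Reshape &) {} // default: ignore
};

struct Operation
{
  virtual ~Operation() = default;
  virtual void accept(Visitor &v) const = 0;
};

struct Reshape : Operation
{
  void accept(Visitor &v) const override { v.visit(*this); } // second dispatch
};

struct ExamplePass : Visitor
{
  void callback(Operation &node) { node.accept(*this); } // first dispatch
  void visit(const Reshape &) override { std::cout << "Reshape visited\n"; }
};

int main()
{
  Reshape node;
  ExamplePass pass;
  pass.callback(node); // prints "Reshape visited"
}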
index 668785a..fdbca16 100644
@@ -15,7 +15,7 @@
  */
 
 #include "ir/Graph.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
 
 #ifndef __ONERT_DUMPER_DOT_DOT_DUMPER_H__
 #define __ONERT_DUMPER_DOT_DOT_DUMPER_H__
@@ -42,7 +42,7 @@ public:
       : _lowered_graph{nullptr}, _graph(graph), _level{level}
   {
   }
-  DotDumper(const ir::LoweredGraph *lowered_graph, Level level)
+  DotDumper(const compiler::LoweredGraph *lowered_graph, Level level)
       : _lowered_graph{lowered_graph}, _graph(_lowered_graph->graph()), _level{level}
   {
   }
@@ -57,7 +57,7 @@ public:
   void dump(const std::string &tag);
 
 private:
-  const ir::LoweredGraph *_lowered_graph;
+  const compiler::LoweredGraph *_lowered_graph;
   const ir::Graph &_graph;
   Level _level;
 };
index cb516b5..a69ae9c 100644
@@ -78,11 +78,13 @@ bool DataflowExecutor::noWaitingJobs()
 }
 
 DataflowExecutor::DataflowExecutor(
-    std::unique_ptr<ir::LoweredGraph> lowered_graph,
+    std::unique_ptr<compiler::LoweredGraph> lowered_graph,
     const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
     const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
-    const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map)
-    : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_builders},
+    const compiler::TensorRegistries &tensor_regs, backend::TensorManagerSet &&tensor_mgrs,
+    compiler::CodeMap &&code_map)
+    : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
+                   std::move(tensor_mgrs)},
       _code_map{std::move(code_map)}
 {
   VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl;
index aebb03c..8d60e3e 100644
@@ -49,10 +49,11 @@ public:
    * @param tensor_builders Tensor builders that are currently used
    * @param code_map OpSequence and its code map
    */
-  DataflowExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+  DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                    const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
                    const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
-                   const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map);
+                   const compiler::TensorRegistries &tensor_regs,
+                   backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map);
 
   void executeImpl() override;
 
index 5ec7012..70bddfc 100644
@@ -100,17 +100,6 @@ void DynamicShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
   assert(output->buffer() != nullptr);
 }
 
-void DynamicShapeInferer::visit(const ir::operation::Abs &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Abs::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Add &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Add::Input::LHS),
-                           op.getInputs().at(ir::operation::Add::Input::RHS));
-}
-
 void DynamicShapeInferer::visit(const ir::operation::ArgMax &op)
 {
   const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
@@ -155,6 +144,12 @@ void DynamicShapeInferer::visit(const ir::operation::BatchMatMul &op)
   dynamicTensorManagerOf(output)->applyShape(output_index, new_shape);
 }
 
+void DynamicShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
+{
+  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS),
+                           op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS));
+}
+
 void DynamicShapeInferer::visit(const ir::operation::BroadcastTo &op)
 {
   auto output_ind = op.getOutputs().at(0);
@@ -179,11 +174,6 @@ void DynamicShapeInferer::visit(const ir::operation::BroadcastTo &op)
   assert(output->buffer() != nullptr);
 }
 
-void DynamicShapeInferer::visit(const ir::operation::Cast &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cast::INPUT));
-}
-
 void DynamicShapeInferer::visit(const ir::operation::Comparison &op)
 {
   handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0),
@@ -292,20 +282,20 @@ void DynamicShapeInferer::visit(const ir::operation::Conv2D &op)
   assert(output->buffer() != nullptr);
 }
 
-void DynamicShapeInferer::visit(const ir::operation::Cos &op)
+void DynamicShapeInferer::visit(const ir::operation::ElementwiseActivation &op)
 {
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cos::Input::INPUT));
+  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::INPUT));
 }
 
-void DynamicShapeInferer::visit(const ir::operation::Div &op)
+void DynamicShapeInferer::visit(const ir::operation::ElementwiseBinary &op)
 {
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Div::Input::LHS),
-                           op.getInputs().at(ir::operation::Div::Input::RHS));
+  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS),
+                           op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS));
 }
 
-void DynamicShapeInferer::visit(const ir::operation::Exp &op)
+void DynamicShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
 {
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Exp::Input::INPUT));
+  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT));
 }
 
 void DynamicShapeInferer::visit(const ir::operation::ExpandDims &op)
@@ -430,27 +420,6 @@ void DynamicShapeInferer::visit(const ir::operation::Gather &op)
   assert(output->buffer() != nullptr);
 }
 
-void DynamicShapeInferer::visit(const ir::operation::Log &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Log::Input::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::LogicalNot &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::LogicalNot::Input::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::LogicalOr &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::LogicalOr::Input::INPUT0),
-                           op.getInputs().at(ir::operation::LogicalOr::Input::INPUT1));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Logistic &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Logistic::INPUT));
-}
-
 void DynamicShapeInferer::visit(const ir::operation::L2Normalization &op)
 {
   handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::INPUT));
@@ -461,29 +430,6 @@ void DynamicShapeInferer::visit(const ir::operation::MatrixBandPart &op)
   handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::INPUT));
 }
 
-void DynamicShapeInferer::visit(const ir::operation::Max &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Max::Input::LHS),
-                           op.getInputs().at(ir::operation::Max::Input::RHS));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Min &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Min::Input::LHS),
-                           op.getInputs().at(ir::operation::Min::Input::RHS));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Mul &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Mul::Input::LHS),
-                           op.getInputs().at(ir::operation::Mul::Input::RHS));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Neg &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Neg::Input::INPUT));
-}
-
 void DynamicShapeInferer::visit(const ir::operation::OneHot &op)
 {
   auto output_ind = op.getOutputs().at(0);
@@ -766,7 +712,7 @@ void DynamicShapeInferer::visit(const ir::operation::ResizeBilinear &op)
   if (output_shape != output->getShape() || output->buffer() == nullptr)
   {
     // change on output shape
-    _dynamic_tensor_manager->applyShape(output_ind, output_shape);
+    dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape);
   }
   assert(output->buffer() != nullptr);
 }
@@ -776,16 +722,6 @@ void DynamicShapeInferer::visit(const ir::operation::Reverse &op)
   handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::INPUT));
 }
 
-void DynamicShapeInferer::visit(const ir::operation::Round &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Round::Input::INPUT));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::RSQRT &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::RSQRT::INPUT));
-}
-
 void DynamicShapeInferer::visit(const ir::operation::Select &op)
 {
   const auto input_cond_idx = op.getInputs().at(ir::operation::Select::Input::CONDITION);
@@ -836,11 +772,6 @@ void DynamicShapeInferer::visit(const ir::operation::Shape &op)
   assert(output->buffer() != nullptr);
 }
 
-void DynamicShapeInferer::visit(const ir::operation::Sin &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Sin::Input::INPUT));
-}
-
 void DynamicShapeInferer::visit(const ir::operation::Slice &op)
 {
   const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)};
@@ -1003,17 +934,6 @@ void DynamicShapeInferer::visit(const ir::operation::StridedSlice &op)
   assert(output->buffer() != nullptr);
 }
 
-void DynamicShapeInferer::visit(const ir::operation::Sub &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Sub::Input::LHS),
-                           op.getInputs().at(ir::operation::Sub::Input::RHS));
-}
-
-void DynamicShapeInferer::visit(const ir::operation::Tanh &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Tanh::INPUT));
-}
-
 void DynamicShapeInferer::visit(const ir::operation::Tile &op)
 {
   auto output_ind = op.getOutputs().at(0);
@@ -1091,10 +1011,5 @@ void DynamicShapeInferer::visit(const ir::operation::Unpack &op)
   }
 }
 
-void DynamicShapeInferer::visit(const ir::operation::ZerosLike &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ZerosLike::INPUT));
-}
-
 } // namespace exec
 } // namespace onert
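
The deletions above collapse the per-op visits (Add, Sub, Mul, Div, Max, Min, LogicalOr, ...) into single per-family visits that defer to handleBinaryArithmeticOp or handleSimpleUnaryOp. A self-contained sketch of the NumPy-style broadcast rule a binary shape handler has to apply, assuming onert follows this standard convention:

// Broadcast shape rule: right-align the shapes, require each dimension
// pair to match or contain a 1, and take the larger extent for the output.
#include <algorithm>
#include <cstddef>
#include <stdexcept>
#include <vector>

std::vector<int> broadcastShape(std::vector<int> lhs, std::vector<int> rhs)
{
  // Right-align by prepending size-1 dims to the shorter shape
  while (lhs.size() < rhs.size())
    lhs.insert(lhs.begin(), 1);
  while (rhs.size() < lhs.size())
    rhs.insert(rhs.begin(), 1);

  std::vector<int> out(lhs.size());
  for (std::size_t i = 0; i < lhs.size(); ++i)
  {
    if (lhs[i] != rhs[i] && lhs[i] != 1 && rhs[i] != 1)
      throw std::runtime_error{"broadcastShape: incompatible dimensions"};
    out[i] = std::max(lhs[i], rhs[i]);
  }
  return out;
}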
index 5b401ec..7feb3ab 100644
@@ -38,7 +38,10 @@ void Execution::changeInputShape(const ir::IOIndex &index, const ir::Shape &new_
   if (_io_desc.inputs.at(index.value()) != 0)
     throw std::runtime_error("Error in calling order");
 
-  _io_desc.input_shape_signature[index] = new_shape;
+  // This will be used later to mark the input tensor as dynamic.
+  // Note that the 'compiled' model is not updated with new_shape;
+  // instead, new_shape changes the model's input shape while 'running' it.
+  _io_desc.dynamic_input_shapes[index] = new_shape;
 }
 
 // TODO Remove default parameter
@@ -54,8 +57,8 @@ void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t le
   // if input_shape_sig is set, input_shape_sig overrides shape in info
   // note: input_shape_sig contains shape passed by nnfw_set_input_tensorinfo()
   {
-    auto input_shape_sig = _io_desc.input_shape_signature.find(index);
-    auto size_required = (input_shape_sig != _io_desc.input_shape_signature.end())
+    auto input_shape_sig = _io_desc.dynamic_input_shapes.find(index);
+    auto size_required = (input_shape_sig != _io_desc.dynamic_input_shapes.end())
                              ? input_shape_sig->second.num_elements() *
                                    onert::ir::sizeOfDataType(info.typeInfo().type())
                              : info.total_size();
@@ -154,8 +157,8 @@ bool Execution::isFinished(void) const { return finished; }
 
 ir::Shape Execution::getInputShape(ir::IOIndex ind) const
 {
-  auto itr = _io_desc.input_shape_signature.find(ind);
-  if (itr == _io_desc.input_shape_signature.end())
+  auto itr = _io_desc.dynamic_input_shapes.find(ind);
+  if (itr == _io_desc.dynamic_input_shapes.end())
   {
     auto operand_idx = primary_subgraph().getInputs().at(ind.value());
     return primary_subgraph().operands().at(operand_idx).shape();
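
Taken together, these Execution hunks define the dynamic-shape flow: changeInputShape records the override in dynamic_input_shapes, getInputShape reports it, and setInput sizes its length check against it. A hedged usage sketch; the include path, the Shape initializer-list constructor, and the surrounding session setup are assumptions:

// Usage sketch of the dynamic input shape flow described above.
#include <cstddef>
#include "exec/Execution.h" // assumed include path for onert::exec::Execution

void runWithOverriddenShape(onert::exec::Execution &exec, const void *buf, std::size_t len)
{
  using onert::ir::IOIndex;
  using onert::ir::Shape;

  // Recorded in _io_desc.dynamic_input_shapes; the compiled model is untouched
  exec.changeInputShape(IOIndex{0}, Shape{1, 224, 224, 3});

  // Reports the override instead of the model's static input shape
  const Shape effective = exec.getInputShape(IOIndex{0});
  (void)effective;

  // The size check in setInput now uses the overridden shape's element count
  exec.setInput(IOIndex{0}, buf, len);
}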
index 864ccb3..f835a96 100644
@@ -26,12 +26,14 @@ namespace onert
 namespace exec
 {
 
-ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
+ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
                            const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
                            const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
-                           const compiler::TensorBuilders &tensor_builders)
+                           const compiler::TensorRegistries &tensor_regs,
+                           backend::TensorManagerSet &&tensor_mgrs)
     : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()},
-      _input_tensors{input_tensors}, _output_tensors{output_tensors}, _mutex()
+      _input_tensors{input_tensors}, _output_tensors{output_tensors},
+      _tensor_mgrs{std::move(tensor_mgrs)}, _mutex()
 {
   // TODO Fix the way of knowing whether it is primary or not
   bool primary_executor = !(_input_tensors.empty() && _output_tensors.empty());
@@ -41,23 +43,10 @@ ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
       std::vector<std::shared_ptr<backend::ITensor>> list;
       for (auto ind : ind_seq)
       {
-        std::shared_ptr<backend::ITensor> tensor;
-        for (auto &tensor_builder : tensor_builders)
-        {
-          auto tensor_registry = tensor_builder->tensorRegistry();
-          assert(tensor_registry);
-          tensor = tensor_registry->getNativeITensor(ind);
-          if (tensor != nullptr)
-          {
-            if (tensor_builder->supportDynamicTensor())
-            {
-              DynAllocInfo dyn_alloc_info{ind, tensor_builder->dynamicTensorManager()};
-              _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
-            }
-            break;
-          }
-        }
+        std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind);
         assert(tensor != nullptr);
+        DynAllocInfo dyn_alloc_info{ind};
+        _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
         list.push_back(tensor);
       }
       return list;
@@ -66,23 +55,10 @@ ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
       std::vector<std::shared_ptr<backend::ITensor>> list;
       for (auto ind : ind_seq)
       {
-        std::shared_ptr<backend::ITensor> tensor;
-        for (auto &tensor_builder : tensor_builders)
-        {
-          auto tensor_registry = tensor_builder->tensorRegistry();
-          assert(tensor_registry);
-          tensor = tensor_registry->getNativeITensor(ind);
-          if (tensor != nullptr)
-          {
-            if (tensor_builder->supportDynamicTensor())
-            {
-              DynAllocInfo dyn_alloc_info{ind, tensor_builder->dynamicTensorManager()};
-              _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
-            }
-            break;
-          }
-        }
+        std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind);
         assert(tensor != nullptr);
+        DynAllocInfo dyn_alloc_info{ind};
+        _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
         list.push_back(tensor);
       }
       return list;
@@ -92,42 +68,23 @@ ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
   }
   else
   {
-    // If primary graph, all the inputs and outputs belong to controlflow backend
-    auto cf_dyn_tensor_builder = tensor_builders.getControlflowTensorBuilder();
-    assert(cf_dyn_tensor_builder);
-
     assert(input_tensors.size() == _graph.getInputs().size());
     assert(output_tensors.size() == _graph.getOutputs().size());
     for (uint32_t i = 0; i < input_tensors.size(); i++)
     {
       auto tensor = input_tensors[i];
       auto ind = _graph.getInputs().at(i);
-      DynAllocInfo dyn_alloc_info{ind, cf_dyn_tensor_builder->dynamicTensorManager()};
+      DynAllocInfo dyn_alloc_info{ind};
       _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
     }
     for (uint32_t i = 0; i < output_tensors.size(); i++)
     {
       auto tensor = output_tensors[i];
       auto ind = _graph.getOutputs().at(i);
-      DynAllocInfo dyn_alloc_info{ind, cf_dyn_tensor_builder->dynamicTensorManager()};
+      DynAllocInfo dyn_alloc_info{ind};
       _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info);
     }
   }
-
-  // Prepare each TensorManager on each backend
-  for (auto &tensor_builder : tensor_builders)
-  {
-    auto s_tensor_manager = tensor_builder->releaseStaticTensorManager();
-    if (s_tensor_manager != nullptr)
-      _tensor_mgrs.insert(std::move(s_tensor_manager));
-
-    if (tensor_builder->supportDynamicTensor())
-    {
-      auto d_tensor_manager = tensor_builder->releaseDynamicTensorManager();
-      if (d_tensor_manager != nullptr)
-        _tensor_mgrs.insert(std::move(d_tensor_manager));
-    }
-  }
 }
 
 void ExecutorBase::execute(const std::vector<std::shared_ptr<backend::ITensor>> &src_tensors,
@@ -192,8 +149,8 @@ void ExecutorBase::execute(const IODescription &desc)
     // TODO Remove dynamic_cast
     auto tensor = std::dynamic_pointer_cast<backend::controlflow::UserTensor>(_input_tensors[i]);
     assert(tensor);
-    auto input_shape = desc.input_shape_signature.find(ir::IOIndex{i});
-    if (input_shape != desc.input_shape_signature.end())
+    auto input_shape = desc.dynamic_input_shapes.find(ir::IOIndex{i});
+    if (input_shape != desc.dynamic_input_shapes.end())
     {
       tensor->set_dynamic();
       tensor->setShape(input_shape->second);
@@ -258,8 +215,8 @@ void ExecutorBase::execute(const IODescription &desc)
  */
 void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescription &desc)
 {
-  auto shape_sig_found = desc.input_shape_signature.find(io_ind);
-  if (shape_sig_found != desc.input_shape_signature.end())
+  auto shape_sig_found = desc.dynamic_input_shapes.find(io_ind);
+  if (shape_sig_found != desc.dynamic_input_shapes.end())
   {
     auto dyn_alloc_info = _input_to_dyn_alloc_info.find(_input_tensors[io_ind.value()]);
     if (dyn_alloc_info == _input_to_dyn_alloc_info.end())
@@ -269,7 +226,9 @@ void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescript
     auto changed_input_shape = shape_sig_found->second;
     auto operand_ind = dyn_alloc_info->second.ind;
 
-    dyn_alloc_info->second.dyn_tensor_manager->applyShape(operand_ind, changed_input_shape);
+    auto dyn_tensor_manager = _input_tensors[io_ind.value()]->dynamic_tensor_manager();
+    assert(dyn_tensor_manager);
+    dyn_tensor_manager->applyShape(operand_ind, changed_input_shape);
   }
 }
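
The final hunk drops the manager cached in DynAllocInfo and asks the tensor itself via dynamic_tensor_manager(). A trimmed sketch of that ownership change; the interfaces here are stand-ins for the real backend headers:

// Sketch: the dynamic tensor manager now travels with the tensor, so
// DynAllocInfo only needs to remember the operand index.
struct Shape;

struct IDynamicTensorManager
{
  virtual ~IDynamicTensorManager() = default;
  virtual void applyShape(int operand_index, const Shape &new_shape) = 0;
};

struct ITensor
{
  virtual ~ITensor() = default;
  // nullptr would mean the owning backend has no dynamic tensor support
  virtual IDynamicTensorManager *dynamic_tensor_manager() = 0;
};

void applyDynamicShape(ITensor &tensor, int operand_index, const Shape &new_shape)
{
  auto *manager = tensor.dynamic_tensor_manager();
  // The real code asserts; a sketch can afford a guard
  if (manager != nullptr)
    manager->applyShape(operand_index, new_shape);
}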
 
index 080c9bb..a13be7d 100644
@@ -25,7 +25,7 @@
 #include "Sink.h"
 #include "ShapeConverter.h"
 #include "exec/IExecutor.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
 #include "ir/LowerInfoMap.h"
 #include "backend/IConfig.h"
 #include "backend/Backend.h"
@@ -33,9 +33,8 @@
 #include "exec/IFunction.h"
 #include "backend/IDynamicTensorManager.h"
 #include "backend/ITensorManager.h"
-#include "backend/ITensorBuilder.h"
 #include "exec/ExecutionObservee.h"
-#include "compiler/TensorBuilders.h"
+#include "compiler/TensorRegistries.h"
 #include <list>
 
 namespace onert
@@ -51,10 +50,11 @@ public:
    * @param graph Graph object
    * @param tensor_builders Tensor builders that are currently used
    */
-  ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph,
+  ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph,
                const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
                const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
-               const compiler::TensorBuilders &tensor_builders);
+               const compiler::TensorRegistries &tensor_regs,
+               backend::TensorManagerSet &&tensor_mgrs);
 
   virtual ~ExecutorBase() = default;
 
@@ -102,7 +102,7 @@ protected:
 protected:
   ExecutionObservee _subject;
   std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
-  std::unique_ptr<ir::LoweredGraph> _lowered_graph;
+  std::unique_ptr<compiler::LoweredGraph> _lowered_graph;
   const ir::Graph &_graph;
   std::vector<std::shared_ptr<backend::ITensor>> _input_tensors;
   std::vector<std::shared_ptr<backend::ITensor>> _output_tensors;
index d413e81..fb31f75 100644
@@ -28,7 +28,8 @@ namespace exec
 
 void FunctionSequence::run()
 {
-  if (_enable_dynamic_shape_inferer)
+  // TODO Find out when `_enable_dynamic_shape_inferer` is true but `_dynamic_tensor_ctx` is false
+  if (_enable_dynamic_shape_inferer && _dynamic_tensor_ctx)
   {
     if (_dynamic_tensor_ctx->op_seq->size() != _functions.size())
       throw std::runtime_error("operation and functions should be mapped one by one");
index 5c099bc..c224d3f 100644
@@ -46,12 +46,14 @@ public:
    * @param tensor_builders Tensor builders that are currently used
    * @param code_map OpSequence and its code map
    */
-  LinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+  LinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                  const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
                  const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
-                 const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map,
+                 const compiler::TensorRegistries &tensor_regs,
+                 backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map,
                  const std::vector<ir::OpSequenceIndex> &order)
-      : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_builders}
+      : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
+                     std::move(tensor_mgrs)}
   {
     for (auto index : order)
     {
index b5d8177..ab234aa 100644
@@ -60,12 +60,13 @@ void ParallelExecutor::notify(uint32_t finished_job_id)
 }
 
 ParallelExecutor::ParallelExecutor(
-    std::unique_ptr<ir::LoweredGraph> lowered_graph,
+    std::unique_ptr<compiler::LoweredGraph> lowered_graph,
     const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
     const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
-    const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map)
-    : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_builders,
-                       std::move(code_map)}
+    const compiler::TensorRegistries &tensor_regs, backend::TensorManagerSet &&tensor_mgrs,
+    compiler::CodeMap &&code_map)
+    : DataflowExecutor{std::move(lowered_graph), input_tensors,      output_tensors, tensor_regs,
+                       std::move(tensor_mgrs),   std::move(code_map)}
 {
   VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl;
 }
index 462cbc6..929edfc 100644
@@ -50,10 +50,11 @@ public:
    * @param tensor_builders Tensor builders that are currently used
    * @param code_map OpSequence and its code map
    */
-  ParallelExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+  ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                    const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors,
                    const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors,
-                   const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map);
+                   const compiler::TensorRegistries &tensor_regs,
+                   backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map);
 
   void executeImpl() override;
 
index 48642d8..7be9df4 100644
@@ -33,7 +33,7 @@ namespace feature
 namespace nchw
 {
 
-template <typename T> class Reader final : public feature::Reader<T>
+template <typename T> class Reader : public feature::Reader<T>
 {
 public:
   // Construct for buffer of model inputs
@@ -68,15 +68,14 @@ public:
   }
 
 public:
-  T at(uint32_t ch, uint32_t row, uint32_t col) const override
+  T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const final
   {
-    const auto offset = feature_index_to_byte_offset(0, ch, row, col);
-
-    const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
-    return *ptr;
+    return getRef(batch, ch, row, col);
   }
-  T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override
+  T at(uint32_t ch, uint32_t row, uint32_t col) const final { return getRef(0, ch, row, col); }
+
+protected:
+  const T &getRef(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const
   {
     const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
 
index ff55de1..dbaf1a9 100644
@@ -17,7 +17,7 @@
 #ifndef __ONERT_EXEC_FEATURE_NCHW_VIEW_H__
 #define __ONERT_EXEC_FEATURE_NCHW_VIEW_H__
 
-#include "../Reader.h"
+#include "Reader.h"
 
 #include "backend/ITensor.h"
 #include "ir/Shape.h"
@@ -34,99 +34,31 @@ namespace feature
 namespace nchw
 {
 
-template <typename T> class View final : public feature::Reader<T>
+template <typename T> class View final : public Reader<T>
 {
 public:
   // Construct for buffer of model inputs
-  View(const ir::FeatureShape &shape, T *ptr, size_t len)
-      : _shape{shape}, _ptr{reinterpret_cast<uint8_t *>(ptr)}, _len{len}
+  View(const ir::FeatureShape &shape, T *ptr, size_t len) : Reader<T>{shape, ptr, len}
   {
-    assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len);
-
-    _strides.W = sizeof(T);
-    _strides.H = shape.W * sizeof(T);
-    _strides.C = shape.W * shape.H * sizeof(T);
-    _strides.N = shape.W * shape.H * shape.C * sizeof(T);
+    // DO NOTHING
   }
 
   // Construct for backend tensor
-  View(::onert::backend::ITensor *tensor)
-      : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
-  {
-    assert(tensor->layout() == ir::Layout::NCHW);
-
-    const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
-    _strides.W = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
-    _strides.H = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
-    _strides.C = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
-    _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
-
-    _shape.W = tensor->dimension(3);
-    _shape.H = tensor->dimension(2);
-    _shape.C = tensor->dimension(1);
-    _shape.N = tensor->dimension(0);
-  }
-
-public:
-  T at(uint32_t ch, uint32_t row, uint32_t col) const override
+  View(::onert::backend::ITensor *tensor) : Reader<T>{tensor}
   {
-    const auto offset = feature_index_to_byte_offset(0, ch, row, col);
-
-    T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
-    return *ptr;
-  }
-  T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override
-  {
-    const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
-
-    T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
-    return *ptr;
+    // DO NOTHING
   }
 
 public:
-  T &at(uint32_t ch, uint32_t row, uint32_t col)
-  {
-    const auto offset = feature_index_to_byte_offset(0, ch, row, col);
-
-    T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
-    return *ptr;
-  }
+  using Reader<T>::at;
   T &at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col)
   {
-    const auto offset = feature_index_to_byte_offset(batch, ch, row, col);
-
-    T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
-    return *ptr;
+    return const_cast<T &>(Reader<T>::getRef(batch, ch, row, col));
   }
-
-private:
-  size_t feature_index_to_byte_offset(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const
+  T &at(uint32_t ch, uint32_t row, uint32_t col)
   {
-    assert(1u * _shape.N > batch); // shape.N > batch
-    assert(1u * _shape.C > ch);    // shape.C > ch
-    assert(1u * _shape.H > row);   // shape.H > row
-    assert(1u * _shape.W > col);   // shape.W > col
-
-    uint32_t res = 0;
-    res += batch * _strides.N;
-    res += ch * _strides.C;
-    res += row * _strides.H;
-    res += col * _strides.W;
-
-    return res;
+    return const_cast<T &>(Reader<T>::getRef(0, ch, row, col));
   }
-
-private:
-  // TODO Remove _shape
-  ir::FeatureShape _shape;
-  using Strides = ir::FeatureShape;
-  Strides _strides;
-  uint8_t *_ptr;
-  size_t _len;
 };
 
 } // namespace nchw
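
Both the NCHW hunks above and the NHWC ones below apply one deduplication: Reader loses final and gains a protected getRef() that owns the offset arithmetic, while View inherits it, re-exports the const at() overloads, and adds writable ones via const_cast. A minimal sketch of the pattern, with a flat index standing in for the (batch, ch, row, col) math:

// Dedup pattern: the read path lives once in the base, the write path
// reuses it instead of duplicating the offset calculation.
#include <cstdint>
#include <vector>

template <typename T> class Reader
{
public:
  explicit Reader(std::vector<T> data) : _data(std::move(data)) {}
  T at(uint32_t i) const { return getRef(i); }

protected:
  const T &getRef(uint32_t i) const { return _data[i]; } // single offset calc

private:
  std::vector<T> _data;
};

template <typename T> class View final : public Reader<T>
{
public:
  using Reader<T>::Reader;
  using Reader<T>::at; // keep the const overloads visible alongside ours
  T &at(uint32_t i) { return const_cast<T &>(Reader<T>::getRef(i)); }
};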
index ef27992..7730cee 100644
@@ -34,7 +34,7 @@ namespace feature
 namespace nhwc
 {
 
-template <typename T> class Reader final : public feature::Reader<T>
+template <typename T> class Reader : public feature::Reader<T>
 {
 public:
   // Construct for buffer of model inputs
@@ -70,15 +70,14 @@ public:
   }
 
 public:
-  T at(uint32_t row, uint32_t col, uint32_t ch) const override
+  T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const final
   {
-    const auto offset = feature_index_to_byte_offset(0, row, col, ch);
-
-    const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
-    return *ptr;
+    return getRef(batch, row, col, ch);
   }
-  T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const override
+  T at(uint32_t row, uint32_t col, uint32_t ch) const final { return getRef(0, row, col, ch); }
+
+protected:
+  const T &getRef(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const
   {
     const auto offset = feature_index_to_byte_offset(batch, row, col, ch);
 
index a09961a..72c8c34 100644
@@ -35,101 +35,31 @@ namespace feature
 namespace nhwc
 {
 
-template <typename T> class View final : public feature::Reader<T>
+template <typename T> class View final : public Reader<T>
 {
 public:
   // Construct for buffer of model inputs
-  View(const ir::FeatureShape &shape, T *ptr, size_t len)
-      : _shape{shape}, _ptr{reinterpret_cast<uint8_t *>(ptr)}, _len{len}
+  View(const ir::FeatureShape &shape, T *ptr, size_t len) : Reader<T>{shape, ptr, len}
   {
-    UNUSED_RELEASE(len); // Workaround for unused variable in release mode
-    assert(shape.N * shape.H * shape.W * shape.C * sizeof(T) == len);
-
-    // No padding
-    _strides.C = sizeof(T);
-    _strides.W = shape.C * sizeof(T);
-    _strides.H = shape.C * shape.W * sizeof(T);
-    _strides.N = shape.C * shape.W * shape.H * sizeof(T);
+    // DO NOTHING
   }
 
   // Construct for backend tensor
-  View(backend::ITensor *tensor)
-      : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()}
+  View(backend::ITensor *tensor) : Reader<T>{tensor}
   {
-    assert(tensor->layout() == ir::Layout::NHWC);
-
-    const auto start_offset = tensor->calcOffset({0, 0, 0, 0});
-    _strides.C = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset;
-    _strides.W = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset;
-    _strides.H = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset;
-    _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset;
-
-    _shape.C = tensor->dimension(3);
-    _shape.W = tensor->dimension(2);
-    _shape.H = tensor->dimension(1);
-    _shape.N = tensor->dimension(0);
+    // DO NOTHING
   }
 
 public:
-  T at(uint32_t row, uint32_t col, uint32_t ch) const override
-  {
-    const auto offset = feature_index_to_byte_offset(0, row, col, ch);
-
-    const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
-    return *ptr;
-  }
-  T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const override
-  {
-    const auto offset = feature_index_to_byte_offset(batch, row, col, ch);
-
-    const T *ptr = reinterpret_cast<const T *>(_ptr + offset);
-
-    return *ptr;
-  }
-
-  T &at(uint32_t row, uint32_t col, uint32_t ch)
-  {
-    const auto offset = feature_index_to_byte_offset(0, row, col, ch);
-
-    T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
-    return *ptr;
-  }
-
+  using Reader<T>::at;
   T &at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch)
   {
-    const auto offset = feature_index_to_byte_offset(batch, row, col, ch);
-
-    T *ptr = reinterpret_cast<T *>(_ptr + offset);
-
-    return *ptr;
+    return const_cast<T &>(Reader<T>::getRef(batch, row, col, ch));
   }
-
-private:
-  size_t feature_index_to_byte_offset(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const
+  T &at(uint32_t row, uint32_t col, uint32_t ch)
   {
-    assert(1u * _shape.N > batch); // shape.N > batch
-    assert(1u * _shape.H > row);   // shape.H > row
-    assert(1u * _shape.W > col);   // shape.W > col
-    assert(1u * _shape.C > ch);    // shape.C > ch
-
-    uint32_t res = 0;
-    res += batch * _strides.N;
-    res += row * _strides.H;
-    res += col * _strides.W;
-    res += ch * _strides.C;
-
-    return res;
+    return const_cast<T &>(Reader<T>::getRef(0, row, col, ch));
   }
-
-private:
-  // TODO Remove _shape
-  ir::FeatureShape _shape;
-  using Strides = ir::FeatureShape;
-  Strides _strides;
-  uint8_t *_ptr;
-  size_t _len;
 };
 
 } // namespace nhwc
index 5f646b8..0714df3 100644
 //
 // Same list with Operations.lst
 // Make comment out if operation is not supported in interpreter
-INTERP_OP(Add)
-INTERP_OP(Sub)
+INTERP_OP(BinaryArithmetic)
 //INTERP_OP(BatchToSpaceND)
 //INTERP_OP(Cast)
 INTERP_OP(Conv2D)
 INTERP_OP(DepthwiseConv2D)
-INTERP_OP(AvgPool2D)
-INTERP_OP(MaxPool2D)
+INTERP_OP(Pool2D)
 INTERP_OP(Concat)
 INTERP_OP(FullyConnected)
 //INTERP_OP(Reduce)
 INTERP_OP(Reshape)
-INTERP_OP(Mul)
 INTERP_OP(Softmax)
 //INTERP_OP(Squeeze)
 //INTERP_OP(Slice)
 //INTERP_OP(StridedSlice)
-INTERP_OP(Tanh)
-INTERP_OP(Logistic)
-//INTERP_OP(Div)
+INTERP_OP(ElementwiseActivation)
 //INTERP_OP(Transpose)
 //INTERP_OP(Exp)
 //INTERP_OP(Comparison)
-//INTERP_OP(LogicalAnd)
-//INTERP_OP(LogicalOr)
 //INTERP_OP(LogicalNot)
 //INTERP_OP(LSTM)
 //INTERP_OP(RSQRT)
-INTERP_OP(ReLU)
 //INTERP_OP(ResizeBilinear)
-INTERP_OP(ReLU1)
-INTERP_OP(ReLU6)
 //INTERP_OP(RNN)
 //INTERP_OP(Floor)
 //INTERP_OP(SpaceToBatchND)
 //INTERP_OP(SpaceToDepth)
-//INTERP_OP(L2Pool2D)
 //INTERP_OP(EmbeddingLookup)
 //INTERP_OP(L2Normalization)
 //INTERP_OP(HashtableLookup)
@@ -81,6 +70,4 @@ INTERP_OP(Gather)
 INTERP_OP(Pad)
 //INTERP_OP(Custom)
 //INTERP_OP(Permute)
-//INTERP_OP(Min)
-//INTERP_OP(Max)
 //INTERP_OP(OneHot)
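
Lists like this are conventionally consumed as X-macros: the includer defines INTERP_OP, includes the .lst, and undefines it again, so the merged entries above propagate to every expansion site automatically. A hedged sketch of one such expansion; the exact include sites in onert are not shown in this diff:

// X-macro sketch: generate one getter declaration per listed operation.
struct OpKernel; // real definition lives in interp/Registration.h

#define INTERP_OP(InternalName) OpKernel *get##InternalName();
#include "InterpOps.lst" // expands to: OpKernel *getBinaryArithmetic(); ...
#undef INTERP_OP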
index 44c9554..86e8835 100644
@@ -19,9 +19,7 @@
 #include "OperationUtil.h"
 
 #include "interp/Registration.h"
-#include "ir/operation/Add.h"
-#include "ir/operation/Sub.h"
-#include "ir/operation/Mul.h"
+#include "ir/operation/BinaryArithmetic.h"
 #include "misc/polymorphic_downcast.h"
 #include "cker/Types.h"
 
@@ -39,12 +37,13 @@ enum class OpType
   MUL
 };
 
-template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation &node)
+void prepare(ExecEnv *env, const ir::Operation &node)
 {
-  const auto &add_node = nnfw::misc::polymorphic_downcast<const node_type &>(node);
+  const auto &arithmetic_node =
+      nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
 
-  const auto lhs_index = node.getInputs().at(add_node.LHS);
-  const auto rhs_index = node.getInputs().at(add_node.RHS);
+  const auto lhs_index = node.getInputs().at(arithmetic_node.LHS);
+  const auto rhs_index = node.getInputs().at(arithmetic_node.RHS);
   const auto out_index = node.getOutputs().at(0);
 
   const auto lhs_tensor = env->tensorAt(lhs_index);
@@ -54,7 +53,7 @@ template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation
   // TODO Util function to compare TensorInfo
   if (lhs_tensor->data_type() != rhs_tensor->data_type())
   {
-    throw std::runtime_error{"Interp(Add): Different input types"};
+    throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Different input types"};
   }
 
   bool try_broadcast = (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape());
@@ -65,7 +64,7 @@ template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation
                                         rhs_tensor->tensorInfo().shape(), success);
     if (!success)
     {
-      throw std::runtime_error{"Interp(Add): Fail to brodcasting"};
+      throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Fail to brodcasting"};
     }
 
     auto output_info =
@@ -86,7 +85,7 @@ template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation
   // TODO Util function to compare TensorInfo
   if (lhs_tensor->data_type() != out_tensor->data_type())
   {
-    throw std::runtime_error{"Interp(Add): Invalid output type"};
+    throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Invalid output type"};
   }
 }
 
@@ -103,9 +102,9 @@ inline void setActivationParams(int32_t min, int32_t max,
   params->quantized_activation_max = max;
 }
 
-template <typename raw_type, typename param_type, OpType op_type>
+template <typename raw_type, OpType op_type>
 void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor *out_tensor,
-            const param_type &param)
+            const ir::operation::BinaryArithmetic::Param &param)
 {
   const auto lhs_buffer = lhs_tensor->bufferRO();
   const auto rhs_buffer = rhs_tensor->bufferRO();
@@ -146,13 +145,11 @@ void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor
                                                out_shape, out_ptr);
 }
 
-template <typename node_type, typename param_type, OpType op_type>
-void invokeAdd(const ExecEnv *env, const ir::Operation &node)
+template <OpType op_type>
+void invokeBinaryArithmetic(const ExecEnv *env, const ir::operation::BinaryArithmetic &node)
 {
-  const auto &arithmetic_node = nnfw::misc::polymorphic_downcast<const node_type &>(node);
-
-  const auto lhs_index = node.getInputs().at(arithmetic_node.LHS);
-  const auto rhs_index = node.getInputs().at(arithmetic_node.RHS);
+  const auto lhs_index = node.getInputs().at(node.LHS);
+  const auto rhs_index = node.getInputs().at(node.RHS);
   const auto out_index = node.getOutputs().at(0);
   const auto lhs_tensor = env->tensorAt(lhs_index);
   const auto rhs_tensor = env->tensorAt(rhs_index);
@@ -161,38 +158,46 @@ void invokeAdd(const ExecEnv *env, const ir::Operation &node)
 
   if (data_type == ir::DataType::INT32)
   {
-    invoke<int32_t, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor,
-                                         arithmetic_node.param());
+    invoke<int32_t, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param());
   }
   else if (data_type == ir::DataType::FLOAT32)
   {
-    invoke<float, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor, arithmetic_node.param());
+    invoke<float, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param());
   }
   else
   {
     throw std::runtime_error{"NYI: Unsupported data type"};
   }
 }
-} // namespace
 
-OpKernel *getAdd()
+void invokeBinaryArithmeticOps(const ExecEnv *env, const ir::Operation &node)
 {
-  static OpKernel kernel = {prepareAdd<ir::operation::Add>,
-                            invokeAdd<ir::operation::Add, ir::operation::Add::Param, OpType::ADD>};
-  return &kernel;
-}
+  const auto &arithmetic_node =
+      nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node);
 
-OpKernel *getSub()
-{
-  static OpKernel kernel = {prepareAdd<ir::operation::Sub>,
-                            invokeAdd<ir::operation::Sub, ir::operation::Sub::Param, OpType::SUB>};
-  return &kernel;
+  switch (arithmetic_node.param().arithmetic_type)
+  {
+    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+      invokeBinaryArithmetic<OpType::ADD>(env, arithmetic_node);
+      break;
+    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+      invokeBinaryArithmetic<OpType::SUB>(env, arithmetic_node);
+      break;
+    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+      invokeBinaryArithmetic<OpType::MUL>(env, arithmetic_node);
+      break;
+    default:
+      throw std::runtime_error{"Interp(BinaryArithmetic): NYI unsupported operation " +
+                               arithmetic_node.name()};
+      break;
+  }
 }
 
-OpKernel *getMul()
+} // namespace
+
+OpKernel *getBinaryArithmetic()
 {
-  static OpKernel kernel = {prepareAdd<ir::operation::Mul>,
-                            invokeAdd<ir::operation::Mul, ir::operation::Mul::Param, OpType::MUL>};
+  static OpKernel kernel = {prepare, invokeBinaryArithmeticOps};
   return &kernel;
 }
 
 
 #include "interp/Registration.h"
 
-#include "ir/operation/ReLU.h"
-#include "ir/operation/ReLU1.h"
-#include "ir/operation/ReLU6.h"
-#include "ir/operation/Tanh.h"
+#include "ir/operation/ElementwiseActivation.h"
+
+#include <misc/polymorphic_downcast.h>
+#include <cker/operation/Logistic.h>
+#include <cker/operation/Tanh.h>
 
 namespace onert
 {
@@ -34,9 +35,8 @@ namespace
 
 enum class ActivationType
 {
+  Logistic,
   ReLU,
-  ReLU1,
-  ReLU6,
   Tanh
 };
 
@@ -65,30 +65,25 @@ void prepare(ExecEnv *env, const ir::Operation &node)
   // TODO Util function to compare TensorInfo
   if (input_tensor->data_type() != output_tensor->data_type())
   {
-    throw std::runtime_error{"Interp(Activations): Invalid output type"};
+    throw std::runtime_error{"Interp(ElementwiseActivation): Invalid output type"};
   }
 }
 
 template <ActivationType act_type>
-void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements)
+void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements, float alpha,
+               float beta)
 {
   std::function<float(const float &)> fn = [](const float &) { return std::nanf(""); };
   switch (act_type)
   {
     case ActivationType::ReLU:
-      fn = [](const float &in) { return std::max(0.f, in); };
-      break;
-    case ActivationType::ReLU1:
-      fn = [](const float &in) { return std::min(std::max(-1.f, in), 1.f); };
-      break;
-    case ActivationType::ReLU6:
-      fn = [](const float &in) { return std::min(std::max(0.f, in), 6.f); };
+      fn = [alpha, beta](const float &in) { return std::min(std::max(beta, in), alpha); };
       break;
     case ActivationType::Tanh:
       fn = [](const float &in) { return std::tanh(in); };
       break;
     default:
-      throw std::runtime_error{"Interp(Activations): NYI - Unsupported activation"};
+      throw std::runtime_error{"Interp(ElementwiseActivation): NYI - Unsupported activation"};
       break;
   }
 
@@ -114,38 +109,51 @@ template <ActivationType act_type> void invoke(const ExecEnv *env, const ir::Ope
     uint64_t elements = input_tensor->num_elements();
     const float *input_start = reinterpret_cast<const float *>(input_tensor->bufferRO());
     float *out = reinterpret_cast<float *>(output_tensor->buffer());
-
-    evalFloat<act_type>(input_start, out, elements);
+    if (act_type == ActivationType::Logistic)
+    {
+      const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
+      const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
+      nnfw::cker::Logistic(cker_input_shape, input_start, cker_output_shape, out);
+    }
+    else
+    {
+      const auto &act_node =
+          nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
+      evalFloat<act_type>(input_start, out, elements, act_node.param().alpha,
+                          act_node.param().beta);
+    }
   }
   else
   {
-    throw std::runtime_error{"Interp(ReLU6): NYI - Support float only"};
+    throw std::runtime_error{"Interp(" + node.name() + "): NYI - Support float only"};
   }
 }
 
-} // namespace
-
-OpKernel *getReLU()
+void invokeElementwiseActivation(const ExecEnv *env, const ir::Operation &node)
 {
-  static OpKernel kernel = {prepare, invoke<ActivationType::ReLU>};
-  return &kernel;
-}
-
-OpKernel *getReLU1()
-{
-  static OpKernel kernel = {prepare, invoke<ActivationType::ReLU1>};
-  return &kernel;
+  const auto &act_node =
+      nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node);
+  switch (act_node.param().op_type)
+  {
+    case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+      invoke<ActivationType::Logistic>(env, node);
+      break;
+    case ir::operation::ElementwiseActivation::Type::RELU:
+      invoke<ActivationType::ReLU>(env, node);
+      break;
+    case ir::operation::ElementwiseActivation::Type::TANH:
+      invoke<ActivationType::Tanh>(env, node);
+      break;
+    default:
+      throw std::runtime_error("Interp(" + node.name() + "): NYI - Unsupported activation");
+  }
 }
 
-OpKernel *getReLU6()
-{
-  static OpKernel kernel = {prepare, invoke<ActivationType::ReLU6>};
-  return &kernel;
-}
+} // namespace
 
-OpKernel *getTanh()
+OpKernel *getElementwiseActivation()
 {
-  static OpKernel kernel = {prepare, invoke<ActivationType::Tanh>};
+  static OpKernel kernel = {prepare, invokeElementwiseActivation};
   return &kernel;
 }
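
The unified ElementwiseActivation kernel expresses the whole ReLU family as one clamp, min(max(beta, in), alpha), with beta the lower bound and alpha the upper bound. A small self-checking sketch with the (alpha, beta) pairs that reproduce the removed activations (the pairs are illustrative here; in onert they come from the node's param):

    #include <algorithm>
    #include <cassert>
    #include <limits>

    inline float clampActivation(float in, float alpha, float beta)
    {
      return std::min(std::max(beta, in), alpha); // beta = lower bound, alpha = upper bound
    }

    int main()
    {
      const float inf = std::numeric_limits<float>::infinity();
      assert(clampActivation(-3.f, inf, 0.f) == 0.f);   // ReLU:  (alpha, beta) = (inf, 0)
      assert(clampActivation(9.f, 6.f, 0.f) == 6.f);    // ReLU6: (alpha, beta) = (6, 0)
      assert(clampActivation(-2.f, 1.f, -1.f) == -1.f); // ReLU1: (alpha, beta) = (1, -1)
      return 0;
    }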
 
diff --git a/runtime/onert/core/src/interp/operations/Logistic.cc b/runtime/onert/core/src/interp/operations/Logistic.cc
deleted file mode 100644 (file)
index c23cbb7..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/Logistic.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/Logistic.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareLogistic(ExecEnv *env, const ir::Operation &node)
-{
-  const auto input_index = node.getInputs().at(0);
-  const auto output_index = node.getOutputs().at(0);
-
-  const auto input_tensor = env->tensorAt(input_index);
-
-  const auto output_info = env->graph().operands().at(output_index).info();
-
-  // Check shape and type lhs is same with rhs
-  // TODO Util function to compare TensorInfo
-  if (output_info.total_size() == 0)
-  {
-    throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"};
-  }
-  else
-  {
-    env->allocateIfNeeded(output_index, output_info);
-  }
-
-  const auto output_tensor = env->tensorAt(output_index);
-  if (input_tensor->data_type() != output_tensor->data_type())
-  {
-    throw std::runtime_error{"Interp(Logistic): Invalid output type"};
-  }
-}
-
-void invoke(const ITensor *input_tensor, const ITensor *output_tensor)
-{
-  const auto input_buffer = input_tensor->bufferRO();
-  auto output_buffer = output_tensor->buffer();
-
-  const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape());
-  const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape());
-  const float *input_ptr = reinterpret_cast<const float *>(input_buffer);
-  float *output_ptr = reinterpret_cast<float *>(output_buffer);
-
-  nnfw::cker::Logistic(cker_input_shape, input_ptr, cker_output_shape, output_ptr);
-}
-
-void invokeLogistic(const ExecEnv *env, const ir::Operation &node)
-{
-  const auto input_index = node.getInputs().at(0);
-  const auto output_index = node.getOutputs().at(0);
-
-  const auto input_tensor = env->tensorAt(input_index);
-  const auto output_tensor = env->tensorAt(output_index);
-
-  const auto data_type = input_tensor->data_type();
-
-  if (data_type == ir::DataType::FLOAT32)
-  {
-    invoke(input_tensor, output_tensor);
-  }
-  else
-  {
-    throw std::runtime_error{"Interp(Logistic): NYI - Unsupported data type"};
-  }
-}
-} // namespace
-
-OpKernel *getLogistic()
-{
-  static OpKernel kernel = {prepareLogistic, invokeLogistic};
-  return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
diff --git a/runtime/onert/core/src/interp/operations/MaxPool2D.cc b/runtime/onert/core/src/interp/operations/MaxPool2D.cc
deleted file mode 100644 (file)
index 313948f..0000000
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cker/operation/MaxPool.h>
-
-#include "OperationUtil.h"
-
-#include "interp/Registration.h"
-#include "ir/operation/MaxPool2D.h"
-#include "util/Utils.h"
-#include "util/ShapeInference.h"
-#include "misc/polymorphic_downcast.h"
-
-namespace onert
-{
-namespace interp
-{
-namespace
-{
-
-void prepareMaxPool2D(ExecEnv *env, const ir::Operation &node)
-{
-  const auto in_index = node.getInputs().at(0);
-  const auto out_index = node.getOutputs().at(0);
-
-  const auto in_tensor = env->tensorAt(in_index);
-
-  assert(in_tensor->num_dimensions() == 4);
-  UNUSED_RELEASE(in_tensor);
-
-  const auto output_info = env->graph().operands().at(out_index).info();
-  if (output_info.total_size() == 0)
-  {
-    // Handle unspecified output shape
-    const auto &maxpool_node =
-        nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node);
-    const auto infered_output_shape =
-        shape_inference::inferMaxPoolShape(in_tensor->tensorInfo().shape(), maxpool_node.param());
-    env->allocateIfNeeded(
-        out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
-  }
-  else
-  {
-    env->allocateIfNeeded(out_index, output_info);
-  }
-
-  auto out_tensor = env->tensorAt(out_index);
-  UNUSED_RELEASE(out_tensor);
-
-  // Handle same ifm & ofm data type only
-  assert(in_tensor->data_type() == out_tensor->data_type());
-  assert(out_tensor->num_dimensions() == 4);
-}
-
-void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
-            const ir::operation::MaxPool2D::Param &param)
-{
-  // TODO support NCHW frontend
-  const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
-  const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
-  const auto padding =
-      ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh);
-  // Calculate
-  nnfw::cker::PoolParams cker_param;
-  calculateActivationRange(param.activation, &cker_param.float_activation_min,
-                           &cker_param.float_activation_max);
-  cker_param.filter_width = param.kw;
-  cker_param.filter_height = param.kh;
-  cker_param.padding_values.width = padding.left;
-  cker_param.padding_values.height = padding.top;
-  cker_param.stride_width = param.stride.horizontal;
-  cker_param.stride_height = param.stride.vertical;
-
-  const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
-  const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
-  const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
-  float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
-
-  nnfw::cker::MaxPool(cker_param, in_shape, in_ptr, out_shape, out_ptr);
-}
-
-void invokeMaxPool2D(const ExecEnv *env, const ir::Operation &node)
-{
-  const auto &maxpool_node =
-      nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node);
-
-  const auto in_index = node.getInputs().at(0);
-  const auto out_index = node.getOutputs().at(0);
-
-  const auto in_tensor = env->tensorAt(in_index);
-  const auto out_tensor = env->tensorAt(out_index);
-
-  const auto data_type = in_tensor->data_type();
-  if (data_type == ir::DataType::FLOAT32)
-  {
-    invoke(in_tensor, out_tensor, maxpool_node.param());
-  }
-  else
-  {
-    throw std::runtime_error{"NYI: Support float32 only"};
-  }
-}
-} // namespace
-
-OpKernel *getMaxPool2D()
-{
-  static OpKernel kernel = {prepareMaxPool2D, invokeMaxPool2D};
-  return &kernel;
-}
-
-} // namespace interp
-} // namespace onert
  */
 
 #include <cker/operation/AveragePool.h>
+#include <cker/operation/MaxPool.h>
 
 #include "OperationUtil.h"
 
 #include "interp/Registration.h"
-#include "ir/operation/AvgPool2D.h"
+#include "ir/operation/Pool2D.h"
 #include "util/Utils.h"
 #include "util/ShapeInference.h"
 #include "misc/polymorphic_downcast.h"
@@ -28,12 +29,13 @@ namespace onert
 {
 namespace interp
 {
-namespace avgpool2d
+namespace pool2d
 {
 
-void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node)
+void preparePool2D(ExecEnv *env, const ir::Operation &node)
 {
-  const auto in_index = node.getInputs().at(0);
+  const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node);
+  const auto in_index = node.getInputs().at(pool_node.INPUT);
   const auto out_index = node.getOutputs().at(0);
 
   const auto in_tensor = env->tensorAt(in_index);
@@ -45,10 +47,8 @@ void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node)
   if (output_info.total_size() == 0)
   {
     // Handle unspecified output shape
-    const auto &avgpool_node =
-        nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node);
     const auto infered_output_shape =
-        shape_inference::inferAvgPoolShape(in_tensor->tensorInfo().shape(), avgpool_node.param());
+        shape_inference::inferPoolShape(in_tensor->tensorInfo().shape(), pool_node.param());
     env->allocateIfNeeded(
         out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo()));
   }
@@ -65,18 +65,44 @@ void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node)
   assert(out_tensor->num_dimensions() == 4);
 }
 
-void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
-            const ir::operation::AvgPool2D::Param &param)
+template <typename T>
+void invoke(const nnfw::cker::PoolParams &params, const nnfw::cker::Shape &in_shape,
+            const T *in_ptr, const nnfw::cker::Shape &out_shape, T *out_ptr,
+            ir::operation::Pool2D::PoolType op_type)
 {
-  // TODO Support NCHW frontend
+  switch (op_type)
+  {
+    case ir::operation::Pool2D::PoolType::AVG:
+      nnfw::cker::AveragePool<T>(params, in_shape, in_ptr, out_shape, out_ptr);
+      break;
+    case ir::operation::Pool2D::PoolType::MAX:
+      nnfw::cker::MaxPool<T>(params, in_shape, in_ptr, out_shape, out_ptr);
+      break;
+    default:
+      throw std::runtime_error{"Interp(Pool2D): NYI unsupported operation"};
+      break;
+  }
+}
+
+void invokePool2DOps(const ExecEnv *env, const ir::Operation &node)
+{
+  const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node);
+
+  const auto in_index = node.getInputs().at(0);
+  const auto out_index = node.getOutputs().at(0);
+
+  // Look up the input/output tensors allocated during prepare
+  const auto in_tensor = env->tensorAt(in_index);
+  const auto out_tensor = env->tensorAt(out_index);
+
+  // TODO support NCHW frontend
   const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
   const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC);
+  const auto param = pool_node.param();
   const auto padding =
       ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh);
   // Calculate
   nnfw::cker::PoolParams cker_param;
-  calculateActivationRange(param.activation, &cker_param.float_activation_min,
-                           &cker_param.float_activation_max);
   cker_param.filter_width = param.kw;
   cker_param.filter_height = param.kh;
   cker_param.padding_values.width = padding.left;
@@ -84,41 +110,29 @@ void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
   cker_param.stride_width = param.stride.horizontal;
   cker_param.stride_height = param.stride.vertical;
 
-  const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
-  const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
-  const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
-  float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
-
-  nnfw::cker::AveragePool(cker_param, in_shape, in_ptr, out_shape, out_ptr);
-}
-
-void invokeAvgPool2D(const ExecEnv *env, const ir::Operation &node)
-{
-  const auto &avgpool_node =
-      nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node);
-
-  const auto in_index = node.getInputs().at(0);
-  const auto out_index = node.getOutputs().at(0);
-
-  // Check lhs shape is same with rhs (with broadcast)
-  const auto in_tensor = env->tensorAt(in_index);
-  const auto out_tensor = env->tensorAt(out_index);
-
   const auto data_type = in_tensor->data_type();
   if (data_type == ir::DataType::FLOAT32)
   {
-    invoke(in_tensor, out_tensor, avgpool_node.param());
+    calculateActivationRange(param.activation, &cker_param.float_activation_min,
+                             &cker_param.float_activation_max);
+
+    const auto in_shape = convertShape(in_tensor->tensorInfo().shape());
+    const auto out_shape = convertShape(out_tensor->tensorInfo().shape());
+    const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO());
+    float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer());
+    // For now, invoke() supports float Pool2D only
+    invoke<float>(cker_param, in_shape, in_ptr, out_shape, out_ptr, param.op_type);
   }
   else
   {
     throw std::runtime_error{"NYI: Support float only"};
   }
 }
-} // namespace avgpool2d
+} // namespace pool2d
 
-OpKernel *getAvgPool2D()
+OpKernel *getPool2D()
 {
-  static OpKernel kernel = {avgpool2d::prepareAvgPool2D, avgpool2d::invokeAvgPool2D};
+  static OpKernel kernel = {pool2d::preparePool2D, pool2d::invokePool2DOps};
   return &kernel;
 }
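
The merged Pool2D kernel works because average and max pooling share the window walk and parameter setup; only the reduction differs, which the templated invoke() above selects per call. A 1-D reference sketch of that shared structure (simplified; not the cker implementation):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    enum class PoolType { AVG, MAX };

    std::vector<float> pool1D(const std::vector<float> &in, int window, int stride, PoolType type)
    {
      std::vector<float> out;
      for (std::size_t start = 0; start + window <= in.size(); start += stride)
      {
        float acc = (type == PoolType::MAX) ? in[start] : 0.f;
        for (int k = 0; k < window; ++k) // identical walk for both pool types
        {
          const float v = in[start + k];
          acc = (type == PoolType::MAX) ? std::max(acc, v) : acc + v;
        }
        out.push_back(type == PoolType::AVG ? acc / window : acc);
      }
      return out;
    }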
 
index 6d9359e..d30f78d 100644 (file)
@@ -29,43 +29,6 @@ namespace interp
 namespace
 {
 
-void Softmax2D(const float *in, const int input_size, const int batch_size, const float beta,
-               float *out)
-{
-  assert(input_size > 0);
-
-  // For each batch
-  for (int b = 0; b < batch_size; b++)
-  {
-    // Find the max coeff.
-    float max_coeff = in[0];
-    for (int i = 1; i < input_size; i++)
-    {
-      if (in[i] > max_coeff)
-        max_coeff = in[i];
-    }
-
-    // Compute the normalized sum of exps.
-    float exp_sum = 0.0;
-    for (int i = 0; i < input_size; i++)
-    {
-      out[i] = std::exp((in[i] - max_coeff) * beta);
-      exp_sum += out[i];
-    }
-
-    // Divide by the sum of exps.
-    float reciprocal_sum_exp = 1.f / exp_sum;
-    for (int i = 0; i < input_size; i++)
-    {
-      out[i] *= reciprocal_sum_exp;
-    }
-
-    // Advance in and out pointers for the next batch.
-    in += input_size;
-    out += input_size;
-  }
-}
-
 void prepareSoftMax(ExecEnv *env, const ir::Operation &node)
 {
   const auto in_index = node.getInputs().at(0);
@@ -108,7 +71,7 @@ void invoke(const ITensor *in_tensor, const ITensor *out_tensor,
     uint32_t batch_size = in_tensor->dimension(0);
     uint32_t input_size = in_tensor->dimension(1);
 
-    Softmax2D(in_ptr, input_size, batch_size, beta, out_ptr);
+    nnfw::cker::Softmax(in_ptr, input_size, batch_size, beta, out_ptr);
   }
   else if (in_tensor->num_dimensions() == 4)
   {
index 0db9b61..fe8b1b4 100644 (file)
@@ -56,18 +56,34 @@ void Graph::setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data)
   _operands.at(ind).data(std::move(data));
 }
 
-void Graph::addInput(const OperandIndex &ind)
+void Graph::addInput(const OperandIndex &ind, const std::string &name)
 {
   assert(isBuildingPhase());
+  if (!name.empty())
+    _name_to_input.emplace(name, IOIndex{_inputs.size()});
   _inputs.append(ind);
 }
 
-void Graph::addOutput(const OperandIndex &ind)
+void Graph::addOutput(const OperandIndex &ind, const std::string &name)
 {
   assert(isBuildingPhase());
+  if (!name.empty())
+    _name_to_output.emplace(name, IOIndex{_outputs.size()});
   _outputs.append(ind);
 }
 
+IOIndex Graph::getInputIndex(const std::string &name) const
+{
+  auto itr = _name_to_input.find(name);
+  return (itr == _name_to_input.end()) ? IOIndex{} : itr->second;
+}
+
+IOIndex Graph::getOutputIndex(const std::string &name) const
+{
+  auto itr = _name_to_output.find(name);
+  return (itr == _name_to_output.end()) ? IOIndex{} : itr->second;
+}
+
 void Graph::finishBuilding(void)
 {
   assert(isBuildingPhase());
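
These additions let applications address graph inputs and outputs by name. A hypothetical caller-side sketch; note that a miss returns a default-constructed IOIndex rather than throwing, and Index::valid() is assumed from onert's util::Index (verify against the actual header):

    onert::ir::IOIndex input_index = graph.getInputIndex("input_tensor_name");
    if (!input_index.valid())
    {
      input_index = onert::ir::IOIndex{0}; // hypothetical fallback to positional access
    }
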
index 2b29a9e..4bea1a5 100644 (file)
@@ -17,7 +17,7 @@
 #include "GraphIterator.h"
 
 #include "ir/OperationIndexMap.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"
 
 namespace onert
 {
index 534ffef..b54314e 100644 (file)
 
 namespace onert
 {
+namespace compiler
+{
+class LoweredGraph;
+} // namespace compiler
+} // namespace onert
+
+namespace onert
+{
 namespace ir
 {
 
 class Graph;
 class Operation;
-class LoweredGraph;
 class OpSequence;
 
 template <bool is_const> class Iterator
@@ -65,7 +72,8 @@ public:
   using NodeRef = typename Iterator<is_const>::NodeRef;
   using IterFn = typename Iterator<is_const>::IterFn;
   using LoweredGraphRef =
-      typename std::conditional<is_const, const LoweredGraph &, LoweredGraph &>::type;
+      typename std::conditional<is_const, const typename compiler::LoweredGraph &,
+                                typename compiler::LoweredGraph &>::type;
   using OpSequenceRef = typename std::conditional<is_const, const OpSequence &, OpSequence &>::type;
   using OpSeqIndexRef = const OpSequenceIndex &;
   using OpSeqIterFn = std::function<void(OpSeqIndexRef, OpSequenceRef)>;
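
With LoweredGraph now living in onert::compiler, the iterator only needs a forward declaration, and std::conditional computes the const-correct reference type from the is_const flag. A minimal sketch of that pattern:

    #include <type_traits>

    struct LoweredGraph; // a forward declaration suffices for forming references

    template <bool is_const> struct IteratorSketch
    {
      using GraphRef =
          typename std::conditional<is_const, const LoweredGraph &, LoweredGraph &>::type;
    };

    static_assert(std::is_same<IteratorSketch<true>::GraphRef, const LoweredGraph &>::value, "");
    static_assert(std::is_same<IteratorSketch<false>::GraphRef, LoweredGraph &>::value, "");
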
index a87d31a..6888478 100644 (file)
@@ -83,15 +83,6 @@ OpSequenceIndex OpSequences::getOperation(const OperationIndex &operation_index)
   return ret;
 }
 
-// TODO: Extract this into external helper function
-void OpSequences::dump(const std::string &msg, const Operations &operations) const
-{
-  VERBOSE(OpSequences) << "OpSequences(" << msg << ")" << std::endl;
-  iterate([&](const OpSequenceIndex &idx, const OpSequence &op_seq) {
-    VERBOSE(OpSequences) << idx.value() << "] " << getStrFromOpSeq(op_seq, operations) << std::endl;
-  });
-}
-
 void OpSequences::removeFromOpSequence(const OperationIndex &operation_index)
 {
   const auto op_seq_index = findOperation(operation_index);
@@ -122,5 +113,12 @@ OpSequenceIndex OpSequences::findOperation(const OperationIndex &operation_index
   throw std::runtime_error("Operation not found");
 }
 
+void dumpOpSequences(const OpSequences &op_seqs, const Operations &operations)
+{
+  op_seqs.iterate([&](const OpSequenceIndex &idx, const OpSequence &op_seq) {
+    VERBOSE(OpSequences) << idx.value() << "] " << getStrFromOpSeq(op_seq, operations) << std::endl;
+  });
+}
+
 } // namespace ir
 } // namespace onert
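
Extracting the member dump() into the free function dumpOpSequences decouples logging from the container, but the free function no longer prints the old "OpSequences(<msg>)" header line, so call sites log it themselves. A hypothetical migrated call site (inside onert::ir, where the VERBOSE macro is available):

    VERBOSE(OpSequences) << "OpSequences(" << msg << ")" << std::endl; // header the member used to print
    dumpOpSequences(op_seqs, operations);
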
index e3cbce5..48361f4 100644 (file)
@@ -27,206 +27,137 @@ namespace ir
 
 using namespace operation;
 
-OperationDumper::OperationDumper(const std::string &start_msg)
+namespace
 {
-  VERBOSE(LIR) << start_msg << std::endl;
-}
-
-void OperationDumper::visit(const Abs &node)
+void dumpUnaryInputOp(const Operation &node, const std::string &adding_input = "")
 {
-  VERBOSE(LIR) << "* Abs" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Abs::Input::INPUT) << ")"
+  VERBOSE(LIR) << "* " << node.name() << std::endl;
+  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(0) << ") " << adding_input
                << std::endl;
   VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
 }
 
-void OperationDumper::visit(const Add &node)
+void dumpBinaryInputOp(const Operation &node, const std::string &adding_input = "")
 {
-  VERBOSE(LIR) << "* Add" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Add::Input::LHS) << ", "
-               << node.getInputs().at(Add::Input::RHS) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const ArgMax &node)
-{
-  VERBOSE(LIR) << "* ArgMax" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(ArgMax::Input::INPUT) << ")"
-               << std::endl;
+  VERBOSE(LIR) << "* " << node.name() << std::endl;
+  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(0) << ", " << node.getInputs().at(0)
+               << ") " << adding_input << std::endl;
   VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
 }
 
-void OperationDumper::visit(const AvgPool2D &node)
+void dumpConvOp(const Operation &node, const std::string &padding_type)
 {
-  VERBOSE(LIR) << "* AvgPool2D(Implicit)" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : IFM(" << node.getInputs().at(AvgPool2D::Input::INPUT) << ")"
-               << std::endl;
+  VERBOSE(LIR) << "* " << node.name() << "(" << padding_type << ")" << std::endl;
+  VERBOSE(LIR) << "  - Inputs : IFM(" << node.getInputs().at(Conv2D::Input::INPUT) << ") Kernel("
+               << node.getInputs().at(Conv2D::Input::KERNEL) << ") Bias("
+               << node.getInputs().at(Conv2D::Input::BIAS) << ")" << std::endl;
   VERBOSE(LIR) << "  - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
 }
 
-void OperationDumper::visit(const BatchToSpaceND &node)
+void dumpPackingOp(const Operation &node)
 {
-  VERBOSE(LIR) << "* BatchToSpaceND" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(BatchToSpaceND::Input::INPUT) << ")"
-               << " BlockSize(" << node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE) << ")"
-               << std::endl;
+  VERBOSE(LIR) << "* " << node.name() << std::endl;
+  std::string inputs;
+  for (auto i : node.getInputs())
+  {
+    inputs += std::to_string(i.value()) + ",";
+  }
+  VERBOSE(LIR) << "  - Inputs : Inputs(" << inputs << ")" << std::endl;
   VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
 }
+} // namespace
 
-void OperationDumper::visit(const operation::BroadcastTo &node)
+OperationDumper::OperationDumper(const std::string &start_msg)
 {
-  VERBOSE(LIR) << "* BroadcastTo" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(BroadcastTo::Input::INPUT) << ", "
-               << node.getInputs().at(BroadcastTo::Input::SHAPE) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+  VERBOSE(LIR) << start_msg << std::endl;
 }
 
-void OperationDumper::visit(const Cast &node)
-{
-  VERBOSE(LIR) << "* Cast" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Cast::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ArgMax &node) { dumpUnaryInputOp(node); }
 
-void OperationDumper::visit(const Comparison &node)
+void OperationDumper::visit(const BatchToSpaceND &node)
 {
-  VERBOSE(LIR) << "* Comparison" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Comparison::Input::INPUT0) << ", "
-               << node.getInputs().at(Comparison::Input::INPUT1) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+  std::string block_size =
+      "BlockSize(" +
+      std::to_string(node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE).value()) + ")";
+  dumpUnaryInputOp(node, block_size);
 }
 
-void OperationDumper::visit(const Concat &node)
-{
-  VERBOSE(LIR) << "* Concat" << std::endl;
-  std::string inputs;
-  for (auto i : node.getInputs())
-  {
-    inputs += std::to_string(i.value()) + ",";
-  }
-  VERBOSE(LIR) << "  - Inputs : IFM(" << inputs << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const BinaryArithmetic &node) { dumpBinaryInputOp(node); }
+
+void OperationDumper::visit(const operation::BroadcastTo &node) { dumpBinaryInputOp(node); }
+
+void OperationDumper::visit(const Comparison &node) { dumpBinaryInputOp(node); }
+
+void OperationDumper::visit(const Concat &node) { dumpPackingOp(node); }
 
 void OperationDumper::visit(const Conv2D &node)
 {
   std::string padding_type =
       node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
-  VERBOSE(LIR) << "* Conv2D(" << padding_type << ")" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : IFM(" << node.getInputs().at(Conv2D::Input::INPUT) << ") Kernel("
-               << node.getInputs().at(Conv2D::Input::KERNEL) << ") Bias("
-               << node.getInputs().at(Conv2D::Input::BIAS) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
+  dumpConvOp(node, padding_type);
 }
 
-void OperationDumper::visit(const ConvertFp16ToFp32 &node)
-{
-  VERBOSE(LIR) << "* ConvertFp16ToFp32" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(ConvertFp16ToFp32::Input::INPUT)
-               << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ConvertFp16ToFp32 &node) { dumpUnaryInputOp(node); }
 
-void OperationDumper::visit(const ConvertFp32ToFp16 &node)
-{
-  VERBOSE(LIR) << "* ConvertFp32ToFp16" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(ConvertFp32ToFp16::Input::INPUT)
-               << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Cos &node)
-{
-  VERBOSE(LIR) << "* Cos" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Cos::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ConvertFp32ToFp16 &node) { dumpUnaryInputOp(node); }
 
-void OperationDumper::visit(const DepthToSpace &node)
-{
-  VERBOSE(LIR) << "* DepthToSpace" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(DepthToSpace::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const DepthToSpace &node) { dumpUnaryInputOp(node); }
 
 void OperationDumper::visit(const DepthwiseConv2D &node)
 {
   std::string padding_type =
       node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
-  VERBOSE(LIR) << "* DepthwiseConv2D(" << padding_type << ")" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : IFM(" << node.getInputs().at(DepthwiseConv2D::Input::INPUT)
-               << ") Kernel(" << node.getInputs().at(DepthwiseConv2D::Input::KERNEL) << ") Bias("
-               << node.getInputs().at(DepthwiseConv2D::Input::BIAS) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
+  dumpConvOp(node, padding_type);
 }
 
-void OperationDumper::visit(const Dequantize &node)
+void OperationDumper::visit(const ElementwiseActivation &node)
 {
-  VERBOSE(LIR) << "* Dequantize" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Dequantize::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+  std::string params;
+  if (node.param().op_type == ElementwiseActivation::Type::RELU)
+  {
+    params = " lower value(" + std::to_string(node.param().alpha) + ") upper value(" +
+             std::to_string(node.param().beta) + ")";
+  }
+  else if (node.param().op_type == ElementwiseActivation::Type::LEAKY_RELU)
+  {
+    params = " alpha value(" + std::to_string(node.param().alpha) + ")";
+  }
+  dumpUnaryInputOp(node, params);
 }
 
-void OperationDumper::visit(const Div &node)
-{
-  VERBOSE(LIR) << "* Div" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Div::Input::LHS) << ", "
-               << node.getInputs().at(Div::Input::RHS) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ElementwiseBinary &node) { dumpBinaryInputOp(node); }
+
+void OperationDumper::visit(const ElementwiseUnary &node) { dumpUnaryInputOp(node); }
 
 void OperationDumper::visit(const EmbeddingLookup &node)
 {
-  VERBOSE(LIR) << "* EmbeddingLookup" << std::endl;
+  VERBOSE(LIR) << "* " << node.name() << std::endl;
   VERBOSE(LIR) << "  - Inputs : Lookups(" << node.getInputs().at(EmbeddingLookup::Input::LOOKUPS)
                << ") VALUES(" << node.getInputs().at(EmbeddingLookup::Input::VALUES) << ")"
                << std::endl;
   VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
 }
 
-void OperationDumper::visit(const Exp &node)
-{
-  VERBOSE(LIR) << "* Exp" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Exp::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
 void OperationDumper::visit(const ExpandDims &node)
 {
-  VERBOSE(LIR) << "* ExpandDims" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(ExpandDims::Input::INPUT)
-               << ") AXIS(" << node.getInputs().at(ExpandDims::Input::AXIS) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Floor &node)
-{
-  VERBOSE(LIR) << "* Floor" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Floor::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+  std::string axis =
+      "AXIS(" + std::to_string(node.getInputs().at(ExpandDims::Input::AXIS).value()) + ")";
+  dumpUnaryInputOp(node, axis);
 }
 
 void OperationDumper::visit(const FullyConnected &node)
 {
-  VERBOSE(LIR) << "* FullyConnected" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : IFM(" << node.getInputs().at(FullyConnected::Input::INPUT)
-               << ") Weight(" << node.getInputs().at(FullyConnected::Input::WEIGHT) << ") Bias("
-               << node.getInputs().at(FullyConnected::Input::BIAS) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
+  std::string inputs =
+      "Weight(" + std::to_string(node.getInputs().at(FullyConnected::Input::WEIGHT).value()) +
+      ") Bias(" + std::to_string(node.getInputs().at(FullyConnected::Input::BIAS).value()) + ")";
+  dumpUnaryInputOp(node, inputs);
 }
 
 void OperationDumper::visit(const Gather &node)
 {
-  VERBOSE(LIR) << "* Gather" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Gather::Input::INPUT) << ") Indices("
-               << node.getInputs().at(Gather::Input::INDICES) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+  std::string indices =
+      "Indices(" + std::to_string(node.getInputs().at(Gather::Input::INDICES).value()) + ")";
+  dumpUnaryInputOp(node, indices);
 }
 
 void OperationDumper::visit(const HashtableLookup &node)
@@ -242,36 +173,15 @@ void OperationDumper::visit(const HashtableLookup &node)
 
 void OperationDumper::visit(const InstanceNorm &node)
 {
-  VERBOSE(LIR) << "* InstanceNorm" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : IFM(" << node.getInputs().at(InstanceNorm::Input::INPUT)
-               << ") Gamma(" << node.getInputs().at(InstanceNorm::Input::GAMMA) << ") Beta("
-               << node.getInputs().at(InstanceNorm::Input::BETA) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const L2Normalization &node)
-{
-  VERBOSE(LIR) << "* L2Normalization" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(L2Normalization::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+  std::string inputs =
+      "Gamma(" + std::to_string(node.getInputs().at(InstanceNorm::Input::GAMMA).value()) +
+      ") Beta(" + std::to_string(node.getInputs().at(InstanceNorm::Input::BETA).value()) + ")";
+  dumpUnaryInputOp(node, inputs);
 }
 
-void OperationDumper::visit(const L2Pool2D &node)
-{
-  VERBOSE(LIR) << "* L2Pool2D" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(L2Pool2D::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const L2Normalization &node) { dumpUnaryInputOp(node); }
 
-void OperationDumper::visit(const LocalResponseNormalization &node)
-{
-  VERBOSE(LIR) << "* LocalResponseNormalization" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input("
-               << node.getInputs().at(LocalResponseNormalization::Input::INPUT) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const LocalResponseNormalization &node) { dumpUnaryInputOp(node); }
 
 void OperationDumper::visit(const LSTM &node)
 {
@@ -307,93 +217,12 @@ void OperationDumper::visit(const LSTM &node)
                << node.getInputs().at(LSTM::Output::OUTPUT) << ")" << std::endl;
 }
 
-void OperationDumper::visit(const Log &node)
-{
-  VERBOSE(LIR) << "* Log" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Log::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const LogicalAnd &node)
-{
-  VERBOSE(LIR) << "* LogicalAnd" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(LogicalAnd::Input::INPUT0) << ", "
-               << node.getInputs().at(LogicalAnd::Input::INPUT1) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const LogicalNot &node)
-{
-  VERBOSE(LIR) << "* LogicalNot" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(LogicalNot::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const LogicalOr &node)
-{
-  VERBOSE(LIR) << "* LogicalOr" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(LogicalOr::Input::INPUT0) << ", "
-               << node.getInputs().at(LogicalOr::Input::INPUT1) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Logistic &node)
-{
-  VERBOSE(LIR) << "* Logistic" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Logistic::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const MaxPool2D &node)
-{
-  std::string padding_type =
-      node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
-  VERBOSE(LIR) << "* MaxPool2D(" << padding_type << ")" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : IFM(" << node.getInputs().at(MaxPool2D::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Mul &node)
-{
-  VERBOSE(LIR) << "* Mul" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Mul::Input::LHS) << ", "
-               << node.getInputs().at(Mul::Input::RHS) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Neg &node)
-{
-  VERBOSE(LIR) << "* Neg" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Neg::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Pack &node)
-{
-  VERBOSE(LIR) << "* Pack" << std::endl;
-  std::string inputs;
-  const auto &input_indices = node.getInputs();
-  for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
-  {
-    inputs += std::to_string(it->value());
-    if (std::next(it) != std::end(input_indices))
-      inputs += ", ";
-  }
-  VERBOSE(LIR) << "  - Inputs : Inputs(" << inputs << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Pack &node) { dumpPackingOp(node); }
 
 void OperationDumper::visit(const Pad &node)
 {
-  VERBOSE(LIR) << "* Pad" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Pad::Input::INPUT) << ") Pad("
-               << node.getInputs().at(Pad::Input::PAD) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+  std::string pad = "Pad(" + std::to_string(node.getInputs().at(Pad::Input::PAD).value()) + ")";
+  dumpUnaryInputOp(node, pad);
 }
 
 void OperationDumper::visit(const Permute &node)
@@ -417,86 +246,46 @@ void OperationDumper::visit(const Permute &node)
   VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
 }
 
-void OperationDumper::visit(const Pow &node)
+void OperationDumper::visit(const Pool2D &node)
 {
-  VERBOSE(LIR) << "* Pow" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Pow::Input::LHS) << ", "
-               << node.getInputs().at(Pow::Input::RHS) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const PReLU &node)
-{
-  VERBOSE(LIR) << "* PReLU" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(PReLU::Input::INPUT) << ") Alpha("
-               << node.getInputs().at(PReLU::Input::ALPHA) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Reduce &node)
-{
-  VERBOSE(LIR) << "* " + node.name() << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Reduce::Input::INPUT) << ")"
+  std::string padding_type =
+      node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit";
+  VERBOSE(LIR) << "* " << node.name() << "(" << padding_type << ")" << std::endl;
+  VERBOSE(LIR) << "  - Inputs : IFM(" << node.getInputs().at(Pool2D::Input::INPUT) << ")"
                << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+  VERBOSE(LIR) << "  - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
 }
 
-void OperationDumper::visit(const ReLU &node)
-{
-  VERBOSE(LIR) << "* ReLU" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(ReLU::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Pow &node) { dumpBinaryInputOp(node); }
 
-void OperationDumper::visit(const ReLU1 &node)
+void OperationDumper::visit(const PReLU &node)
 {
-  VERBOSE(LIR) << "* ReLU1" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(ReLU1::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+  std::string alpha =
+      "Alpha(" + std::to_string(node.getInputs().at(PReLU::Input::ALPHA).value()) + ")";
+  dumpUnaryInputOp(node, alpha);
 }
 
-void OperationDumper::visit(const ReLU6 &node)
-{
-  VERBOSE(LIR) << "* ReLU6" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(ReLU6::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Rank &node) { dumpUnaryInputOp(node); }
+
+void OperationDumper::visit(const Reduce &node) { dumpUnaryInputOp(node); }
 
 void OperationDumper::visit(const Reshape &node)
 {
-  VERBOSE(LIR) << "* Reshape" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Reshape::Input::INPUT) << ")";
   // optional param
-  if (node.getInputs().size() == 2)
-  {
-    VERBOSE(LIR) << " Shape(" << node.getInputs().at(Reshape::Input::SHAPE) << ")";
-  }
-  else
-  {
-    VERBOSE(LIR) << " Shape(not provided)";
-  }
-  VERBOSE(LIR) << std::endl;
-
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+  std::string shape =
+      node.getInputs().size() == 2
+          ? "Shape(" + std::to_string(node.getInputs().at(Reshape::Input::SHAPE).value()) + ")"
+          : "Shape(not provided)";
+  dumpUnaryInputOp(node, shape);
 }
 
-void OperationDumper::visit(const ResizeBilinear &node)
-{
-  VERBOSE(LIR) << "* ResizeBilinear" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(ResizeBilinear::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ResizeBilinear &node) { dumpUnaryInputOp(node); }
 
 void OperationDumper::visit(const Reverse &node)
 {
-  VERBOSE(LIR) << "* Reverse" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Reverse::Input::INPUT) << ") Axis("
-               << node.getInputs().at(Reverse::Input::AXIS) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+  std::string axis =
+      "Axis(" + std::to_string(node.getInputs().at(Reverse::Input::AXIS).value()) + ")";
+  dumpUnaryInputOp(node, axis);
 }
 
 void OperationDumper::visit(const RNN &node)
@@ -512,162 +301,65 @@ void OperationDumper::visit(const RNN &node)
                << std::endl;
 }
 
-void OperationDumper::visit(const Round &node)
-{
-  VERBOSE(LIR) << "* Round" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Round::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
 void OperationDumper::visit(const Range &node)
 {
   VERBOSE(LIR) << "* Range" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Range::Input::START) << ")"
+  VERBOSE(LIR) << "  - Inputs : Start(" << node.getInputs().at(Range::Input::START) << ")"
                << " Limit(" << node.getInputs().at(Range::Input::LIMIT) << ")"
                << " Delta(" << node.getInputs().at(Range::Input::DELTA) << ")" << std::endl;
   VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
 }
 
-void OperationDumper::visit(const RSQRT &node)
-{
-  VERBOSE(LIR) << "* RSQRT" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(RSQRT::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
 void OperationDumper::visit(const Select &node)
 {
   VERBOSE(LIR) << "* Select" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Select::Input::CONDITION) << ")"
+  VERBOSE(LIR) << "  - Inputs : Condition(" << node.getInputs().at(Select::Input::CONDITION) << ")"
                << " Input_X(" << node.getInputs().at(Select::Input::INPUT_TRUE) << ")"
                << " Input_Y(" << node.getInputs().at(Select::Input::INPUT_FALSE) << ")"
                << std::endl;
   VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
 }
 
-void OperationDumper::visit(const ir::operation::Shape &node)
-{
-  VERBOSE(LIR) << "* Shape" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(ir::operation::Shape::Input::INPUT)
-               << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Sin &node)
-{
-  VERBOSE(LIR) << "* Sin" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Sin::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const ir::operation::Shape &node) { dumpUnaryInputOp(node); }
 
-void OperationDumper::visit(const Softmax &node)
-{
-  VERBOSE(LIR) << "* Softmax" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Softmax::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Softmax &node) { dumpUnaryInputOp(node); }
 
 void OperationDumper::visit(const SpaceToBatchND &node)
 {
-  VERBOSE(LIR) << "* SpaceToBatchND" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(SpaceToBatchND::Input::INPUT)
-               << ") BlockSize(" << node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE)
-               << ") Paddings(" << node.getInputs().at(SpaceToBatchND::Input::PADDINGS) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+  std::string inputs =
+      "BlockSize(" +
+      std::to_string(node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE).value()) +
+      ") Paddings(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::PADDINGS).value()) +
+      ")";
+  dumpUnaryInputOp(node, inputs);
 }
 
-void OperationDumper::visit(const SpaceToDepth &node)
-{
-  VERBOSE(LIR) << "* SpaceToDepth" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(SpaceToDepth::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const SpaceToDepth &node) { dumpUnaryInputOp(node); }
 
-void OperationDumper::visit(const Split &node)
-{
-  VERBOSE(LIR) << "* Split" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Split::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const SQRT &node)
-{
-  VERBOSE(LIR) << "* SQRT" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(SQRT::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Split &node) { dumpUnaryInputOp(node); }
 
-void OperationDumper::visit(const SquaredDifference &node)
-{
-  VERBOSE(LIR) << "* SquaredDifference" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(SquaredDifference::Input::LHS)
-               << ", " << node.getInputs().at(SquaredDifference::Input::RHS) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const SquaredDifference &node) { dumpBinaryInputOp(node); }
 
 void OperationDumper::visit(const StatelessRandomUniform &node)
 {
   VERBOSE(LIR) << "* StatelessRandomUniform" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(StatelessRandomUniform::Input::SHAPE)
-               << ", " << node.getInputs().at(StatelessRandomUniform::Input::SEED) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Squeeze &node)
-{
-  VERBOSE(LIR) << "* Squeeze" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Squeeze::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Slice &node)
-{
-  VERBOSE(LIR) << "* Slice" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Slice::Input::INPUT) << ")"
+  VERBOSE(LIR) << "  - Inputs : Shape(" << node.getInputs().at(StatelessRandomUniform::Input::SHAPE)
+               << " Seed(" << node.getInputs().at(StatelessRandomUniform::Input::SEED) << ")"
                << std::endl;
   VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
 }
 
-void OperationDumper::visit(const StridedSlice &node)
-{
-  VERBOSE(LIR) << "* StridedSlice" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(StridedSlice::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Squeeze &node) { dumpUnaryInputOp(node); }
 
-void OperationDumper::visit(const Sub &node)
-{
-  VERBOSE(LIR) << "* Sub" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Sub::Input::LHS) << ", "
-               << node.getInputs().at(Sub::Input::RHS) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Slice &node) { dumpUnaryInputOp(node); }
 
-void OperationDumper::visit(const Tanh &node)
-{
-  VERBOSE(LIR) << "* TanH" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Tanh::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const StridedSlice &node) { dumpUnaryInputOp(node); }
 
 void OperationDumper::visit(const Tile &node)
 {
-  VERBOSE(LIR) << "* Tile" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Tile::Input::INPUT) << ", "
-               << node.getInputs().at(Tile::Input::MULTIPLES) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+  std::string multiples =
+      "Multiples(" + std::to_string(node.getInputs().at(Tile::Input::MULTIPLES).value()) + ")";
+  dumpUnaryInputOp(node, multiples);
 }
 
 void OperationDumper::visit(const TopKV2 &node)
@@ -692,17 +384,11 @@ void OperationDumper::visit(const TransposeConv &node)
   VERBOSE(LIR) << "  - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
 }
 
-void OperationDumper::visit(const Transpose &node)
-{
-  VERBOSE(LIR) << "* Transpose" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Transpose::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
+void OperationDumper::visit(const Transpose &node) { dumpUnaryInputOp(node); }
 
 void OperationDumper::visit(const Unpack &node)
 {
-  VERBOSE(LIR) << "* Unpack" << std::endl;
+  VERBOSE(LIR) << "* " << node.name() << std::endl;
   VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Unpack::Input::INPUT) << ")"
                << std::endl;
   std::string outputs;
@@ -716,25 +402,9 @@ void OperationDumper::visit(const Unpack &node)
   VERBOSE(LIR) << "  - Outputs : Outputs(" << outputs << ")" << std::endl;
 }
 
-void OperationDumper::visit(const Min &node)
-{
-  VERBOSE(LIR) << "* Min" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Min::Input::LHS) << ", "
-               << node.getInputs().at(Min::Input::RHS) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
-void OperationDumper::visit(const Max &node)
-{
-  VERBOSE(LIR) << "* Max" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Max::Input::LHS) << ", "
-               << node.getInputs().at(Max::Input::RHS) << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
 void OperationDumper::visit(const OneHot &node)
 {
-  VERBOSE(LIR) << "* OneHot" << std::endl;
+  VERBOSE(LIR) << "* " << node.name() << std::endl;
   VERBOSE(LIR) << "  - Inputs : "
                << "Indices(" << node.getInputs().at(OneHot::Input::INDICES) << ") " << std::endl;
   VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
@@ -742,7 +412,7 @@ void OperationDumper::visit(const OneHot &node)
 
 void OperationDumper::visit(const If &node)
 {
-  VERBOSE(LIR) << "* If" << std::endl;
+  VERBOSE(LIR) << "* " << node.name() << std::endl;
   std::string inputs;
   const auto &input_indices = node.getInputs();
   for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
@@ -767,7 +437,7 @@ void OperationDumper::visit(const If &node)
 
 void OperationDumper::visit(const While &node)
 {
-  VERBOSE(LIR) << "* While" << std::endl;
+  VERBOSE(LIR) << "* " << node.name() << std::endl;
   std::string inputs;
   const auto &input_indices = node.getInputs();
   for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
@@ -790,13 +460,5 @@ void OperationDumper::visit(const While &node)
   VERBOSE(LIR) << "  - Output : Outputs(" << outputs << ")" << std::endl;
 }
 
-void OperationDumper::visit(const ZerosLike &node)
-{
-  VERBOSE(LIR) << "* RoZerosLike" << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(ZerosLike::Input::INPUT) << ")"
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
-
 } // namespace ir
 } // namespace onert
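
The one-line visit() bodies above all funnel into a shared dumpUnaryInputOp helper. As a reading aid, here is a sketch of its assumed shape, inferred from the call sites (the actual definition appears earlier in this file's diff):

    // Sketch only: print the op name, its first input (plus an optional
    // annotation such as the Tile "Multiples(...)" string), and its first output.
    void dumpUnaryInputOp(const Operation &node, const std::string &adding_input = "")
    {
      VERBOSE(LIR) << "* " << node.name() << std::endl;
      VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(0) << ") " << adding_input
                   << std::endl;
      VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
    }
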
index d83f149..e8ab3b3 100644
@@ -31,85 +31,61 @@ public:
   OperationDumper(const std::string &start_msg);
 
 public:
-  void visit(const operation::Abs &) override;
-  void visit(const operation::Add &node) override;
   void visit(const operation::ArgMax &) override;
-  void visit(const operation::AvgPool2D &node) override;
   void visit(const operation::BatchToSpaceND &node) override;
+  void visit(const operation::BinaryArithmetic &node) override;
   void visit(const operation::BroadcastTo &) override;
-  void visit(const operation::Cast &) override;
   void visit(const operation::Comparison &) override;
   void visit(const operation::Concat &node) override;
   void visit(const operation::Conv2D &node) override;
   void visit(const operation::ConvertFp16ToFp32 &node) override;
   void visit(const operation::ConvertFp32ToFp16 &node) override;
-  void visit(const operation::Cos &node) override;
   void visit(const operation::DepthToSpace &) override;
   void visit(const operation::DepthwiseConv2D &node) override;
-  void visit(const operation::Dequantize &) override;
-  void visit(const operation::Div &) override;
+  void visit(const operation::ElementwiseActivation &) override;
+  void visit(const operation::ElementwiseBinary &) override;
+  void visit(const operation::ElementwiseUnary &) override;
   void visit(const operation::EmbeddingLookup &) override;
-  void visit(const operation::Exp &) override;
   void visit(const operation::ExpandDims &) override;
-  void visit(const operation::Floor &) override;
   void visit(const operation::FullyConnected &node) override;
   void visit(const operation::Gather &) override;
   void visit(const operation::HashtableLookup &) override;
   void visit(const operation::InstanceNorm &) override;
   void visit(const operation::L2Normalization &) override;
-  void visit(const operation::L2Pool2D &) override;
   void visit(const operation::LocalResponseNormalization &) override;
-  void visit(const operation::Log &) override;
-  void visit(const operation::LogicalAnd &) override;
-  void visit(const operation::LogicalNot &) override;
-  void visit(const operation::LogicalOr &) override;
-  void visit(const operation::Logistic &) override;
   void visit(const operation::LSTM &) override;
-  void visit(const operation::MaxPool2D &node) override;
-  void visit(const operation::Mul &) override;
-  void visit(const operation::Neg &) override;
   void visit(const operation::Pack &) override;
   void visit(const operation::Pad &) override;
   void visit(const operation::Permute &node) override;
+  void visit(const operation::Pool2D &node) override;
   void visit(const operation::Pow &node) override;
   void visit(const operation::PReLU &) override;
   void visit(const operation::Range &) override;
+  void visit(const operation::Rank &) override;
   void visit(const operation::Reduce &) override;
-  void visit(const operation::ReLU &) override;
-  void visit(const operation::ReLU1 &) override;
-  void visit(const operation::ReLU6 &) override;
   void visit(const operation::Reshape &node) override;
   void visit(const operation::ResizeBilinear &) override;
   void visit(const operation::Reverse &) override;
   void visit(const operation::RNN &) override;
-  void visit(const operation::Round &) override;
-  void visit(const operation::RSQRT &) override;
   void visit(const operation::Select &node) override;
   void visit(const operation::Shape &node) override;
-  void visit(const operation::Sin &node) override;
   void visit(const operation::Softmax &node) override;
   void visit(const operation::SpaceToBatchND &) override;
   void visit(const operation::SpaceToDepth &) override;
   void visit(const operation::Split &) override;
-  void visit(const operation::SQRT &) override;
   void visit(const operation::SquaredDifference &) override;
   void visit(const operation::Squeeze &) override;
   void visit(const operation::Slice &) override;
   void visit(const operation::StridedSlice &) override;
   void visit(const operation::StatelessRandomUniform &) override;
-  void visit(const operation::Sub &) override;
-  void visit(const operation::Tanh &) override;
   void visit(const operation::Tile &) override;
   void visit(const operation::TopKV2 &) override;
   void visit(const operation::TransposeConv &) override;
   void visit(const operation::Transpose &) override;
   void visit(const operation::Unpack &) override;
-  void visit(const operation::Min &) override;
-  void visit(const operation::Max &) override;
   void visit(const operation::OneHot &) override;
   void visit(const operation::If &) override;
   void visit(const operation::While &) override;
-  void visit(const operation::ZerosLike &) override;
 };
 
 } // namespace ir
index 3196991..d74f802 100644
@@ -50,7 +50,7 @@ inline ExplicitPadding validPadding(void)
 }
 
 inline ExplicitPadding samePaddingUsingIFM(const FeatureShape &ifm_shape, const Stride &stride,
-                                           uint32_t kw, uint32_t kh)
+                                           uint32_t kw, uint32_t kh, uint32_t dwf, uint32_t dhf)
 {
   ExplicitPadding padding;
 
@@ -61,14 +61,19 @@ inline ExplicitPadding samePaddingUsingIFM(const FeatureShape &ifm_shape, const
   // padding_to_beginning = total_padding / 2
   // padding_to_end = (total_padding + 1)/2.
   //
+  const int32_t effective_filter_h_size = (kh - 1) * dhf + 1;
+  const int32_t effective_filter_w_size = (kw - 1) * dwf + 1;
+
   const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
   const int32_t horizontal_expected_output =
       (ifm_shape.W + stride.horizontal - 1) / stride.horizontal;
 
-  const int32_t vertical_needed_input = (vertical_expected_output - 1) * stride.vertical + kh;
+  const int32_t vertical_needed_input =
+      (vertical_expected_output - 1) * stride.vertical + effective_filter_h_size;
   const int32_t vertical_total_padding = std::max(0, vertical_needed_input - ifm_shape.H);
 
-  const int32_t horizontal_needed_input = (horizontal_expected_output - 1) * stride.horizontal + kw;
+  const int32_t horizontal_needed_input =
+      (horizontal_expected_output - 1) * stride.horizontal + effective_filter_w_size;
   const int32_t horizontal_total_padding = std::max(0, horizontal_needed_input - ifm_shape.W);
 
   padding.top = vertical_total_padding / 2;
@@ -80,7 +85,8 @@ inline ExplicitPadding samePaddingUsingIFM(const FeatureShape &ifm_shape, const
 }
 
 inline ExplicitPadding samePadding(const FeatureShape &ifm_shape, const FeatureShape &ofm_shape,
-                                   const Stride &stride, uint32_t kw, uint32_t kh)
+                                   const Stride &stride, uint32_t kw, uint32_t kh, uint32_t dwf,
+                                   uint32_t dhf)
 {
   const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical;
   const int32_t horizontal_expected_output =
@@ -92,7 +98,7 @@ inline ExplicitPadding samePadding(const FeatureShape &ifm_shape, const FeatureS
   UNUSED_RELEASE(vertical_expected_output);
   UNUSED_RELEASE(horizontal_expected_output);
 
-  return samePaddingUsingIFM(ifm_shape, stride, kw, kh);
+  return samePaddingUsingIFM(ifm_shape, stride, kw, kh, dwf, dhf);
 }
 
 } // namespace
@@ -130,7 +136,7 @@ Padding::Padding(uint32_t left, uint32_t right, uint32_t top, uint32_t bottom)
 
 const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShape &ifm_shape,
                                        const FeatureShape &ofm_shape, const Stride &stride,
-                                       uint32_t kw, uint32_t kh)
+                                       uint32_t kw, uint32_t kh, uint32_t dwf, uint32_t dhf)
 {
   if (padding.type == PaddingType::EXPLICIT)
   {
@@ -138,7 +144,7 @@ const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShap
   }
   else if (padding.type == PaddingType::SAME)
   {
-    return samePadding(ifm_shape, ofm_shape, stride, kw, kh);
+    return samePadding(ifm_shape, ofm_shape, stride, kw, kh, dwf, dhf);
   }
   else if (padding.type == PaddingType::VALID)
   {
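
The padding change above threads dilation factors (dwf, dhf) into the SAME-padding math: a dilated kernel spans an effective extent of (k - 1) * d + 1 input cells, so the "needed input" grows accordingly. A standalone check of the formula on hypothetical shapes (independent of the runtime types):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
      // Hypothetical case: H = 5, stride 1, 3-tap kernel, dilation 2.
      const int32_t ifm_h = 5, stride = 1, kh = 3, dhf = 2;
      const int32_t effective_kh = (kh - 1) * dhf + 1;                      // 5
      const int32_t expected_out = (ifm_h + stride - 1) / stride;           // ceil(5 / 1) = 5
      const int32_t needed_in = (expected_out - 1) * stride + effective_kh; // 9
      const int32_t total_pad = std::max(0, needed_in - ifm_h);             // 4
      // Same split as above: floor to the beginning, remainder to the end.
      std::printf("top=%d bottom=%d\n", total_pad / 2, (total_pad + 1) / 2); // top=2 bottom=2
      return 0;
    }
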
diff --git a/runtime/onert/core/src/ir/operation/Abs.cc b/runtime/onert/core/src/ir/operation/Abs.cc
deleted file mode 100644
index b06705d..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Abs.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Abs::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Abs::Abs(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/AvgPool2D.cc b/runtime/onert/core/src/ir/operation/AvgPool2D.cc
deleted file mode 100644
index 28d4fcb..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/AvgPool2D.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void AvgPool2D::accept(OperationVisitor &v) const { v.visit(*this); }
-
-AvgPool2D::AvgPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
-                     const Param &param)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * limitations under the License.
  */
 
-#include "ir/operation/Add.h"
+#include "ir/operation/BinaryArithmetic.h"
 
 #include <cassert>
+#include <unordered_map>
 
 #include "ir/OperationVisitor.h"
 
@@ -27,14 +28,25 @@ namespace ir
 namespace operation
 {
 
-void Add::accept(OperationVisitor &v) const { v.visit(*this); }
+void BinaryArithmetic::accept(OperationVisitor &v) const { v.visit(*this); }
 
-Add::Add(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
-         const Param &param)
+BinaryArithmetic::BinaryArithmetic(const OperandIndexSequence &inputs,
+                                   const OperandIndexSequence &outputs, const Param &param)
     : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
 {
 }
 
+std::string BinaryArithmetic::name() const
+{
+  using ArithmeticType = onert::ir::operation::BinaryArithmetic::ArithmeticType;
+  static const std::unordered_map<ArithmeticType, std::string> name_map{
+      {ArithmeticType::ADD, std::string{"Add"}},
+      {ArithmeticType::SUB, std::string{"Sub"}},
+      {ArithmeticType::MUL, std::string{"Mul"}},
+      {ArithmeticType::DIV, std::string{"Div"}}};
+  return name_map.at(_param.arithmetic_type);
+}
+
 } // namespace operation
 } // namespace ir
 } // namespace onert
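
Note that name() resolves through name_map.at() rather than operator[]: an unmapped arithmetic type throws std::out_of_range instead of silently inserting an empty name. The same table-driven pattern recurs below for ElementwiseActivation, ElementwiseBinary, ElementwiseUnary, and Pool2D. A small self-contained illustration of the difference (hypothetical map):

    #include <cassert>
    #include <stdexcept>
    #include <string>
    #include <unordered_map>

    int main()
    {
      std::unordered_map<int, std::string> m{{0, "Add"}};
      try
      {
        m.at(1); // missing key: throws rather than fabricating an entry
        assert(false);
      }
      catch (const std::out_of_range &)
      {
      }
      m[1]; // operator[] default-inserts {1, ""} -- a silent empty op name
      assert(m.size() == 2);
      return 0;
    }
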
diff --git a/runtime/onert/core/src/ir/operation/Cast.cc b/runtime/onert/core/src/ir/operation/Cast.cc
deleted file mode 100644
index 09d9c32..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Cast.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Cast::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Cast::Cast(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Dequantize.cc b/runtime/onert/core/src/ir/operation/Dequantize.cc
deleted file mode 100644
index 14d6362..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Dequantize.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Dequantize::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Dequantize::Dequantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Div.cc b/runtime/onert/core/src/ir/operation/Div.cc
deleted file mode 100644
index b095d98..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Div.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Div::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Div::Div(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
-         const Param &param)
-    : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc b/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc
new file mode 100644
index 0000000..f6718b6
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ElementwiseActivation.h"
+
+#include <cassert>
+#include <limits>
+#include <unordered_map>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ElementwiseActivation::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ElementwiseActivation::ElementwiseActivation(const OperandIndexSequence &inputs,
+                                             const OperandIndexSequence &outputs,
+                                             const Param &param)
+    : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+  if (param.op_type == Type::LOGISTIC)
+  {
+    assert(param.alpha == 0.0f && param.beta == 0.0f && "Logistic is supported only as the "
+                                                        "standard sigmoid (L=1, k=1, x0=0), so "
+                                                        "alpha and beta must not be used");
+  }
+  else if (param.op_type == Type::RELU)
+  {
+    assert(param.alpha >= param.beta && "ReLU's alpha must be equal to or greater than beta");
+  }
+  else if (param.op_type == Type::TANH)
+  {
+    assert(param.alpha == 1.0f && param.beta == 1.0f && "f(x) = alpha * tanh(beta * x); Tanh is "
+                                                        "supported only when both alpha and "
+                                                        "beta are 1.0f");
+  }
+}
+
+std::string ElementwiseActivation::name() const
+{
+  using ElementwiseActivationType = onert::ir::operation::ElementwiseActivation::Type;
+  static const std::unordered_map<Type, std::string> name_map{
+      {ElementwiseActivationType::ELU, "ELU"},
+      {ElementwiseActivationType::LOGISTIC, "Logistic"},
+      {ElementwiseActivationType::RELU, "ReLU"},
+      {ElementwiseActivationType::TANH, "Tanh"},
+      {ElementwiseActivationType::LEAKY_RELU, "LeakyRelu"}};
+  return name_map.at(_param.op_type);
+}
+
+float ElementwiseActivation::infinity = std::numeric_limits<float>::infinity();
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
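
The asserts above fix the (alpha, beta) conventions for the merged activations: Logistic is the plain sigmoid, Tanh is f(x) = alpha * tanh(beta * x) with both parameters pinned to 1, and for ReLU the alpha >= beta requirement together with the static infinity member suggests alpha and beta act as upper and lower clamp bounds. Under that reading (an interpretation, not the authoritative API -- see ir/operation/ElementwiseActivation.h), the old per-op ReLU variants map onto parameters like so:

    #include <algorithm>
    #include <limits>

    // Hedged sketch: assuming alpha is the upper and beta the lower bound.
    float relu_family(float x, float alpha, float beta)
    {
      return std::min(std::max(x, beta), alpha); // clamp x to [beta, alpha]
    }

    // relu_family(x, std::numeric_limits<float>::infinity(), 0.f) -> ReLU
    // relu_family(x, 6.f, 0.f)                                     -> ReLU6
    // relu_family(x, 1.f, -1.f)                                    -> ReLU1
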
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc b/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc
new file mode 100644
index 0000000..3287fc0
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ElementwiseBinary.h"
+
+#include <cassert>
+#include <unordered_map>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ElementwiseBinary::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ElementwiseBinary::ElementwiseBinary(const OperandIndexSequence &inputs,
+                                     const OperandIndexSequence &outputs, const Param &param)
+    : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+{
+}
+
+std::string ElementwiseBinary::name() const
+{
+  using ElementwiseBinaryType = onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType;
+  static const std::unordered_map<ElementwiseBinaryType, std::string> name_map{
+      {ElementwiseBinaryType::LOGICAL_AND, std::string{"LogicalAnd"}},
+      {ElementwiseBinaryType::LOGICAL_OR, std::string{"LogicalOr"}},
+      {ElementwiseBinaryType::MAX, std::string{"Max"}},
+      {ElementwiseBinaryType::MIN, std::string{"Min"}}};
+  return name_map.at(_param.op_type);
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
new file mode 100644
index 0000000..7dfcd4a
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ElementwiseUnary.h"
+
+#include <cassert>
+#include <unordered_map>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ElementwiseUnary::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ElementwiseUnary::ElementwiseUnary(const OperandIndexSequence &inputs,
+                                   const OperandIndexSequence &outputs, const Param &param)
+    : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+std::string ElementwiseUnary::name() const
+{
+  using ElementwiseUnaryType = onert::ir::operation::ElementwiseUnary::Type;
+  static const std::unordered_map<ElementwiseUnaryType, std::string> name_map{
+      {ElementwiseUnaryType::ABS, std::string{"Abs"}},
+      {ElementwiseUnaryType::CAST, std::string{"Cast"}},
+      {ElementwiseUnaryType::COS, std::string{"Cos"}},
+      {ElementwiseUnaryType::DEQUANTIZE, std::string{"Dequantize"}},
+      {ElementwiseUnaryType::ERF, std::string{"Erf"}},
+      {ElementwiseUnaryType::EXP, std::string{"Exp"}},
+      {ElementwiseUnaryType::FLOOR, std::string{"Floor"}},
+      {ElementwiseUnaryType::LOG, std::string{"Log"}},
+      {ElementwiseUnaryType::LOGICAL_NOT, std::string{"LogicalNot"}},
+      {ElementwiseUnaryType::NEG, std::string{"Neg"}},
+      {ElementwiseUnaryType::QUANTIZE, std::string{"Quantize"}},
+      {ElementwiseUnaryType::ROUND, std::string{"Round"}},
+      {ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}},
+      {ElementwiseUnaryType::SIN, std::string{"Sin"}},
+      {ElementwiseUnaryType::SQRT, std::string{"Sqrt"}},
+      {ElementwiseUnaryType::SQURE, std::string{"Squre"}},
+      {ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}};
+  return name_map.at(_param.op_type);
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Exp.cc b/runtime/onert/core/src/ir/operation/Exp.cc
deleted file mode 100644
index 0b22e08..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Exp.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Exp::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Exp::Exp(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Floor.cc b/runtime/onert/core/src/ir/operation/Floor.cc
deleted file mode 100644
index dc01535..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Floor.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Floor::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Floor::Floor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/L2Pool2D.cc b/runtime/onert/core/src/ir/operation/L2Pool2D.cc
deleted file mode 100644
index 8f21b93..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/L2Pool2D.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void L2Pool2D::accept(OperationVisitor &v) const { v.visit(*this); }
-
-L2Pool2D::L2Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
-                   const Param &param)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/LogicalAnd.cc b/runtime/onert/core/src/ir/operation/LogicalAnd.cc
deleted file mode 100644
index 0d50706..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LogicalAnd.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void LogicalAnd::accept(OperationVisitor &v) const { v.visit(*this); }
-
-LogicalAnd::LogicalAnd(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/LogicalNot.cc b/runtime/onert/core/src/ir/operation/LogicalNot.cc
deleted file mode 100644
index 8f11421..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LogicalNot.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void LogicalNot::accept(OperationVisitor &v) const { v.visit(*this); }
-
-LogicalNot::LogicalNot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/LogicalOr.cc b/runtime/onert/core/src/ir/operation/LogicalOr.cc
deleted file mode 100644
index d75207c..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/LogicalOr.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void LogicalOr::accept(OperationVisitor &v) const { v.visit(*this); }
-
-LogicalOr::LogicalOr(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Logistic.cc b/runtime/onert/core/src/ir/operation/Logistic.cc
deleted file mode 100644
index 77d9d17..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Logistic.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Logistic::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Logistic::Logistic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Max.cc b/runtime/onert/core/src/ir/operation/Max.cc
deleted file mode 100644
index 281f9d4..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Max.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Max::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Max::Max(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/MaxPool2D.cc b/runtime/onert/core/src/ir/operation/MaxPool2D.cc
deleted file mode 100644
index eac53cc..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/MaxPool2D.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void MaxPool2D::accept(OperationVisitor &v) const { v.visit(*this); }
-
-MaxPool2D::MaxPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
-                     const Param &param)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Min.cc b/runtime/onert/core/src/ir/operation/Min.cc
deleted file mode 100644
index 8be7f0c..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Min.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Min::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Min::Min(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Mul.cc b/runtime/onert/core/src/ir/operation/Mul.cc
deleted file mode 100644
index 03cdf1b..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Mul.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Mul::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Mul::Mul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
-         const Param &param)
-    : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Neg.cc b/runtime/onert/core/src/ir/operation/Neg.cc
deleted file mode 100644
index df623a1..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Neg.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Neg::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Neg::Neg(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
index aecc2d9..0c56e92 100644
@@ -27,8 +27,10 @@ namespace operation
 
 void Pad::accept(OperationVisitor &v) const { v.visit(*this); }
 
+// PAD: 2 inputs
+// PADV2: 3 inputs
 Pad::Pad(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(2u), inputs, outputs}
+    : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs}
 {
 }
 
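
Relaxing the operand constraint from exactly two to a range lets one Pad op class cover both PAD (input, paddings) and PADV2 (input, paddings, pad value), so no separate PadV2 class is needed. A tiny sketch of the arity check this implies (hypothetical helper; the real logic lives in ir::OperandConstraint):

    #include <cassert>
    #include <cstdint>

    bool padArityOk(uint32_t n_inputs) { return n_inputs >= 2 && n_inputs <= 3; }

    int main()
    {
      assert(padArityOk(2));  // PAD  : {input, paddings}
      assert(padArityOk(3));  // PADV2: {input, paddings, pad_value}
      assert(!padArityOk(1)); // rejected by createInRange(2u, 3u)
      return 0;
    }
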
diff --git a/runtime/onert/core/src/ir/operation/Pool2D.cc b/runtime/onert/core/src/ir/operation/Pool2D.cc
new file mode 100644
index 0000000..761d14c
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Pool2D.h"
+
+#include <cassert>
+#include <unordered_map>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void Pool2D::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Pool2D::Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+               const Param &param)
+    : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
+{
+}
+
+std::string Pool2D::name() const
+{
+  using PoolType = onert::ir::operation::Pool2D::PoolType;
+  static const std::unordered_map<PoolType, std::string> name_map{
+      {PoolType::AVG, "Avg" + std::string{toString(opcode())}},
+      {PoolType::L2, "L2" + std::string{toString(opcode())}},
+      {PoolType::MAX, "Max" + std::string{toString(opcode())}}};
+  return name_map.at(_param.op_type);
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
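
Unlike the other merged ops, Pool2D builds its display name by prefixing the pool type onto toString(opcode()). Assuming the opcode stringifies as "Pool2D", the old class names come back verbatim:

    #include <cassert>
    #include <string>

    int main()
    {
      const std::string opcode = "Pool2D"; // assumed value of toString(opcode())
      assert("Avg" + opcode == std::string("AvgPool2D")); // replaces class AvgPool2D
      assert("Max" + opcode == std::string("MaxPool2D")); // replaces class MaxPool2D
      assert("L2" + opcode == std::string("L2Pool2D"));   // replaces class L2Pool2D
      return 0;
    }
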
diff --git a/runtime/onert/core/src/ir/operation/RSQRT.cc b/runtime/onert/core/src/ir/operation/RSQRT.cc
deleted file mode 100644
index 2bce1fa..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/RSQRT.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void RSQRT::accept(OperationVisitor &v) const { v.visit(*this); }
-
-RSQRT::RSQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
similarity index 83%
rename from runtime/onert/core/src/ir/operation/Cos.cc
rename to runtime/onert/core/src/ir/operation/Rank.cc
index 831a92d..c357e90 100644
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "ir/operation/Cos.h"
+#include "ir/operation/Rank.h"
 
 #include <cassert>
 
@@ -27,9 +27,9 @@ namespace ir
 namespace operation
 {
 
-void Cos::accept(OperationVisitor &v) const { v.visit(*this); }
+void Rank::accept(OperationVisitor &v) const { v.visit(*this); }
 
-Cos::Cos(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
+Rank::Rank(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
     : Operation{OperandConstraint::createExact(1u), inputs, outputs}
 {
 }
diff --git a/runtime/onert/core/src/ir/operation/ReLU.cc b/runtime/onert/core/src/ir/operation/ReLU.cc
deleted file mode 100644
index f0c8847..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ReLU.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void ReLU::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ReLU::ReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ReLU1.cc b/runtime/onert/core/src/ir/operation/ReLU1.cc
deleted file mode 100644
index 734f0b6..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ReLU1.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void ReLU1::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ReLU1::ReLU1(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ReLU6.cc b/runtime/onert/core/src/ir/operation/ReLU6.cc
deleted file mode 100644
index 5972329..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ReLU6.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void ReLU6::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ReLU6::ReLU6(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "ir/operation/Round.h"
+#include "ir/operation/ResizeNearestNeighbor.h"
 
 #include <cassert>
 
@@ -27,10 +27,12 @@ namespace ir
 namespace operation
 {
 
-void Round::accept(OperationVisitor &v) const { v.visit(*this); }
+void ResizeNearestNeighbor::accept(OperationVisitor &v) const { v.visit(*this); }
 
-Round::Round(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
+ResizeNearestNeighbor::ResizeNearestNeighbor(const OperandIndexSequence &inputs,
+                                             const OperandIndexSequence &outputs,
+                                             const Param &param)
+    : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param}
 {
 }
 
diff --git a/runtime/onert/core/src/ir/operation/SQRT.cc b/runtime/onert/core/src/ir/operation/SQRT.cc
deleted file mode 100644
index ad887d8..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/SQRT.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void SQRT::accept(OperationVisitor &v) const { v.visit(*this); }
-
-SQRT::SQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Sub.cc b/runtime/onert/core/src/ir/operation/Sub.cc
deleted file mode 100644
index d710716..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Sub.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Sub::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Sub::Sub(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
-         const Param &param)
-    : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Tanh.cc b/runtime/onert/core/src/ir/operation/Tanh.cc
deleted file mode 100644
index 8fab0c0..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/Tanh.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void Tanh::accept(OperationVisitor &v) const { v.visit(*this); }
-
-Tanh::Tanh(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ZerosLike.cc b/runtime/onert/core/src/ir/operation/ZerosLike.cc
deleted file mode 100644
index 5f49b98..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ir/operation/ZerosLike.h"
-
-#include <cassert>
-
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
-{
-
-void ZerosLike::accept(OperationVisitor &v) const { v.visit(*this); }
-
-ZerosLike::ZerosLike(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(1u), inputs, outputs}
-{
-}
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/runtime/onert/core/src/ir/pass/PermutationOperationPass.h b/runtime/onert/core/src/ir/pass/PermutationOperationPass.h
deleted file mode 100644
index 6dec9ea..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
-#define __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
-
-#include "ir/OperationVisitor.h"
-#include "LoweredOperationPass.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace pass
-{
-
-class PermutationOperationPass : public LoweredOperationPass, public OperationVisitor
-{
-public:
-  using LoweredOperationPass::LoweredOperationPass;
-
-public:
-  std::string id() final { return "PermutationOperationPass"; }
-
-public:
-  void callback(const OperationIndex &i, Operation &n) final;
-
-public:
-  void visit(const operation::Add &) final;
-  void visit(const operation::Comparison &) final;
-  void visit(const operation::Concat &) final;
-  void visit(const operation::Div &) final;
-  void visit(const operation::LogicalAnd &) final;
-  void visit(const operation::LogicalNot &) final;
-  void visit(const operation::LogicalOr &) final;
-  void visit(const operation::Max &) final;
-  void visit(const operation::Min &) final;
-  void visit(const operation::Mul &) final;
-  void visit(const operation::Pack &) final;
-  void visit(const operation::PReLU &) final;
-  void visit(const operation::SquaredDifference &) final;
-  void visit(const operation::Sub &) final;
-  void visit(const operation::Unpack &) final;
-  void visit(const operation::FullyConnected &) final;
-  void visit(const operation::Gather &) final;
-  void visit(const operation::Reshape &) final;
-
-private:
-  void applyExpandRanks(const Operation &);
-  void changeToKeepLayout(const Operation &);
-};
-
-} // namespace pass
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__
index ec7f921..13a599b 100644 (file)
 #include <unordered_map>
 #include <json/json.h>
 #include <assert.h>
+#include <utility>
+#include <map>
+#include <set>
+#include <stdint.h>
 
+// JSON serialization helpers for the Chrome Event Trace format
 namespace
 {
 
@@ -110,6 +115,290 @@ std::string object(const CounterEvent &evt)
 
 } // namespace
 
+// Markdown table report writers
+namespace
+{
+
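+// Writes one Markdown table row; e.g. writeMDTableRow(os, {"a", "b"})
+// emits "| a | b | \n" (note the trailing space before the newline).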
+void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list)
+{
+  os << "| ";
+  for (auto &key : list)
+  {
+    os << key << " | ";
+  }
+  os << "\n";
+}
+
+struct MDContent
+{
+  std::string name;
+  uint64_t begin_ts;
+  uint64_t end_ts;
+  uint32_t min_rss;
+  uint32_t max_rss;
+  uint32_t min_page_reclaims;
+  uint32_t max_page_reclaims;
+
+  MDContent()
+      : begin_ts(0), end_ts(0), min_rss(UINT32_MAX), max_rss(0), min_page_reclaims(UINT32_MAX),
+        max_page_reclaims(0)
+  {
+    // DO NOTHING
+  }
+
+  virtual ~MDContent() = default;
+
+  void updateRss(uint32_t rss)
+  {
+    if (min_rss == UINT32_MAX)
+      min_rss = rss;
+    if (max_rss == 0)
+      max_rss = rss;
+
+    if (min_rss > rss)
+      min_rss = rss;
+    else if (max_rss < rss)
+      max_rss = rss;
+  }
+
+  void updateMinflt(uint32_t minflt)
+  {
+    if (min_page_reclaims == UINT32_MAX)
+      min_page_reclaims = minflt;
+    if (max_page_reclaims == 0)
+      max_page_reclaims = minflt;
+
+    if (min_page_reclaims > minflt)
+      min_page_reclaims = minflt;
+    else if (max_page_reclaims < minflt)
+      max_page_reclaims = minflt;
+  }
+
+  virtual void write(std::ostream &os) const = 0;
+};
+
+struct OpSeq : public MDContent
+{
+  std::string backend;
+  uint64_t graph_latency;
+
+  struct OpSeqCmp
+  {
+    bool operator()(const OpSeq &lhs, const OpSeq &rhs) const
+    {
+      return lhs.begin_ts < rhs.begin_ts;
+    }
+  };
+
+  void write(std::ostream &os) const override
+  {
+    uint64_t opseq_latency = end_ts - begin_ts;
+    double opseq_per = static_cast<double>(opseq_latency) / graph_latency * 100.0;
+    writeMDTableRow(os, {name, backend, std::to_string(opseq_latency), std::to_string(opseq_per),
+                         std::to_string(min_rss), std::to_string(max_rss),
+                         std::to_string(min_page_reclaims), std::to_string(max_page_reclaims)});
+  }
+};
+
+struct Graph : public MDContent
+{
+  std::set<OpSeq, OpSeq::OpSeqCmp> opseqs;
+
+  void setOpSeqs(const std::map<std::string, OpSeq> &name_to_opseq)
+  {
+    uint64_t graph_latency = end_ts - begin_ts;
+    for (const auto &it : name_to_opseq)
+    {
+      auto opseq = it.second;
+      opseq.graph_latency = graph_latency;
+
+      opseqs.insert(opseq);
+
+      updateRss(opseq.min_rss);
+      updateRss(opseq.max_rss);
+      updateMinflt(opseq.min_page_reclaims);
+      updateMinflt(opseq.max_page_reclaims);
+    }
+  }
+
+  void write(std::ostream &os) const override
+  {
+    static std::vector<std::string> graph_headers{"latency(us)", "rss_min(kb)", "rss_max(kb)",
+                                                  "page_reclaims_min", "page_reclaims_max"};
+
+    static std::vector<std::string> graph_headers_line{"-----------", "-------", "-------",
+                                                       "-----------------", "-----------------"};
+
+    // Graph's Header
+    writeMDTableRow(os, graph_headers);
+    writeMDTableRow(os, graph_headers_line);
+
+    // Graph's contents
+    writeMDTableRow(os, {std::to_string(end_ts - begin_ts), std::to_string(min_rss),
+                         std::to_string(max_rss), std::to_string(min_page_reclaims),
+                         std::to_string(max_page_reclaims)});
+
+    os << "\n";
+
+    static std::vector<std::string> opseq_headers{
+        "OpSeq name",  "backend",     "latency(us)",       "latency(%)",
+        "rss_min(kb)", "rss_max(kb)", "page_reclaims_min", "page_reclaims_max"};
+
+    static std::vector<std::string> opseq_headers_line{
+        "----------", "-------", "-----------",       "-----------",
+        "-------",    "-------", "-----------------", "-----------------"};
+
+    os << "## OpSequences \n";
+
+    // OpSeq's Header
+    writeMDTableRow(os, opseq_headers);
+    writeMDTableRow(os, opseq_headers_line);
+
+    // OpSeq's contents
+    for (const auto &opseq : opseqs)
+    {
+      opseq.write(os);
+    }
+
+    os << "\n";
+  }
+};
+
+struct MDTableBuilder
+{
+  MDTableBuilder(const std::vector<DurationEvent> &duration_events,
+                 const std::vector<CounterEvent> &counter_events)
+      : _duration_events(duration_events), _counter_events(counter_events)
+  {
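+    // Fold the counter events into one (maxrss, minflt) pair per timestamp so
+    // duration events can look both values up by their own timestamps later.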
+    for (const auto &evt : _counter_events)
+    {
+      uint64_t ts = std::stoull(evt.ts);
+      auto &name = evt.name;
+      assert(name.compare("maxrss") == 0 || name.compare("minflt") == 0);
+      assert(evt.values.size() == 1);
+      auto &val = evt.values.begin()->second;
+      if (_ts_to_values.find(ts) == _ts_to_values.end())
+      {
+        std::pair<uint32_t, uint32_t> values;
+        if (name.compare("maxrss") == 0)
+          values.first = std::stoul(val);
+        else
+          values.second = std::stoul(val);
+        _ts_to_values.insert({ts, values});
+      }
+      else
+      {
+        auto &values = _ts_to_values.at(ts);
+        if (name.compare("maxrss") == 0)
+          values.first = std::stoul(val);
+        else
+          values.second = std::stoul(val);
+      }
+    }
+  }
+
+  MDTableBuilder &build()
+  {
+    for (auto &it : divideGraph())
+    {
+      size_t begin_idx = it.first;
+      size_t end_idx = it.second;
+      std::map<std::string, OpSeq> name_to_opseq;
+      for (size_t i = begin_idx + 1; i < end_idx; ++i)
+      {
+        const auto &evt = _duration_events[i];
+        assert(evt.name.compare("Graph") != 0);
+        assert(evt.ph.compare("B") == 0 || evt.ph.compare("E") == 0);
+        if (evt.ph.compare("B") == 0)
+        {
+          assert(name_to_opseq.find(evt.name) == name_to_opseq.end());
+          name_to_opseq.insert({evt.name, makeOpSeq(evt)});
+        }
+        else
+        {
+          assert(name_to_opseq.find(evt.name) != name_to_opseq.end());
+          auto &opseq = name_to_opseq.at(evt.name);
+          updateOpSeq(opseq, evt);
+        }
+      }
+
+      _graphs.emplace_back(makeGraph(begin_idx, end_idx, name_to_opseq));
+    }
+
+    return *this;
+  }
+
+  std::vector<std::pair<size_t, size_t>> divideGraph()
+  {
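+    // Pair up "Graph" begin ("B") and end ("E") duration events; e.g. the stream
+    // [Graph/B, opA/B, opA/E, Graph/E] yields {(0, 3)}. Events strictly between
+    // a pair are that run's OpSeq begin/end events.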
+    std::vector<std::pair<size_t, size_t>> graph_idx_list; // pair<begin_idx, end_idx>
+    for (size_t i = 0, begin_idx = 0; i < _duration_events.size(); ++i)
+    {
+      const auto &evt = _duration_events.at(i);
+      if (evt.name.compare("Graph") == 0)
+      {
+        if (evt.ph.compare("B") == 0)
+          begin_idx = i;
+        else
+          graph_idx_list.emplace_back(begin_idx, i);
+      }
+    }
+    return graph_idx_list;
+  }
+
+  OpSeq makeOpSeq(const DurationEvent &evt)
+  {
+    OpSeq opseq;
+    opseq.name = evt.name;
+    opseq.begin_ts = std::stoull(evt.ts);
+    opseq.updateRss(_ts_to_values.at(opseq.begin_ts).first);
+    opseq.updateMinflt(_ts_to_values.at(opseq.begin_ts).second);
+    opseq.backend = evt.tid;
+    return opseq;
+  }
+
+  void updateOpSeq(OpSeq &opseq, const DurationEvent &evt)
+  {
+    opseq.end_ts = std::stoull(evt.ts);
+    opseq.updateRss(_ts_to_values.at(opseq.end_ts).first);
+    opseq.updateMinflt(_ts_to_values.at(opseq.end_ts).second);
+  }
+
+  Graph makeGraph(size_t begin_idx, size_t end_idx,
+                  const std::map<std::string, OpSeq> &name_to_opseq)
+  {
+    Graph graph;
+    graph.name = "Graph";
+    graph.begin_ts = std::stoull(_duration_events[begin_idx].ts);
+    graph.updateRss(_ts_to_values.at(graph.begin_ts).first);
+    graph.updateMinflt(_ts_to_values.at(graph.begin_ts).second);
+    graph.end_ts = std::stoull(_duration_events[end_idx].ts);
+    graph.updateRss(_ts_to_values.at(graph.end_ts).first);
+    graph.updateMinflt(_ts_to_values.at(graph.end_ts).second);
+    graph.setOpSeqs(name_to_opseq);
+    return graph;
+  }
+
+  void write(std::ostream &os)
+  {
+    // Write contents
+    for (size_t i = 0; i < _graphs.size(); ++i)
+    {
+      os << "# Graph " << i << "\n";
+      _graphs.at(i).write(os);
+    }
+  }
+
+  const std::vector<DurationEvent> &_duration_events;
+  const std::vector<CounterEvent> &_counter_events;
+  // timestamp to std::pair<maxrss, minflt>
+  std::unordered_map<uint64_t, std::pair<uint32_t, uint32_t>> _ts_to_values;
+  std::vector<Graph> _graphs;
+};
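+// For illustration, MDTableBuilder::write emits a report roughly like this
+// (all numbers hypothetical):
+//
+//   # Graph 0
+//   | latency(us) | rss_min(kb) | rss_max(kb) | page_reclaims_min | page_reclaims_max |
+//   | ----------- | ------- | ------- | ----------------- | ----------------- |
+//   | 1234 | 5120 | 6144 | 10 | 42 |
+//
+//   ## OpSequences
+//   | OpSeq name | backend | latency(us) | latency(%) | rss_min(kb) | ... |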
+
+} // namespace
+
 void EventRecorder::emit(const DurationEvent &evt)
 {
   std::lock_guard<std::mutex> lock{_mu};
@@ -136,6 +425,9 @@ void EventRecorder::writeToFile(std::ostream &os)
     case WriteFormat::SNPE_BENCHMARK:
       writeSNPEBenchmark(os);
       break;
+    case WriteFormat::MD_TABLE:
+      writeMDTable(os);
+      break;
     default:
       assert(!"Invalid value");
       break;
@@ -258,3 +550,8 @@ void EventRecorder::writeChromeTrace(std::ostream &os)
   os << "  ]\n";
   os << "}\n";
 }
+
+void EventRecorder::writeMDTable(std::ostream &os)
+{
+  MDTableBuilder(_duration_events, _counter_events).build().write(os);
+}
index 6eea069..37ec1a0 100644 (file)
@@ -53,7 +53,8 @@ public:
   enum class WriteFormat
   {
     CHROME_TRACING,
-    SNPE_BENCHMARK
+    SNPE_BENCHMARK,
+    MD_TABLE,
   };
 
 public:
@@ -71,6 +72,7 @@ public:
 private:
   void writeSNPEBenchmark(std::ostream &os);
   void writeChromeTrace(std::ostream &os);
+  void writeMDTable(std::ostream &os);
 
 private:
   std::mutex _mu;
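A minimal usage sketch for the new MD_TABLE format, assuming a `setWriteFormat`
setter on EventRecorder (not shown in these hunks; only `writeToFile` appears
above, so adapt to the recorder's actual configuration API):

  #include <fstream>

  void dumpAsMarkdown(EventRecorder &recorder)
  {
    std::ofstream os{"trace.md"};
    recorder.setWriteFormat(EventRecorder::WriteFormat::MD_TABLE);
    recorder.writeToFile(os); // dispatches to writeMDTable()
  }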
index 9a24f8c..95c1504 100644 (file)
@@ -18,8 +18,6 @@
 #include "util/Utils.h"
 #include "ir/InternalType.h"
 #include "ir/Shape.h"
-#include "ir/operation/AvgPool2D.h"
-#include "ir/operation/MaxPool2D.h"
 #include "util/ShapeInference.h"
 #include "util/logging.h"
 
@@ -81,10 +79,12 @@ ir::Shape broadcastShapes(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape
 // Calculate output height and width of convolution-like operation
 std::pair<int, int> calcConvLikeHeightAndWidth(const int in_h, const int in_w, const int ker_h,
                                                const int ker_w, const ir::Padding pad,
-                                               const ir::Stride stride)
+                                               const ir::Stride stride,
+                                               const ir::Dilation dilation = {1, 1})
 {
   int32_t out_h = 0, out_w = 0;
-
+  int32_t effective_filter_w_size = (ker_w - 1) * dilation.width_factor + 1;
+  int32_t effective_filter_h_size = (ker_h - 1) * dilation.height_factor + 1;
   switch (pad.type)
   {
     case ir::PaddingType::SAME:
@@ -92,12 +92,15 @@ std::pair<int, int> calcConvLikeHeightAndWidth(const int in_h, const int in_w, c
       out_w = ceil_div(in_w, stride.horizontal);
       break;
     case ir::PaddingType::VALID:
-      out_h = ceil_div(in_h - ker_h + 1, stride.vertical);
-      out_w = ceil_div(in_w - ker_w + 1, stride.horizontal);
+      out_h = ceil_div(in_h - effective_filter_h_size + 1, stride.vertical);
+      out_w = ceil_div(in_w - effective_filter_w_size + 1, stride.horizontal);
       break;
     case ir::PaddingType::EXPLICIT:
-      out_h = (in_h + pad.param.top + pad.param.bottom - ker_h) / stride.vertical + 1;
-      out_w = (in_w + pad.param.left + pad.param.right - ker_w) / stride.horizontal + 1;
+      out_h =
+          (in_h + pad.param.top + pad.param.bottom - effective_filter_h_size) / stride.vertical + 1;
+      out_w =
+          (in_w + pad.param.left + pad.param.right - effective_filter_w_size) / stride.horizontal +
+          1;
       break;
     default:
       assert(false);
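A quick worked check of the dilation arithmetic above, with illustrative values:
a 3x3 kernel with dilation factor 2 has effective size (3 - 1) * 2 + 1 = 5, so
VALID padding with in_h = 10 and vertical stride 1 yields
out_h = ceil_div(10 - 5 + 1, 1) = 6.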
@@ -126,17 +129,6 @@ ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank)
   return out_shape;
 }
 
-ir::Shape inferAvgPoolShape(const ir::Shape &in_shape, const ir::operation::AvgPool2D::Param &param,
-                            const ir::Layout layout)
-{
-  assert(layout == ir::Layout::NHWC);
-  auto ifm_shape = in_shape.asFeature(layout);
-  const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw,
-                                                  param.padding, param.stride);
-  // Pooling don't change number of channels and batch size
-  return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C};
-}
-
 ir::Shape inferReduceShape(const ir::Shape &input_shape, const std::vector<int> &axes,
                            bool keep_dims)
 {
@@ -320,7 +312,7 @@ ir::Shape inferConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape
   assert(ifm_shape.C == kf_shape.C);
 
   const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, kf_shape.H, kf_shape.W,
-                                                  param.padding, param.stride);
+                                                  param.padding, param.stride, param.dilation);
 
   return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, kf_shape.N};
 }
@@ -411,17 +403,6 @@ ir::Shape inferGatherShape(const ir::Shape &input_shape, const ir::Shape &indice
   return out_shape;
 }
 
-ir::Shape inferMaxPoolShape(const ir::Shape &in_shape, const ir::operation::MaxPool2D::Param &param,
-                            const ir::Layout layout)
-{
-  assert(layout == ir::Layout::NHWC);
-  auto ifm_shape = in_shape.asFeature(layout);
-  const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw,
-                                                  param.padding, param.stride);
-  // Pooling don't change number of channels and batch size
-  return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C};
-}
-
 ir::Shape inferOnehotShape(const ir::Shape &input_shape, const int depth, int axis)
 {
   assert(depth >= 0);
@@ -486,6 +467,17 @@ ir::Shape inferPadShape(const ir::Shape &in_shape, const int32_t *pad_buf, const
   return ret;
 }
 
+ir::Shape inferPoolShape(const ir::Shape &in_shape, const ir::operation::Pool2D::Param &param,
+                         const ir::Layout layout)
+{
+  assert(layout == ir::Layout::NHWC);
+  auto ifm_shape = in_shape.asFeature(layout);
+  const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw,
+                                                  param.padding, param.stride);
+  // Pooling doesn't change the number of channels or the batch size
+  return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C};
+}
+
 ir::Shape inferResizeBilinearShape(const ir::Shape &in_shape, const int32_t output_height,
                                    const int32_t output_width)
 {
index 0f6a2a5..480452e 100644 (file)
@@ -105,40 +105,39 @@ protected:
   template <typename Param, typename OptionsType>
   void loadStridesAndPaddings(Param &param, const OptionsType *options);
   // Load Pool2D param
-  template <typename Param> void loadPool2D(Param &param, const Pool2DOptions *options);
+  template <typename Param> void loadPool2DOptions(Param &param, const Pool2DOptions *options);
 
   // Operations
   void loadConv2D(const Operator *op, ir::Graph &subg);
   void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg);
   void loadTransposeConv(const Operator *op, ir::Graph &subg);
-  void loadAvgPool2D(const Operator *op, ir::Graph &subg);
+  void loadPool2D(const Operator *op, ir::Graph &subg, ir::operation::Pool2D::PoolType op_type);
   void loadReshape(const Operator *op, ir::Graph &subg);
   void loadSoftmax(const Operator *op, ir::Graph &subg);
-  void loadMaxPool2D(const Operator *op, ir::Graph &subg);
   void loadConcatenation(const Operator *op, ir::Graph &subg);
   void loadFill(const Operator *op, ir::Graph &subg);
   void loadFC(const Operator *op, ir::Graph &subg);
-  void loadAdd(const Operator *op, ir::Graph &subg);
-  void loadSub(const Operator *op, ir::Graph &subg);
-  void loadMul(const Operator *op, ir::Graph &subg);
-  void loadDiv(const Operator *op, ir::Graph &subg);
+  template <ir::operation::BinaryArithmetic::ArithmeticType op_type>
+  void loadBinaryArithmetic(const Operator *op, ir::Graph &subg);
+  void loadAddV2(const Operator *op, ir::Graph &subg);
   void loadPack(const Operator *op, ir::Graph &subg);
-  void loadRelu(const Operator *op, ir::Graph &subg);
-  void loadRelu6(const Operator *op, ir::Graph &subg);
   void loadResizeBilinear(const Operator *op, ir::Graph &subg);
-  void loadRsqrt(const Operator *op, ir::Graph &subg);
+  void loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg);
   void loadSelect(const Operator *op, ir::Graph &subg);
-  void loadSqrt(const Operator *op, ir::Graph &subg);
   void loadSquaredDifference(const Operator *op, ir::Graph &subg);
-  void loadTanh(const Operator *op, ir::Graph &subg);
   void loadTranspose(const Operator *op, ir::Graph &subg);
-  void loadReduce(const Operator *op, ir::Graph &subg,
-                  ir::operation::Reduce::ReduceType reduce_type);
+  template <ir::operation::Reduce::ReduceType reduce_type>
+  void loadReduce(const Operator *op, ir::Graph &subg);
   void loadReduceAll(const Operator *op, ir::Graph &subg);
   void loadReverseV2(const Operator *op, ir::Graph &subg);
   void loadPad(const Operator *op, ir::Graph &subg);
-  void loadLogistic(const Operator *op, ir::Graph &subg);
-  void loadExp(const Operator *op, ir::Graph &subg);
+  void loadElementwiseActivation(const Operator *op, ir::Graph &subg,
+                                 ir::operation::ElementwiseActivation::Type op_type,
+                                 float alpha = 0.f, float beta = 0.f);
+  template <ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type>
+  void loadElementwiseBinary(const Operator *op, ir::Graph &subg);
+  void loadElementwiseUnary(const Operator *op, ir::Graph &subg,
+                            ir::operation::ElementwiseUnary::Type op_type);
   void loadExpandDims(const Operator *op, ir::Graph &subg);
   void loadGather(const Operator *op, ir::Graph &subg);
   void loadCustom(const Operator *op, ir::Graph &subg);
@@ -152,35 +151,25 @@ protected:
   void loadSlice(const Operator *op, ir::Graph &subg);
   void loadStridedSlice(const Operator *op, ir::Graph &subg);
   void loadUnpack(const Operator *op, ir::Graph &subg);
-  void loadMinimum(const Operator *op, ir::Graph &subg);
-  void loadMaximum(const Operator *op, ir::Graph &subg);
-  void loadCast(const Operator *op, ir::Graph &subg);
   void loadComparison(const Operator *op, ir::Graph &subg);
   void loadEinsum(const Operator *op, ir::Graph &subg);
   void loadOneHot(const Operator *op, ir::Graph &subg);
-  void loadAbs(const Operator *op, ir::Graph &subg);
-  void loadCos(const Operator *op, ir::Graph &subg);
-  void loadSin(const Operator *op, ir::Graph &subg);
   void loadShape(const Operator *op, ir::Graph &subg);
   void loadIf(const Operator *op, ir::Graph &subg);
   void loadWhile(const Operator *op, ir::Graph &subg);
-  void loadNeg(const Operator *op, ir::Graph &subg);
-  void loadLog(const Operator *op, ir::Graph &subg);
   void loadArgMax(const Operator *op, ir::Graph &subg);
-  void loadRound(const Operator *op, ir::Graph &subg);
   void loadPow(const Operator *op, ir::Graph &subg);
-  void loadLogicalNot(const Operator *op, ir::Graph &subg);
-  void loadZerosLike(const Operator *op, ir::Graph &subg);
   void loadTile(const Operator *op, ir::Graph &subg);
-  void loadLogicalOr(const Operator *op, ir::Graph &subg);
   void loadRange(const Operator *op, ir::Graph &subg);
+  void loadRank(const Operator *op, ir::Graph &subg);
   void loadMatrixBandPart(const Operator *op, ir::Graph &subg);
   void loadBroadcastTo(const Operator *op, ir::Graph &subg);
   void loadFusedBatchNorm(const Operator *op, ir::Graph &subg);
   void loadLogSoftmax(const Operator *op, ir::Graph &subg);
-  void loadQuantize(const Operator *op, ir::Graph &subg);
   void loadSpaceToDepth(const Operator *op, ir::Graph &subg);
   void loadStatelessRandomUniform(const Operator *op, ir::Graph &subg);
+  void loadL2Normalization(const Operator *op, ir::Graph &subg);
+  void loadLeakyRelu(const Operator *op, ir::Graph &subg);
 
 protected:
   // Base address for mapped region for loading (if needed)
@@ -194,6 +183,7 @@ protected:
   const Model *_model;
   // Maps Tensor indices to onert Operands.
   std::vector<ir::OperandIndex> _tensor_to_operand;
+  std::unordered_map<ir::OperandIndex, std::string> _tensor_names;
   // Verifier
   std::unique_ptr<Verifier> _verifier;
 };
@@ -466,8 +456,8 @@ ir::OperandIndex BaseLoader<LoaderDomain, SpecificLoader>::loadOperand(const Ten
     subg.setOperandValue(operand_index, std::move(data_obj));
   }
 
-  // Name unused
-  // auto name = tensor->name();
+  _tensor_names.emplace(operand_index, tensor->name()->str());
+
   // Variable
   if (tensor->is_variable())
     throw std::runtime_error("Variable tensor not supported!");
@@ -518,8 +508,8 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadStridesAndPaddings(Param &par
 
 template <typename LoaderDomain, typename SpecificLoader>
 template <typename Param>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2D(Param &param,
-                                                          const Pool2DOptions *options)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2DOptions(Param &param,
+                                                                 const Pool2DOptions *options)
 {
   // Strides and Paddings
   loadStridesAndPaddings(param, options);
@@ -543,7 +533,10 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadConv2D(const Operator *op, ir
   const auto *options = op->builtin_options_as_Conv2DOptions();
   param.activation = convertActivation(options->fused_activation_function());
   loadStridesAndPaddings(param, options);
-  // Dilation h/w factor unused
+
+  param.dilation.width_factor = options->dilation_w_factor();
+  param.dilation.height_factor = options->dilation_h_factor();
+
   std::unique_ptr<ir::Operation> new_op(new ir::operation::Conv2D(inputs, outputs, param));
   subg.addOperation(std::move(new_op));
 }
@@ -585,19 +578,21 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadTransposeConv(const Operator
 }
 
 template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadAvgPool2D(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2D(const Operator *op, ir::Graph &subg,
+                                                          ir::operation::Pool2D::PoolType op_type)
 {
   ir::OperandIndexSequence inputs;
   ir::OperandIndexSequence outputs;
 
   loadOperationIO(op, inputs, outputs);
 
-  ir::operation::AvgPool2D::Param param;
+  ir::operation::Pool2D::Param param;
+  param.op_type = op_type;
   const auto *options = op->builtin_options_as_Pool2DOptions();
 
-  loadPool2D(param, options);
+  loadPool2DOptions(param, options);
 
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::AvgPool2D(inputs, outputs, param));
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Pool2D(inputs, outputs, param));
   subg.addOperation(std::move(new_op));
 }
 
@@ -645,23 +640,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadSoftmax(const Operator *op, i
 }
 
 template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMaxPool2D(const Operator *op, ir::Graph &subg)
-{
-  ir::OperandIndexSequence inputs;
-  ir::OperandIndexSequence outputs;
-
-  loadOperationIO(op, inputs, outputs);
-
-  ir::operation::MaxPool2D::Param param;
-  const auto *options = op->builtin_options_as_Pool2DOptions();
-
-  loadPool2D(param, options);
-
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::MaxPool2D(inputs, outputs, param));
-  subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
 void BaseLoader<LoaderDomain, SpecificLoader>::loadConcatenation(const Operator *op,
                                                                  ir::Graph &subg)
 {
@@ -719,70 +697,82 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadFC(const Operator *op, ir::Gr
 }
 
 template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadAdd(const Operator *op, ir::Graph &subg)
-{
-  ir::OperandIndexSequence inputs;
-  ir::OperandIndexSequence outputs;
-
-  loadOperationIO(op, inputs, outputs);
-
-  ir::operation::Add::Param param;
-  const auto *options = op->builtin_options_as_AddOptions();
-
-  param.activation = convertActivation(options->fused_activation_function());
-
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::Add(inputs, outputs, param));
-  subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSub(const Operator *op, ir::Graph &subg)
-{
-  ir::OperandIndexSequence inputs;
-  ir::OperandIndexSequence outputs;
-
-  loadOperationIO(op, inputs, outputs);
-
-  ir::operation::Sub::Param param;
-  const auto *options = op->builtin_options_as_SubOptions();
-
-  param.activation = convertActivation(options->fused_activation_function());
-
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::Sub(inputs, outputs, param));
-  subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMul(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadAddV2(const Operator *op, ir::Graph &subg)
 {
   ir::OperandIndexSequence inputs;
   ir::OperandIndexSequence outputs;
 
   loadOperationIO(op, inputs, outputs);
 
-  ir::operation::Mul::Param param;
-  const auto *options = op->builtin_options_as_MulOptions();
+  ir::operation::BinaryArithmetic::Param param;
+  param.arithmetic_type = ir::operation::BinaryArithmetic::ArithmeticType::ADD;
 
-  param.activation = convertActivation(options->fused_activation_function());
+  if (op->custom_options() == nullptr)
+  {
+    param.activation = ir::Activation::NONE;
+  }
+  else
+  {
+    size_t custom_op_data_size = op->custom_options()->size();
+    auto custom_op_data = op->custom_options()->Data();
+    auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
+    auto attr_map = data_root.AsMap();
+    const auto fused_activation_func = static_cast<typename LoaderDomain::ActivationFunctionType>(
+        attr_map["fused_activation_function"].AsInt8());
+    param.activation = convertActivation(fused_activation_func);
+  }
 
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::Mul(inputs, outputs, param));
+  std::unique_ptr<ir::Operation> new_op(
+      new ir::operation::BinaryArithmetic(inputs, outputs, param));
   subg.addOperation(std::move(new_op));
 }
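+// Standalone sketch of reading such a flexbuffer attribute map with the
+// flatbuffers flexbuffers API (helper name and buffer contents illustrative):
+//
+//   #include "flatbuffers/flexbuffers.h"
+//   int8_t readFusedActivation(const uint8_t *data, size_t size)
+//   {
+//     auto attr_map = flexbuffers::GetRoot(data, size).AsMap();
+//     return attr_map["fused_activation_function"].AsInt8();
+//   }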
 
 template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadDiv(const Operator *op, ir::Graph &subg)
+template <ir::operation::BinaryArithmetic::ArithmeticType op_type>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadBinaryArithmetic(const Operator *op,
+                                                                    ir::Graph &subg)
 {
   ir::OperandIndexSequence inputs;
   ir::OperandIndexSequence outputs;
 
   loadOperationIO(op, inputs, outputs);
 
-  ir::operation::Div::Param param;
-  const auto *options = op->builtin_options_as_DivOptions();
-
-  param.activation = convertActivation(options->fused_activation_function());
+  ir::operation::BinaryArithmetic::Param param;
+  param.arithmetic_type = op_type;
+  switch (op_type)
+  {
+    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+    {
+      const auto *add_options = op->builtin_options_as_AddOptions();
+      param.activation = convertActivation(add_options->fused_activation_function());
+      break;
+    }
+    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+    {
+      const auto *sub_options = op->builtin_options_as_SubOptions();
+      param.activation = convertActivation(sub_options->fused_activation_function());
+      break;
+    }
+    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+    {
+      const auto *mul_options = op->builtin_options_as_MulOptions();
+      param.activation = convertActivation(mul_options->fused_activation_function());
+      break;
+    }
+    case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+    {
+      const auto *div_options = op->builtin_options_as_DivOptions();
+      param.activation = convertActivation(div_options->fused_activation_function());
+      break;
+    }
+    default:
+      assert(false &&
+             "The function 'loadBinaryArithmetic' supports only BinaryArithmetic operations");
+      break;
+  }
 
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::Div(inputs, outputs, param));
+  std::unique_ptr<ir::Operation> new_op(
+      new ir::operation::BinaryArithmetic(inputs, outputs, param));
   subg.addOperation(std::move(new_op));
 }
 
@@ -805,26 +795,22 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadPack(const Operator *op, ir::
 }
 
 template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRelu(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseActivation(
+    const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type,
+    float alpha, float beta)
 {
   ir::OperandIndexSequence inputs;
   ir::OperandIndexSequence outputs;
 
   loadOperationIO(op, inputs, outputs);
 
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::ReLU(inputs, outputs));
-  subg.addOperation(std::move(new_op));
-}
+  ir::operation::ElementwiseActivation::Param param;
+  param.op_type = op_type;
+  param.alpha = alpha;
+  param.beta = beta;
 
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRelu6(const Operator *op, ir::Graph &subg)
-{
-  ir::OperandIndexSequence inputs;
-  ir::OperandIndexSequence outputs;
-
-  loadOperationIO(op, inputs, outputs);
-
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::ReLU6(inputs, outputs));
+  std::unique_ptr<ir::Operation> new_op(
+      new ir::operation::ElementwiseActivation(inputs, outputs, param));
   subg.addOperation(std::move(new_op));
 }
 
@@ -856,38 +842,40 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadResizeBilinear(const Operator
 }
 
 template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRsqrt(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadResizeNearestNeighbor(const Operator *op,
+                                                                         ir::Graph &subg)
 {
   ir::OperandIndexSequence inputs;
   ir::OperandIndexSequence outputs;
 
   loadOperationIO(op, inputs, outputs);
+  auto input = inputs.at(0);
+  auto size = inputs.at(1);
 
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::RSQRT(inputs, outputs));
-  subg.addOperation(std::move(new_op));
-}
+  if (!subg.operands().at(size).isConstant())
+    throw std::runtime_error("ResizeNearestNeighbor: non-constant 'size' is not supported.");
 
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSelect(const Operator *op, ir::Graph &subg)
-{
-  ir::OperandIndexSequence inputs;
-  ir::OperandIndexSequence outputs;
+  std::vector<std::int32_t> size_v = subg.operands().at(size).template asVector<std::int32_t>();
 
-  loadOperationIO(op, inputs, outputs);
+  ir::operation::ResizeNearestNeighbor::Param param;
+  param.height_out = size_v[0];
+  param.width_out = size_v[1];
+  param.align_corners = op->builtin_options_as_ResizeNearestNeighborOptions()->align_corners();
 
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::Select(inputs, outputs));
+  std::unique_ptr<ir::Operation> new_op(
+      new ir::operation::ResizeNearestNeighbor({input}, outputs, param));
   subg.addOperation(std::move(new_op));
 }
 
 template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSqrt(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadSelect(const Operator *op, ir::Graph &subg)
 {
   ir::OperandIndexSequence inputs;
   ir::OperandIndexSequence outputs;
 
   loadOperationIO(op, inputs, outputs);
 
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::SQRT(inputs, outputs));
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Select(inputs, outputs));
   subg.addOperation(std::move(new_op));
 }
 
@@ -905,18 +893,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadSquaredDifference(const Opera
 }
 
 template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadTanh(const Operator *op, ir::Graph &subg)
-{
-  ir::OperandIndexSequence inputs;
-  ir::OperandIndexSequence outputs;
-
-  loadOperationIO(op, inputs, outputs);
-
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::Tanh(inputs, outputs));
-  subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
 void BaseLoader<LoaderDomain, SpecificLoader>::loadTranspose(const Operator *op, ir::Graph &subg)
 {
   ir::OperandIndexSequence inputs;
@@ -937,8 +913,8 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadTranspose(const Operator *op,
 }
 
 template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadReduce(
-    const Operator *op, ir::Graph &subg, ir::operation::Reduce::ReduceType reduce_type)
+template <ir::operation::Reduce::ReduceType reduce_type>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadReduce(const Operator *op, ir::Graph &subg)
 {
   ir::OperandIndexSequence inputs;
   ir::OperandIndexSequence outputs;
@@ -1005,26 +981,49 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadPad(const Operator *op, ir::G
 }
 
 template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLogistic(const Operator *op, ir::Graph &subg)
+template <ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseBinary(const Operator *op,
+                                                                     ir::Graph &subg)
 {
   ir::OperandIndexSequence inputs;
   ir::OperandIndexSequence outputs;
 
   loadOperationIO(op, inputs, outputs);
 
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::Logistic(inputs, outputs));
+  ir::operation::ElementwiseBinary::Param param;
+  param.op_type = op_type;
+
+  std::unique_ptr<ir::Operation> new_op(
+      new ir::operation::ElementwiseBinary(inputs, outputs, param));
   subg.addOperation(std::move(new_op));
 }
 
 template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadExp(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseUnary(
+    const Operator *op, ir::Graph &subg, ir::operation::ElementwiseUnary::Type op_type)
 {
   ir::OperandIndexSequence inputs;
   ir::OperandIndexSequence outputs;
 
   loadOperationIO(op, inputs, outputs);
 
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::Exp(inputs, outputs));
+  ir::operation::ElementwiseUnary::Param param;
+  param.op_type = op_type;
+
+  if (op_type == ir::operation::ElementwiseUnary::Type::CAST)
+  {
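+    // CAST treats QUANT_UINT8_ASYMM operands as plain UINT8: the cast ignores
+    // quantization parameters, so strip them from both input and output here.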
+    auto qasymm8ToUint8 = [](ir::Operand &operand) {
+      if (operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM)
+      {
+        operand.type(ir::DataType::UINT8);
+      }
+    };
+    qasymm8ToUint8(subg.operands().at(inputs.at(ir::operation::ElementwiseUnary::Input::INPUT)));
+    qasymm8ToUint8(subg.operands().at(outputs.at(0)));
+  }
+
+  std::unique_ptr<ir::Operation> new_op(
+      new ir::operation::ElementwiseUnary(inputs, outputs, param));
   subg.addOperation(std::move(new_op));
 }
 
@@ -1177,6 +1176,17 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadStatelessRandomUniform(const
 }
 
 template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadRank(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+  loadOperationIO(op, inputs, outputs);
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::Rank(inputs, outputs));
+  subg.addOperation(std::move(new_op));
+}
+
+template <typename LoaderDomain, typename SpecificLoader>
 void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir::Graph &subg)
 {
   ir::OperandIndexSequence inputs;
@@ -1197,7 +1207,8 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir
     Einsum,
     BroadcastTo,
     FusedBatchNorm,
-    StatelessRandomUniform
+    StatelessRandomUniform,
+    Erf
   };
 
   // Mapping from custom op name string to BuiltinOP enum
@@ -1210,6 +1221,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir
       {"FusedBatchNormV3", BuiltinOP::FusedBatchNorm},
       {"BroadcastTo", BuiltinOP::BroadcastTo},
       {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform},
+      {"Erf", BuiltinOP::Erf},
   };
 
   try
@@ -1219,7 +1231,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir
     switch (custom_op_id)
     {
       case BuiltinOP::AddV2:
-        loadAdd(op, subg);
+        loadAddV2(op, subg);
         break;
       case BuiltinOP::ReduceAll:
         loadReduceAll(op, subg);
@@ -1242,6 +1254,9 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir
       case BuiltinOP::StatelessRandomUniform:
         loadStatelessRandomUniform(op, subg);
         break;
+      case BuiltinOP::Erf:
+        loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ERF);
+        break;
       default:
         throw std::runtime_error{
             "Loader: Custom OP map is defined but operation loader function is not defined"};
@@ -1396,51 +1411,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadUnpack(const Operator *op, ir
 }
 
 template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMinimum(const Operator *op, ir::Graph &subg)
-{
-  ir::OperandIndexSequence inputs;
-  ir::OperandIndexSequence outputs;
-
-  loadOperationIO(op, inputs, outputs);
-
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::Min(inputs, outputs));
-  subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadMaximum(const Operator *op, ir::Graph &subg)
-{
-  ir::OperandIndexSequence inputs;
-  ir::OperandIndexSequence outputs;
-
-  loadOperationIO(op, inputs, outputs);
-
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::Max(inputs, outputs));
-  subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadCast(const Operator *op, ir::Graph &subg)
-{
-  ir::OperandIndexSequence inputs;
-  ir::OperandIndexSequence outputs;
-
-  loadOperationIO(op, inputs, outputs);
-
-  auto qasymm8ToUint8 = [](ir::Operand &operand) {
-    if (operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM)
-    {
-      operand.type(ir::DataType::UINT8);
-    }
-  };
-  qasymm8ToUint8(subg.operands().at(inputs.at(ir::operation::Cast::Input::INPUT)));
-  qasymm8ToUint8(subg.operands().at(outputs.at(0)));
-
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::Cast(inputs, outputs));
-  subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
 void BaseLoader<LoaderDomain, SpecificLoader>::loadComparison(const Operator *op, ir::Graph &subg)
 {
   ir::OperandIndexSequence inputs;
@@ -1562,42 +1532,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOneHot(const Operator *op, ir
 }
 
 template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadAbs(const Operator *op, ir::Graph &subg)
-{
-  ir::OperandIndexSequence inputs;
-  ir::OperandIndexSequence outputs;
-
-  loadOperationIO(op, inputs, outputs);
-
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::Abs(inputs, outputs));
-  subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadCos(const Operator *op, ir::Graph &subg)
-{
-  ir::OperandIndexSequence inputs;
-  ir::OperandIndexSequence outputs;
-
-  loadOperationIO(op, inputs, outputs);
-
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::Cos(inputs, outputs));
-  subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadSin(const Operator *op, ir::Graph &subg)
-{
-  ir::OperandIndexSequence inputs;
-  ir::OperandIndexSequence outputs;
-
-  loadOperationIO(op, inputs, outputs);
-
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::Sin(inputs, outputs));
-  subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
 void BaseLoader<LoaderDomain, SpecificLoader>::loadShape(const Operator *op, ir::Graph &subg)
 {
   ir::OperandIndexSequence inputs;
@@ -1652,18 +1586,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadWhile(const Operator *op, ir:
 }
 
 template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadNeg(const Operator *op, ir::Graph &subg)
-{
-  ir::OperandIndexSequence inputs;
-  ir::OperandIndexSequence outputs;
-
-  loadOperationIO(op, inputs, outputs);
-
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::Neg(inputs, outputs));
-  subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
 void BaseLoader<LoaderDomain, SpecificLoader>::loadArgMax(const Operator *op, ir::Graph &subg)
 {
   ir::OperandIndexSequence inputs;
@@ -1697,30 +1619,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadArgMax(const Operator *op, ir
 }
 
 template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLog(const Operator *op, ir::Graph &subg)
-{
-  ir::OperandIndexSequence inputs;
-  ir::OperandIndexSequence outputs;
-
-  loadOperationIO(op, inputs, outputs);
-
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::Log(inputs, outputs));
-  subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadRound(const Operator *op, ir::Graph &subg)
-{
-  ir::OperandIndexSequence inputs;
-  ir::OperandIndexSequence outputs;
-
-  loadOperationIO(op, inputs, outputs);
-
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::Round(inputs, outputs));
-  subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
 void BaseLoader<LoaderDomain, SpecificLoader>::loadPow(const Operator *op, ir::Graph &subg)
 {
   ir::OperandIndexSequence inputs;
@@ -1733,31 +1631,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadPow(const Operator *op, ir::G
 }
 
 template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLogicalNot(const Operator *op, ir::Graph &subg)
-{
-  ir::OperandIndexSequence inputs;
-  ir::OperandIndexSequence outputs;
-
-  loadOperationIO(op, inputs, outputs);
-
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::LogicalNot(inputs, outputs));
-  subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadZerosLike(const Operator *op, ir::Graph &subg)
-{
-  ir::OperandIndexSequence inputs;
-  ir::OperandIndexSequence outputs;
-
-  loadOperationIO(op, inputs, outputs);
-
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::ZerosLike(inputs, outputs));
-
-  subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
 void BaseLoader<LoaderDomain, SpecificLoader>::loadRange(const Operator *op, ir::Graph &subg)
 {
   ir::OperandIndexSequence inputs;
@@ -1787,18 +1660,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadTile(const Operator *op, ir::
 }
 
 template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadLogicalOr(const Operator *op, ir::Graph &subg)
-{
-  ir::OperandIndexSequence inputs;
-  ir::OperandIndexSequence outputs;
-
-  loadOperationIO(op, inputs, outputs);
-
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::LogicalOr(inputs, outputs));
-  subg.addOperation(std::move(new_op));
-}
-
-template <typename LoaderDomain, typename SpecificLoader>
 void BaseLoader<LoaderDomain, SpecificLoader>::loadLogSoftmax(const Operator *op, ir::Graph &subg)
 {
   ir::OperandIndexSequence inputs;
@@ -1817,18 +1678,27 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadLogSoftmax(const Operator *op
 }
 
 template <typename LoaderDomain, typename SpecificLoader>
-void BaseLoader<LoaderDomain, SpecificLoader>::loadQuantize(const Operator *op, ir::Graph &subg)
+void BaseLoader<LoaderDomain, SpecificLoader>::loadL2Normalization(const Operator *op,
+                                                                   ir::Graph &subg)
 {
   ir::OperandIndexSequence inputs;
   ir::OperandIndexSequence outputs;
 
   loadOperationIO(op, inputs, outputs);
 
-  std::unique_ptr<ir::Operation> new_op(new ir::operation::Quantize(inputs, outputs));
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::L2Normalization(inputs, outputs));
   subg.addOperation(std::move(new_op));
 }
 
 template <typename LoaderDomain, typename SpecificLoader>
+void BaseLoader<LoaderDomain, SpecificLoader>::loadLeakyRelu(const Operator *op, ir::Graph &subg)
+{
+  float alpha = op->builtin_options_as_LeakyReluOptions()->alpha();
+  loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::LEAKY_RELU, alpha,
+                            1.f);
+}
+
+template <typename LoaderDomain, typename SpecificLoader>
 void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, ir::Graph &subg)
 {
   const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
@@ -1839,7 +1709,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
       loadConv2D(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_AVERAGE_POOL_2D:
-      loadAvgPool2D(op, subg);
+      loadPool2D(op, subg, ir::operation::Pool2D::PoolType::AVG);
       return;
     case BuiltinOperator::BuiltinOperator_DEPTHWISE_CONV_2D:
       loadDepthwiseConv2D(op, subg);
@@ -1854,7 +1724,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
       loadSoftmax(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_MAX_POOL_2D:
-      loadMaxPool2D(op, subg);
+      loadPool2D(op, subg, ir::operation::Pool2D::PoolType::MAX);
       return;
     case BuiltinOperator::BuiltinOperator_CONCATENATION:
       loadConcatenation(op, subg);
@@ -1863,31 +1733,40 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
       loadFC(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_ADD:
-      loadAdd(op, subg);
+      loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::ADD>(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_SUB:
-      loadSub(op, subg);
+      loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::SUB>(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_MUL:
-      loadMul(op, subg);
+      loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::MUL>(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_DIV:
-      loadDiv(op, subg);
+      loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::DIV>(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_PACK:
       loadPack(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_RELU:
-      loadRelu(op, subg);
+      loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU,
+                                ir::operation::ElementwiseActivation::infinity, 0.f);
+      return;
+    case BuiltinOperator::BuiltinOperator_RELU_N1_TO_1:
+      loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU, 1.f,
+                                -1.f);
       return;
     case BuiltinOperator::BuiltinOperator_RELU6:
-      loadRelu6(op, subg);
+      loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU, 6.f,
+                                0.f);
       return;
     case BuiltinOperator::BuiltinOperator_RESIZE_BILINEAR:
       loadResizeBilinear(op, subg);
       return;
+    case BuiltinOperator::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR:
+      loadResizeNearestNeighbor(op, subg);
+      return;
     case BuiltinOperator::BuiltinOperator_RSQRT:
-      loadRsqrt(op, subg);
+      loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::RSQRT);
       return;
     case BuiltinOperator::BuiltinOperator_SELECT:
       loadSelect(op, subg);
@@ -1897,37 +1776,39 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
       loadSelect(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_SQRT:
-      loadSqrt(op, subg);
+      loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SQRT);
       return;
     case BuiltinOperator::BuiltinOperator_SQUARED_DIFFERENCE:
       loadSquaredDifference(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_TANH:
-      loadTanh(op, subg);
+      loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::TANH, 1.f,
+                                1.f);
       return;
     case BuiltinOperator::BuiltinOperator_TRANSPOSE:
       loadTranspose(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_MEAN:
-      loadReduce(op, subg, ir::operation::Reduce::ReduceType::MEAN);
+      loadReduce<ir::operation::Reduce::ReduceType::MEAN>(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_REDUCE_ANY:
-      loadReduce(op, subg, ir::operation::Reduce::ReduceType::ANY);
+      loadReduce<ir::operation::Reduce::ReduceType::ANY>(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_REDUCE_MAX:
-      loadReduce(op, subg, ir::operation::Reduce::ReduceType::MAX);
+      loadReduce<ir::operation::Reduce::ReduceType::MAX>(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_REVERSE_V2:
       loadReverseV2(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_PAD:
+    case BuiltinOperator::BuiltinOperator_PADV2:
       loadPad(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_LOGISTIC:
-      loadLogistic(op, subg);
+      loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::LOGISTIC);
       return;
     case BuiltinOperator::BuiltinOperator_EXP:
-      loadExp(op, subg);
+      loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::EXP);
       return;
     case BuiltinOperator::BuiltinOperator_EXPAND_DIMS:
       loadExpandDims(op, subg);
@@ -1942,7 +1823,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
       loadBatchToSpaceND(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_SUM:
-      loadReduce(op, subg, ir::operation::Reduce::ReduceType::SUM);
+      loadReduce<ir::operation::Reduce::ReduceType::SUM>(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_CUSTOM:
       loadCustom(op, subg);
@@ -1969,13 +1850,13 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
       loadUnpack(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_MINIMUM:
-      loadMinimum(op, subg);
+      loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN>(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_MAXIMUM:
-      loadMaximum(op, subg);
+      loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX>(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_CAST:
-      loadCast(op, subg);
+      loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::CAST);
       return;
     case BuiltinOperator::BuiltinOperator_EQUAL:
     case BuiltinOperator::BuiltinOperator_NOT_EQUAL:
@@ -1989,19 +1870,19 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
       loadOneHot(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_ABS:
-      loadAbs(op, subg);
+      loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ABS);
       return;
     case BuiltinOperator::BuiltinOperator_COS:
-      loadCos(op, subg);
+      loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::COS);
       return;
     case BuiltinOperator::BuiltinOperator_SIN:
-      loadSin(op, subg);
+      loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SIN);
       return;
     case BuiltinOperator::BuiltinOperator_SHAPE:
       loadShape(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_REDUCE_PROD:
-      loadReduce(op, subg, ir::operation::Reduce::ReduceType::PROD);
+      loadReduce<ir::operation::Reduce::ReduceType::PROD>(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_IF:
       loadIf(op, subg);
@@ -2010,31 +1891,32 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
       loadWhile(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_NEG:
-      loadNeg(op, subg);
+      loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::NEG);
       return;
     case BuiltinOperator::BuiltinOperator_ARG_MAX:
       loadArgMax(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_LOG:
-      loadLog(op, subg);
+      loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOG);
       return;
     case BuiltinOperator::BuiltinOperator_ROUND:
-      loadRound(op, subg);
+      loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ROUND);
       return;
     case BuiltinOperator::BuiltinOperator_POW:
       loadPow(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_LOGICAL_NOT:
-      loadLogicalNot(op, subg);
+      loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOGICAL_NOT);
       return;
     case BuiltinOperator::BuiltinOperator_LOGICAL_OR:
-      loadLogicalOr(op, subg);
+      loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR>(
+          op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_FILL:
       loadFill(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_ZEROS_LIKE:
-      loadZerosLike(op, subg);
+      loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ZEROS_LIKE);
       return;
     case BuiltinOperator::BuiltinOperator_TILE:
       loadTile(op, subg);
@@ -2049,11 +1931,20 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op,
       loadLogSoftmax(op, subg);
       return;
     case BuiltinOperator::BuiltinOperator_QUANTIZE:
-      loadQuantize(op, subg);
+      loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::QUANTIZE);
       return;
     case BuiltinOperator::BuiltinOperator_SPACE_TO_DEPTH:
       loadSpaceToDepth(op, subg);
       return;
+    case BuiltinOperator::BuiltinOperator_L2_NORMALIZATION:
+      loadL2Normalization(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_LEAKY_RELU:
+      loadLeakyRelu(op, subg);
+      return;
+    case BuiltinOperator::BuiltinOperator_RANK:
+      loadRank(op, subg);
+      return;
     default:
       throw std::runtime_error(
           std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op)));
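
The hunks above collapse the per-operator loaders (loadAbs, loadCos, loadSin, loadNeg, loadLog, ...) into two parameterized ones: loadElementwiseUnary takes the op type as a runtime argument, while loadElementwiseBinary and loadReduce take it as a template argument, which is why MAXIMUM and REDUCE_PROD dispatch through angle brackets. A minimal sketch of what the consolidated unary loader plausibly looks like; the helper name loadOperationIO and the exact signatures are assumptions inferred from the call sites, not verified against onert's base_loader.h:

    template <typename LoaderDomain, typename SpecificLoader>
    void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseUnary(
        const Operator *op, ir::Graph &subg, ir::operation::ElementwiseUnary::Type op_type)
    {
      ir::OperandIndexSequence inputs;
      ir::OperandIndexSequence outputs;
      loadOperationIO(op, inputs, outputs); // assumed common input/output wiring helper

      ir::operation::ElementwiseUnary::Param param;
      param.op_type = op_type; // ABS, COS, SIN, NEG, LOG, ROUND, ... chosen by the caller

      subg.addOperation(std::make_unique<ir::operation::ElementwiseUnary>(inputs, outputs, param));
    }
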
index 96dd469..92a9ee7 100644 (file)
@@ -103,12 +103,14 @@ public:
     // Set inputs
     for (const std::int32_t input_ind : *circle_subg->inputs())
     {
-      subg->addInput(tensorIdxToOperandIdx(input_ind));
+      subg->addInput(tensorIdxToOperandIdx(input_ind),
+                     _tensor_names.at(_tensor_to_operand[input_ind]));
     }
     // Set outputs
     for (const std::int32_t output_ind : *circle_subg->outputs())
     {
-      subg->addOutput(tensorIdxToOperandIdx(output_ind));
+      subg->addOutput(tensorIdxToOperandIdx(output_ind),
+                      _tensor_names.at(_tensor_to_operand[output_ind]));
     }
     // Create operations
     for (const auto *op : *circle_subg->operators())
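
This loader change threads the original flatbuffer tensor name through to the subgraph, so model inputs and outputs stay addressable by name after import. A hypothetical illustration of the lookup chain behind the two calls above; the member names come from the call sites, the container types are assumptions:

    // _tensor_to_operand : flatbuffer tensor index -> ir::OperandIndex (assumed std::vector)
    // _tensor_names      : ir::OperandIndex -> std::string             (assumed map)
    std::string graphIOName(std::int32_t tensor_ind) const
    {
      return _tensor_names.at(_tensor_to_operand[tensor_ind]);
    }

The tflite loader further down receives the identical change.
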
index 8ff6cbb..8e3d83d 100644 (file)
@@ -83,6 +83,189 @@ uint32_t getUint32Scalar(Operands &operands, const OperandIndex index)
 }
 
 OperationFactory::Generator
+getElementwiseActivationGenerator(const onert::ir::operation::ElementwiseActivation::Type op_type,
+                                  float alpha = 0.f, float beta = 0.f)
+{
+  return [op_type, alpha, beta](const OperationFactory::Param &init_param, Operands &) {
+    assert(init_param.input_count == 1);
+    assert(init_param.output_count == 1);
+
+    // Each input should be interpreted as follows:
+    //
+    //  0 -> Input Tensor Index
+
+    OperandIndexSequence inputs{init_param.inputs[0]};
+    OperandIndexSequence outputs{init_param.outputs[0]};
+
+    operation::ElementwiseActivation::Param param;
+    param.op_type = op_type;
+    param.alpha = alpha;
+    param.beta = beta;
+
+    return new operation::ElementwiseActivation{inputs, outputs, param};
+  };
+}
+
+OperationFactory::Generator getElementwiseBinaryGenerator(
+    const onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type)
+{
+  return [op_type](const OperationFactory::Param &init_param, Operands &) {
+    assert(init_param.input_count == 2);
+    assert(init_param.output_count == 1);
+
+    // Each input should be interpreted as follows:
+    //
+    //  0 -> Lefthand side operand
+    //  1 -> Righthand side operand
+
+    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+    OperandIndexSequence outputs{init_param.outputs[0]};
+
+    operation::ElementwiseBinary::Param param;
+    param.op_type = op_type;
+
+    return new operation::ElementwiseBinary{inputs, outputs, param};
+  };
+}
+
+OperationFactory::Generator
+getElementwiseUnaryGenerator(const onert::ir::operation::ElementwiseUnary::Type op_type)
+{
+  return [op_type](const OperationFactory::Param &init_param, Operands &operands) {
+    assert(init_param.input_count == 1);
+    assert(init_param.output_count == 1);
+
+    // Each input should be interpreted as follows:
+    //
+    //  0 ->  Input Tensor Index
+
+    OperandIndexSequence inputs{init_param.inputs[0]};
+    OperandIndexSequence outputs{init_param.outputs[0]};
+
+    operation::ElementwiseUnary::Param param;
+    param.op_type = op_type;
+
+    if (op_type == operation::ElementwiseUnary::Type::CAST)
+    {
+      // NNAPI uses QUANT_UINT8_ASYMM to represent UINT8 type for ANEURALNETWORKS_CAST's
+      // input/output
+      if (operands.at(inputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM)
+      {
+        replaceDataType(operands, inputs.at(0), DataType::UINT8);
+      }
+      if (operands.at(outputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM)
+      {
+        replaceDataType(operands, outputs.at(0), DataType::UINT8);
+      }
+    }
+
+    return new operation::ElementwiseUnary{inputs, outputs, param};
+  };
+}
+
+OperationFactory::Generator
+getBinaryArithmeticGenerator(const onert::ir::operation::BinaryArithmetic::ArithmeticType op_type)
+{
+  return [op_type](const OperationFactory::Param &init_param, Operands &operands) {
+    assert(init_param.input_count == 3);
+    assert(init_param.output_count == 1);
+
+    // Each input should be interpreted as follows:
+    //
+    //  0 -> Lefthand side operand
+    //  1 -> Righthand side operand
+
+    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
+    OperandIndexSequence outputs{init_param.outputs[0]};
+
+    operation::BinaryArithmetic::Param param;
+    param.arithmetic_type = op_type;
+    const auto activation_index = OperandIndex{init_param.inputs[2]};
+    param.activation =
+        NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+
+    return new operation::BinaryArithmetic{inputs, outputs, param};
+  };
+}
+
+OperationFactory::Generator
+getPool2DGenerator(const onert::ir::operation::Pool2D::PoolType pool_type)
+{
+  return [pool_type](const OperationFactory::Param &init_param, Operands &operands) {
+    assert(init_param.input_count == 7 || init_param.input_count == 10);
+    assert(init_param.output_count == 1);
+
+    // In common
+    //  0 -> IFM Tensor Index
+    OperandIndexSequence inputs{init_param.inputs[0]};
+    OperandIndexSequence outputs{init_param.outputs[0]};
+
+    operation::Pool2D::Param param;
+    param.op_type = pool_type;
+    if (init_param.input_count == 7) // support implicit padding
+    {
+      // Each input should be interpreted as follows:
+      //
+      //  1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
+      //  2 -> Horizontal (over width) Stride Index
+      //  3 -> Vertical (over height) Stride Index
+      //  4 -> Filter Width Index
+      //  5 -> Filter Height Index
+      //  6 -> FuseCode (activation) Index
+
+      const auto padding_index = OperandIndex{init_param.inputs[1]};
+      const auto hstride_index = OperandIndex{init_param.inputs[2]};
+      const auto vstride_index = OperandIndex{init_param.inputs[3]};
+      const auto kw_index = OperandIndex{init_param.inputs[4]};
+      const auto kh_index = OperandIndex{init_param.inputs[5]};
+      const auto activation_index = OperandIndex{init_param.inputs[6]};
+
+      param.padding.type =
+          NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
+      param.stride = makeStride(operands, hstride_index, vstride_index);
+      param.kw = getUint32Scalar(operands, kw_index);
+      param.kh = operands.at(kh_index).asScalar<uint32_t>();
+      param.activation =
+          NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+    }
+    else // support explicit padding
+    {
+      // Each input should be interpreted as follows:
+      //
+      //  1 -> Padding_left index
+      //  2 -> Padding_right index
+      //  3 -> Padding_top index
+      //  4 -> Padding_bottom index
+      //  5 -> Horizontal (over width) Stride Index
+      //  6 -> Vertical (over height) Stride Index
+      //  7 -> Filter Width Index
+      //  8 -> Filter Height Index
+      //  9 -> FuseCode (activation) Index
+
+      const auto padding_left_index = OperandIndex{init_param.inputs[1]};
+      const auto padding_right_index = OperandIndex{init_param.inputs[2]};
+      const auto padding_top_index = OperandIndex{init_param.inputs[3]};
+      const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
+      const auto hstride_index = OperandIndex{init_param.inputs[5]};
+      const auto vstride_index = OperandIndex{init_param.inputs[6]};
+      const auto kw_index = OperandIndex{init_param.inputs[7]};
+      const auto kh_index = OperandIndex{init_param.inputs[8]};
+      const auto activation_index = OperandIndex{init_param.inputs[9]};
+
+      param.padding.type = PaddingType::EXPLICIT;
+      param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
+                                                padding_top_index, padding_bottom_index);
+      param.stride = makeStride(operands, hstride_index, vstride_index);
+      param.kw = getUint32Scalar(operands, kw_index);
+      param.kh = getUint32Scalar(operands, kh_index);
+      param.activation =
+          NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+    }
+    return new operation::Pool2D{inputs, outputs, param};
+  };
+}
+
+OperationFactory::Generator
 getReduceGenerator(const onert::ir::operation::Reduce::ReduceType reduce_type)
 {
   return [reduce_type](const OperationFactory::Param &init_param, Operands &operands) {
@@ -133,79 +316,24 @@ Operation *createSimpleBinaryOp(const OperationFactory::Param &init_param, Opera
   return new T{inputs, outputs};
 }
 
-// A generator function for binary ops with no params
-template <typename T>
-Operation *createPool2DOp(const OperationFactory::Param &init_param, Operands &operands)
+OperationFactory::Generator getComparisonGenerator(operation::Comparison::ComparisonType type)
 {
-  assert(init_param.input_count == 7 || init_param.input_count == 10);
-  assert(init_param.output_count == 1);
+  return [type](const OperationFactory::Param &init_param, Operands &) -> Operation * {
+    assert(init_param.input_count == 2 && init_param.output_count == 1);
 
-  // In common
-  //  0 -> IFM Tensor Index
-  OperandIndexSequence inputs{init_param.inputs[0]};
-  OperandIndexSequence outputs{init_param.outputs[0]};
+    OperandIndexSequence outputs{init_param.outputs[0]};
 
-  typename T::Param param;
-  if (init_param.input_count == 7) // support implicit padding
-  {
     // Each input should be interpreted as follows:
     //
-    //  1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
-    //  2 -> Horizontal (over width) Stride Index
-    //  3 -> Vertial (over height) Stride Index
-    //  4 -> Filter Width Index
-    //  5 -> Filter Height Index
-    //  6 -> FuseCode (activation) Index
-
-    const auto padding_index = OperandIndex{init_param.inputs[1]};
-    const auto hstride_index = OperandIndex{init_param.inputs[2]};
-    const auto vstride_index = OperandIndex{init_param.inputs[3]};
-    const auto kw_index = OperandIndex{init_param.inputs[4]};
-    const auto kh_index = OperandIndex{init_param.inputs[5]};
-    const auto activation_index = OperandIndex{init_param.inputs[6]};
+    //  0 -> input0 Tensor Index
+    //  1 -> input1 Tensor Index
+    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
 
-    param.padding.type =
-        NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
-    param.stride = makeStride(operands, hstride_index, vstride_index);
-    param.kw = getUint32Scalar(operands, kw_index);
-    param.kh = operands.at(kh_index).asScalar<uint32_t>();
-    param.activation =
-        NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
-  }
-  else // support explicit padding
-  {
-    // Each input should be interpreted as follows:
-    //
-    //  1 -> Padding_left index
-    //  2 -> Padding_right index
-    //  3 -> Padding_top index
-    //  4 -> Padding_bottom index
-    //  5 -> Horizontal (over width) Stride Index
-    //  6 -> Vertial (over height) Stride Index
-    //  7 -> Filter Width Index
-    //  8 -> Filter Height Index
-    //  9 -> FuseCode (activation) Index
-
-    const auto padding_left_index = OperandIndex{init_param.inputs[1]};
-    const auto padding_right_index = OperandIndex{init_param.inputs[2]};
-    const auto padding_top_index = OperandIndex{init_param.inputs[3]};
-    const auto padding_bottom_index = OperandIndex{init_param.inputs[4]};
-    const auto hstride_index = OperandIndex{init_param.inputs[5]};
-    const auto vstride_index = OperandIndex{init_param.inputs[6]};
-    const auto kw_index = OperandIndex{init_param.inputs[7]};
-    const auto kh_index = OperandIndex{init_param.inputs[8]};
-    const auto activation_index = OperandIndex{init_param.inputs[9]};
-
-    param.padding.type = PaddingType::EXPLICIT;
-    param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
-                                              padding_top_index, padding_bottom_index);
-    param.stride = makeStride(operands, hstride_index, vstride_index);
-    param.kw = getUint32Scalar(operands, kw_index);
-    param.kh = getUint32Scalar(operands, kh_index);
-    param.activation =
-        NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
-  }
-  return new T{inputs, outputs, param};
+    operation::Comparison::Param param;
+    param.comparison_type = type;
+
+    return new operation::Comparison{inputs, outputs, param};
+  };
 }
 
 } // namespace
@@ -295,9 +423,9 @@ OperationFactory::OperationFactory()
     return new operation::DepthwiseConv2D{inputs, outputs, param};
   };
 
-  _map[ANEURALNETWORKS_MAX_POOL_2D] = createPool2DOp<operation::MaxPool2D>;
+  _map[ANEURALNETWORKS_MAX_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::MAX);
 
-  _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = createPool2DOp<operation::AvgPool2D>;
+  _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::AVG);
 
   _map[ANEURALNETWORKS_CONCATENATION] = [](const OperationFactory::Param &init_param,
                                            Operands &operands) {
@@ -383,27 +511,8 @@ OperationFactory::OperationFactory()
     return new operation::Softmax{inputs, outputs, param};
   };
 
-  _map[ANEURALNETWORKS_CAST] = [](const OperationFactory::Param &init_param, Operands &operands) {
-    assert(init_param.input_count == 1 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //  0 -> input Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0]};
-
-    // NNAPI uses QUANT_UINT8_ASYMM to represent UINT8 type for ANEURALNETWORKS_CAST's input/output
-    if (operands.at(inputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM)
-    {
-      replaceDataType(operands, inputs.at(0), DataType::UINT8);
-    }
-    if (operands.at(outputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM)
-    {
-      replaceDataType(operands, outputs.at(0), DataType::UINT8);
-    }
-
-    return new operation::Cast{inputs, outputs};
-  };
+  _map[ANEURALNETWORKS_CAST] =
+      getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST);
 
   // ANEURALNETWORKS_CAST_EX is deprecated
   // TODO Remove ANEURALNETWORKS_CAST_EX
@@ -416,7 +525,8 @@ OperationFactory::OperationFactory()
     // inputCount is 7, 10 or 13 according to the NN API specification.
     //  - Padding is implicit when inputCount is 7
     //  - Padding is explicit when inputCount is 10
     //  - Padding is explicit and dilation factors are given when inputCount is 13
-    assert(init_param.input_count == 7 || init_param.input_count == 10);
+    assert(init_param.input_count == 7 || init_param.input_count == 10 ||
+           init_param.input_count == 13);
     assert(init_param.output_count == 1);
 
     //  0 -> IFM Tensor Index
@@ -427,7 +537,6 @@ OperationFactory::OperationFactory()
     OperandIndexSequence outputs{init_param.outputs[0]};
 
     Conv2D::Param param;
-
     if (init_param.input_count == 7) // support implicit padding
     {
       // Each input should be interpreted as follows:
@@ -445,6 +554,10 @@ OperationFactory::OperationFactory()
       param.padding.type =
           NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>());
       param.stride = makeStride(operands, hstride_index, vstride_index);
+
+      param.dilation.width_factor = 1;
+      param.dilation.height_factor = 1;
+
       param.activation =
           NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
     }
@@ -472,34 +585,62 @@ OperationFactory::OperationFactory()
       param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
                                                 padding_top_index, padding_bottom_index);
       param.stride = makeStride(operands, hstride_index, vstride_index);
+
+      param.dilation.width_factor = 1;
+      param.dilation.height_factor = 1;
+
       param.activation =
           NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
     }
+    else if (init_param.input_count == 13) // support dilation
+    {
+      // Each input should be interpreted as follows:
+      //
+      //  3 -> Padding_left Index
+      //  4 -> Padding_right Index
+      //  5 -> Padding_top Index
+      //  6 -> Padding_bottom Index
+      //  7 -> Stride (width) Index
+      //  8 -> Stride (height) Index
+      //  9 -> Activation Index
+      //  11 -> Dilation (width_factor) Index
+      //  12 -> Dilation (height_factor) Index
 
-    return new Conv2D{inputs, outputs, param};
-  };
-
-  _map[ANEURALNETWORKS_ADD] = [](const OperationFactory::Param &init_param, Operands &operands) {
-    assert(init_param.input_count == 3);
-    assert(init_param.output_count == 1);
+      const auto padding_left_index = OperandIndex{init_param.inputs[3]};
+      const auto padding_right_index = OperandIndex{init_param.inputs[4]};
+      const auto padding_top_index = OperandIndex{init_param.inputs[5]};
+      const auto padding_bottom_index = OperandIndex{init_param.inputs[6]};
+      const auto hstride_index = OperandIndex{init_param.inputs[7]};
+      const auto vstride_index = OperandIndex{init_param.inputs[8]};
+      const auto activation_index = OperandIndex{init_param.inputs[9]};
+      const auto width_factor_index = OperandIndex{init_param.inputs[11]};
+      const auto height_factor_index = OperandIndex{init_param.inputs[12]};
 
-    // Each input should be interpreted as follows:
-    //
-    //  0 -> Lefthand side operand
-    //  1 -> Righthand side operand
+      param.padding.type = PaddingType::EXPLICIT;
+      param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index,
+                                                padding_top_index, padding_bottom_index);
+      param.stride = makeStride(operands, hstride_index, vstride_index);
 
-    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-    OperandIndexSequence outputs{init_param.outputs[0]};
+      auto width_factor = operands.at(width_factor_index).asScalar<int32_t>();
+      auto height_factor = operands.at(height_factor_index).asScalar<int32_t>();
 
-    operation::Add::Param param;
+      param.dilation.width_factor = width_factor;
+      param.dilation.height_factor = height_factor;
 
-    const auto activation_index = OperandIndex{init_param.inputs[2]};
-    param.activation =
-        NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+      param.activation =
+          NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
+    }
+    else
+    {
+      throw std::runtime_error{"Conv2D: unsupported input operand count"};
+    }
 
-    return new operation::Add{inputs, outputs, param};
+    return new Conv2D{inputs, outputs, param};
   };
 
+  _map[ANEURALNETWORKS_ADD] =
+      getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::ADD);
+
   _map[ANEURALNETWORKS_ADDV2_EX] = _map[ANEURALNETWORKS_ADD];
 
   _map[ANEURALNETWORKS_REDUCE_SUM] =
@@ -509,26 +650,8 @@ OperationFactory::OperationFactory()
   // TODO Remove ANEURALNETWORKS_REDUCE_SUM_EX
   _map[ANEURALNETWORKS_REDUCE_SUM_EX] = _map[ANEURALNETWORKS_REDUCE_SUM];
 
-  _map[ANEURALNETWORKS_SUB] = [](const OperationFactory::Param &init_param, Operands &operands) {
-    assert(init_param.input_count == 3);
-    assert(init_param.output_count == 1);
-
-    // Each input should be interpreted as follows:
-    //
-    //  0 -> Lefthand side operand
-    //  1 -> Righthand side operand
-
-    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    operation::Sub::Param param;
-
-    const auto activation_index = OperandIndex{init_param.inputs[2]};
-    param.activation =
-        NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
-
-    return new operation::Sub{inputs, outputs, param};
-  };
+  _map[ANEURALNETWORKS_SUB] =
+      getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB);
 
   _map[ANEURALNETWORKS_SLICE] = [](const OperationFactory::Param &init_param, Operands &) {
     assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -611,27 +734,8 @@ OperationFactory::OperationFactory()
     return new operation::Transpose{inputs, outputs, param};
   };
 
-  _map[ANEURALNETWORKS_MUL] = [](const OperationFactory::Param &init_param, Operands &operands) {
-    assert(init_param.input_count == 3 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //
-    //  0 -> LHS Tensor Index
-    //  1 -> RHS Tensor Index
-    //  2 -> Activation Index
-
-    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
-    operation::Mul::Param param;
-
-    const auto activation_index = OperandIndex{init_param.inputs[2]};
-    param.activation =
-        NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
-
-    return new operation::Mul{inputs, outputs, param};
-  };
+  _map[ANEURALNETWORKS_MUL] =
+      getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::MUL);
 
   _map[ANEURALNETWORKS_SQUEEZE] = [](const OperationFactory::Param &init_param,
                                      Operands &operands) {
@@ -672,34 +776,18 @@ OperationFactory::OperationFactory()
     return new operation::Squeeze{inputs, outputs, param};
   };
 
-  _map[ANEURALNETWORKS_TANH] = CreateSimpleUnaryOp<operation::Tanh>;
+  _map[ANEURALNETWORKS_TANH] = getElementwiseActivationGenerator(
+      onert::ir::operation::ElementwiseActivation::Type::TANH, 1.f, 1.f);
 
-  _map[ANEURALNETWORKS_LOG] = CreateSimpleUnaryOp<operation::Log>;
+  _map[ANEURALNETWORKS_LOG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOG);
 
-  _map[ANEURALNETWORKS_LOGISTIC] = CreateSimpleUnaryOp<operation::Logistic>;
+  _map[ANEURALNETWORKS_LOGISTIC] = getElementwiseActivationGenerator(
+      onert::ir::operation::ElementwiseActivation::Type::LOGISTIC);
 
-  _map[ANEURALNETWORKS_DIV] = [](const OperationFactory::Param &init_param, Operands &operands) {
-    assert(init_param.input_count == 3 && init_param.output_count == 1);
+  _map[ANEURALNETWORKS_DIV] =
+      getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::DIV);
 
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //
-    //  0 -> LHS Tensor Index
-    //  1 -> RHS Tensor Index
-    //  2 -> Activation Index
-    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
-    operation::Div::Param param;
-
-    const auto activation_index = OperandIndex{init_param.inputs[2]};
-    param.activation =
-        NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>());
-
-    return new operation::Div{inputs, outputs, param};
-  };
-
-  _map[ANEURALNETWORKS_EXP] = CreateSimpleUnaryOp<operation::Exp>;
+  _map[ANEURALNETWORKS_EXP] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::EXP);
 
   // ANEURALNETWORKS_EXP_EX is deprecated
   // TODO Remove ANEURALNETWORKS_EXP_EX
@@ -710,39 +798,17 @@ OperationFactory::OperationFactory()
   //  1 -> Axis Tensor Index
   _map[ANEURALNETWORKS_EXPAND_DIMS] = createSimpleBinaryOp<operation::ExpandDims>;
 
-  _map[ANEURALNETWORKS_GREATER] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 2 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //
-    //  0 -> input0 Tensor Index
-    //  1 -> input1 Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
-    operation::Comparison::Param param;
-    param.comparison_type = operation::Comparison::ComparisonType::Greater;
-
-    return new operation::Comparison{inputs, outputs, param};
-  };
-
-  _map[ANEURALNETWORKS_GREATER_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 2 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //
-    //  0 -> input0 Tensor Index
-    //  1 -> input1 Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
-    operation::Comparison::Param param;
-    param.comparison_type = operation::Comparison::ComparisonType::GreaterEqual;
-
-    return new operation::Comparison{inputs, outputs, param};
-  };
+  _map[ANEURALNETWORKS_GREATER] =
+      getComparisonGenerator(operation::Comparison::ComparisonType::Greater);
+  _map[ANEURALNETWORKS_GREATER_EQUAL] =
+      getComparisonGenerator(operation::Comparison::ComparisonType::GreaterEqual);
+  _map[ANEURALNETWORKS_LESS] = getComparisonGenerator(operation::Comparison::ComparisonType::Less);
+  _map[ANEURALNETWORKS_LESS_EQUAL] =
+      getComparisonGenerator(operation::Comparison::ComparisonType::LessEqual);
+  _map[ANEURALNETWORKS_NOT_EQUAL] =
+      getComparisonGenerator(operation::Comparison::ComparisonType::NotEqual);
+  _map[ANEURALNETWORKS_EQUAL] =
+      getComparisonGenerator(operation::Comparison::ComparisonType::Equal);
 
   // ANEURALNETWORKS_GREATER_EQUAL_EX is deprecated
   // TODO Remove ANEURALNETWORKS_GREATER_EQUAL_EX
@@ -767,40 +833,6 @@ OperationFactory::OperationFactory()
     return new operation::Comparison{inputs, outputs, param};
   };
 
-  _map[ANEURALNETWORKS_LESS] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 2 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //
-    //  0 -> input0 Tensor Index
-    //  1 -> input1 Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
-    operation::Comparison::Param param;
-    param.comparison_type = operation::Comparison::ComparisonType::Less;
-
-    return new operation::Comparison{inputs, outputs, param};
-  };
-
-  _map[ANEURALNETWORKS_LESS_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 2 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //
-    //  0 -> input0 Tensor Index
-    //  1 -> input1 Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
-    operation::Comparison::Param param;
-    param.comparison_type = operation::Comparison::ComparisonType::LessEqual;
-
-    return new operation::Comparison{inputs, outputs, param};
-  };
-
   // ANEURALNETWORKS_LESS_EX is deprecated
   // TODO Remove ANEURALNETWORKS_LESS_EX
   _map[ANEURALNETWORKS_LESS_EX] = [](const OperationFactory::Param &init_param,
@@ -837,23 +869,6 @@ OperationFactory::OperationFactory()
   // TODO Remove ANEURALNETWORKS_REDUCE_MAX_EX
   _map[ANEURALNETWORKS_REDUCE_MAX_EX] = _map[ANEURALNETWORKS_REDUCE_MAX];
 
-  _map[ANEURALNETWORKS_NOT_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 2 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //
-    //  0 -> input1 Tensor Index
-    //  1 -> input2 Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
-    operation::Comparison::Param param;
-    param.comparison_type = operation::Comparison::ComparisonType::NotEqual;
-
-    return new operation::Comparison{inputs, outputs, param};
-  };
-
   // ANEURALNETWORKS_NOT_EQUAL_EX is deprecated
   // TODO Remove ANEURALNETWORKS_NOT_EQUAL_EX
   _map[ANEURALNETWORKS_NOT_EQUAL_EX] = [](const OperationFactory::Param &init_param,
@@ -877,7 +892,8 @@ OperationFactory::OperationFactory()
     return new operation::Comparison{inputs, outputs, param};
   };
 
-  _map[ANEURALNETWORKS_LOGICAL_AND] = createSimpleBinaryOp<operation::LogicalAnd>;
+  _map[ANEURALNETWORKS_LOGICAL_AND] = getElementwiseBinaryGenerator(
+      operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND);
 
   // ANEURALNETWORKS_LOGICAL_AND_EX is deprecated
   // TODO Remove ANEURALNETWORKS_LOGICAL_AND_EX
@@ -898,10 +914,14 @@ OperationFactory::OperationFactory()
     replaceDataType(operands, inputs.at(1), DataType::BOOL8);
     replaceDataType(operands, outputs.at(0), DataType::BOOL8);
 
-    return new operation::LogicalAnd{inputs, outputs};
+    operation::ElementwiseBinary::Param param;
+    param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND;
+
+    return new operation::ElementwiseBinary{inputs, outputs, param};
   };
 
-  _map[ANEURALNETWORKS_RSQRT] = CreateSimpleUnaryOp<operation::RSQRT>;
+  _map[ANEURALNETWORKS_RSQRT] =
+      getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT);
 
   _map[ANEURALNETWORKS_SELECT] = [](const OperationFactory::Param &init_param, Operands &) {
     assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -937,7 +957,9 @@ OperationFactory::OperationFactory()
   // TODO Remove ANEURALNETWORKS_RSQRT_EX
   _map[ANEURALNETWORKS_RSQRT_EX] = _map[ANEURALNETWORKS_RSQRT];
 
-  _map[ANEURALNETWORKS_RELU] = CreateSimpleUnaryOp<operation::ReLU>;
+  _map[ANEURALNETWORKS_RELU] =
+      getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU,
+                                        onert::ir::operation::ElementwiseActivation::infinity, 0);
 
   _map[ANEURALNETWORKS_RESIZE_BILINEAR] = [](const OperationFactory::Param &init_param,
                                              Operands &operands) {
@@ -960,9 +982,11 @@ OperationFactory::OperationFactory()
     return new operation::ResizeBilinear{inputs, outputs, param};
   };
 
-  _map[ANEURALNETWORKS_RELU1] = CreateSimpleUnaryOp<operation::ReLU1>;
+  _map[ANEURALNETWORKS_RELU1] = getElementwiseActivationGenerator(
+      onert::ir::operation::ElementwiseActivation::Type::RELU, 1.f, -1.f);
 
-  _map[ANEURALNETWORKS_RELU6] = CreateSimpleUnaryOp<operation::ReLU6>;
+  _map[ANEURALNETWORKS_RELU6] = getElementwiseActivationGenerator(
+      onert::ir::operation::ElementwiseActivation::Type::RELU, 6.f, 0.f);
 
   _map[ANEURALNETWORKS_REVERSE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
     assert(init_param.input_count == 2 && init_param.output_count == 1);
@@ -1009,17 +1033,8 @@ OperationFactory::OperationFactory()
     return new operation::RNN{inputs, outputs, param};
   };
 
-  _map[ANEURALNETWORKS_FLOOR] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 1 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //  0 -> input Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0]};
-
-    return new operation::Floor{inputs, outputs};
-  };
+  _map[ANEURALNETWORKS_FLOOR] =
+      getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::FLOOR);
 
   _map[ANEURALNETWORKS_SPACE_TO_BATCH_ND] = [](const OperationFactory::Param &init_param,
                                                Operands &) {
@@ -1059,7 +1074,7 @@ OperationFactory::OperationFactory()
     return new operation::SpaceToDepth{inputs, outputs, param};
   };
 
-  _map[ANEURALNETWORKS_L2_POOL_2D] = createPool2DOp<operation::L2Pool2D>;
+  _map[ANEURALNETWORKS_L2_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::L2);
 
   _map[ANEURALNETWORKS_EMBEDDING_LOOKUP] = [](const OperationFactory::Param &init_param,
                                               Operands &) {
@@ -1157,35 +1172,15 @@ OperationFactory::OperationFactory()
     return new operation::TransposeConv{inputs, outputs, param};
   };
 
-  _map[ANEURALNETWORKS_SQRT] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 1 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //  0 -> input Tensor Index
-
-    OperandIndexSequence inputs{init_param.inputs[0]};
-    return new operation::SQRT{inputs, outputs};
-  };
+  _map[ANEURALNETWORKS_SQRT] =
+      getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT);
 
   // ANEURALNETWORKS_SQRT_EX is deprecated
   // TODO Remove ANEURALNETWORKS_SQRT_EX
   _map[ANEURALNETWORKS_SQRT_EX] = _map[ANEURALNETWORKS_SQRT];
 
-  _map[ANEURALNETWORKS_LOGICAL_OR] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 2 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //
-    //  0 -> input0 Tensor Index
-    //  1 -> input1 Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
-    return new operation::LogicalOr{inputs, outputs};
-  };
+  _map[ANEURALNETWORKS_LOGICAL_OR] = getElementwiseBinaryGenerator(
+      operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR);
 
   // ANEURALNETWORKS_LOGICAL_OR_EX is deprecated
   // TODO Remove ANEURALNETWORKS_LOGICAL_OR_EX
@@ -1206,10 +1201,14 @@ OperationFactory::OperationFactory()
     replaceDataType(operands, inputs.at(1), DataType::BOOL8);
     replaceDataType(operands, outputs.at(0), DataType::BOOL8);
 
-    return new operation::LogicalOr{inputs, outputs};
+    operation::ElementwiseBinary::Param param;
+    param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR;
+
+    return new operation::ElementwiseBinary{inputs, outputs, param};
   };
 
-  _map[ANEURALNETWORKS_LOGICAL_NOT] = CreateSimpleUnaryOp<operation::LogicalNot>;
+  _map[ANEURALNETWORKS_LOGICAL_NOT] =
+      getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT);
 
   // ANEURALNETWORKS_LOGICAL_NOT_EX is deprecated
   // TODO Remove ANEURALNETWORKS_LOGICAL_NOT_EX
@@ -1228,7 +1227,10 @@ OperationFactory::OperationFactory()
     replaceDataType(operands, inputs.at(0), DataType::BOOL8);
     replaceDataType(operands, outputs.at(0), DataType::BOOL8);
 
-    return new operation::LogicalNot{inputs, outputs};
+    operation::ElementwiseUnary::Param param;
+    param.op_type = operation::ElementwiseUnary::Type::LOGICAL_NOT;
+
+    return new operation::ElementwiseUnary{inputs, outputs, param};
   };
 
   _map[ANEURALNETWORKS_LSTM] = [](const OperationFactory::Param &init_param, Operands &operands) {
@@ -1306,23 +1308,6 @@ OperationFactory::OperationFactory()
     return new operation::LSTM{inputs, outputs, param};
   };
 
-  _map[ANEURALNETWORKS_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 2 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //
-    //  0 -> input0 Tensor Index
-    //  1 -> input1 Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
-    operation::Comparison::Param param;
-    param.comparison_type = operation::Comparison::ComparisonType::Equal;
-
-    return new operation::Comparison{inputs, outputs, param};
-  };
-
   // ANEURALNETWORKS_EQUAL_EX is deprecated
   // TODO Remove ANEURALNETWORKS_EQUAL_EX
   _map[ANEURALNETWORKS_EQUAL_EX] = [](const OperationFactory::Param &init_param,
@@ -1409,13 +1394,13 @@ OperationFactory::OperationFactory()
   // TODO Remove ANEURALNETWORKS_GATHER_EX
   _map[ANEURALNETWORKS_GATHER_EX] = _map[ANEURALNETWORKS_GATHER];
 
-  _map[ANEURALNETWORKS_NEG] = CreateSimpleUnaryOp<operation::Neg>;
+  _map[ANEURALNETWORKS_NEG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::NEG);
 
   // ANEURALNETWORKS_NEG_EX is deprecated
   // TODO Remove ANEURALNETWORKS_NEG_EX
   _map[ANEURALNETWORKS_NEG_EX] = _map[ANEURALNETWORKS_NEG];
 
-  _map[ANEURALNETWORKS_ABS] = CreateSimpleUnaryOp<operation::Abs>;
+  _map[ANEURALNETWORKS_ABS] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ABS);
 
   // ANEURALNETWORKS_ABS_EX is deprecated
   // TODO Remove ANEURALNETWORKS_ABS_EX
@@ -1434,6 +1419,8 @@ OperationFactory::OperationFactory()
 
     operation::ArgMax::Param param;
     param.axis = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>();
+    // NNAPI ARGMAX output type is always int32
+    param.output_type = DataType::INT32;
 
     return new operation::ArgMax{inputs, outputs, param};
   };
@@ -1442,7 +1429,8 @@ OperationFactory::OperationFactory()
   // TODO Remove ANEURALNETWORKS_ARGMAX_EX
   _map[ANEURALNETWORKS_ARGMAX_EX] = _map[ANEURALNETWORKS_ARGMAX];
 
-  _map[ANEURALNETWORKS_DEQUANTIZE] = CreateSimpleUnaryOp<operation::Dequantize>;
+  _map[ANEURALNETWORKS_DEQUANTIZE] =
+      getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::DEQUANTIZE);
 
   _map[ANEURALNETWORKS_MEAN] = [](const OperationFactory::Param &init_param, Operands &operands) {
     assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -1600,9 +1588,11 @@ OperationFactory::OperationFactory()
 
   _map[ANEURALNETWORKS_PAD_V2] = _map[ANEURALNETWORKS_PAD];
 
-  _map[ANEURALNETWORKS_MINIMUM] = createSimpleBinaryOp<operation::Min>;
+  _map[ANEURALNETWORKS_MINIMUM] =
+      getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MIN);
 
-  _map[ANEURALNETWORKS_MAXIMUM] = createSimpleBinaryOp<operation::Max>;
+  _map[ANEURALNETWORKS_MAXIMUM] =
+      getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MAX);
 
   _map[ANEURALNETWORKS_ONE_HOT_EX] = [](const OperationFactory::Param &init_param,
                                         Operands &operands) {
@@ -1628,23 +1618,10 @@ OperationFactory::OperationFactory()
     return new operation::OneHot{inputs, outputs, param};
   };
 
-  _map[ANEURALNETWORKS_COS_EX] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 1 && init_param.output_count == 1);
-
-    OperandIndexSequence inputs{init_param.inputs[0]};
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    return new operation::Cos{inputs, outputs};
-  };
+  _map[ANEURALNETWORKS_COS_EX] =
+      getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::COS);
 
-  _map[ANEURALNETWORKS_SIN] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 1 && init_param.output_count == 1);
-
-    OperandIndexSequence inputs{init_param.inputs[0]};
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    return new operation::Sin{inputs, outputs};
-  };
+  _map[ANEURALNETWORKS_SIN] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SIN);
 
   _map[ANEURALNETWORKS_SHAPE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
     assert(init_param.input_count == 1 && init_param.output_count == 1);
@@ -1658,17 +1635,8 @@ OperationFactory::OperationFactory()
   _map[ANEURALNETWORKS_REDUCE_PROD] =
       getReduceGenerator(onert::ir::operation::Reduce::ReduceType::PROD);
 
-  _map[ANEURALNETWORKS_ROUND_EX] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 1 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //  0 -> input Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0]};
-
-    return new operation::Round{inputs, outputs};
-  };
+  _map[ANEURALNETWORKS_ROUND_EX] =
+      getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ROUND);
 
   _map[ANEURALNETWORKS_RANGE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
     assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -1695,18 +1663,8 @@ OperationFactory::OperationFactory()
   //  1 -> A 1-D tensor, specifying the value
   _map[ANEURALNETWORKS_FILL_EX] = createSimpleBinaryOp<operation::Fill>;
 
-  _map[ANEURALNETWORKS_ZEROS_LIKE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 1 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //  0 -> input Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0]};
-
-    return new operation::ZerosLike{inputs, outputs};
-  };
-
+  _map[ANEURALNETWORKS_ZEROS_LIKE_EX] =
+      getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ZEROS_LIKE);
   // Each input should be interpreted as follows:
   //  0 -> Input Tensor Index
   //  1 -> Multiple Tensor Index
@@ -1845,14 +1803,8 @@ OperationFactory::OperationFactory()
     return new operation::LogSoftmax{inputs, outputs, param};
   };
 
-  _map[ANEURALNETWORKS_QUANTIZE] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 1 && init_param.output_count == 1);
-
-    OperandIndexSequence inputs{init_param.inputs[0]};
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    return new operation::Quantize{inputs, outputs};
-  };
+  _map[ANEURALNETWORKS_QUANTIZE] =
+      getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::QUANTIZE);
 }
 
 Operation *OperationFactory::create(ANeuralNetworksOperationType type,
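
Taken together, the OperationFactory hunks replace pages of near-identical hand-written lambdas per NNAPI op code with generator functions that capture the one distinguishing parameter (arithmetic type, comparison type, pool type, elementwise type). The pattern is small enough to demonstrate standalone; everything below is illustrative stand-in code, not onert's real types:

    #include <cassert>
    #include <functional>
    #include <iostream>
    #include <map>

    enum class BinaryType { ADD, SUB, MUL, DIV };
    struct BinaryOp { BinaryType type; int activation; };
    using Generator = std::function<BinaryOp(const int *inputs, int input_count)>;

    // One factory returns the lambda; the captured enum replaces four copies of
    // near-identical code (cf. the removed ADD/SUB/MUL/DIV lambdas above).
    Generator getBinaryArithmeticGenerator(BinaryType type)
    {
      return [type](const int *inputs, int input_count) {
        assert(input_count == 3); // lhs, rhs, fused-activation scalar
        return BinaryOp{type, inputs[2]};
      };
    }

    int main()
    {
      std::map<int, Generator> map; // keyed by the ANEURALNETWORKS_* op code in onert
      map[0] = getBinaryArithmeticGenerator(BinaryType::ADD);
      map[1] = getBinaryArithmeticGenerator(BinaryType::SUB);

      const int inputs[] = {4, 7, 0};
      const BinaryOp op = map[1](inputs, 3);
      std::cout << static_cast<int>(op.type) << '\n'; // prints 1 (SUB)
    }
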
index 86c2c6b..7eef157 100644 (file)
@@ -90,12 +90,14 @@ public:
     // Set inputs
     for (const std::int32_t input_ind : *tflite_subg->inputs())
     {
-      subg->addInput(tensorIdxToOperandIdx(input_ind));
+      subg->addInput(tensorIdxToOperandIdx(input_ind),
+                     _tensor_names.at(_tensor_to_operand[input_ind]));
     }
     // Set outputs
     for (const std::int32_t output_ind : *tflite_subg->outputs())
     {
-      subg->addOutput(tensorIdxToOperandIdx(output_ind));
+      subg->addOutput(tensorIdxToOperandIdx(output_ind),
+                      _tensor_names.at(_tensor_to_operand[output_ind]));
     }
     // Create operations
     for (const auto *op : *tflite_subg->operators())
index 94f51dd..50f3964 100644 (file)
@@ -22,9 +22,7 @@
 #include <ir/TypeInfo.h>
 #include <ir/DataType.h>
 
-#include <ir/operation/Add.h>
-#include <ir/operation/Sub.h>
-#include <ir/operation/Mul.h>
+#include <ir/operation/BinaryArithmetic.h>
 #include <ir/operation/FullyConnected.h>
 
 #include <gtest/gtest.h>
@@ -209,8 +207,7 @@ using OIS = OperandIndexSequence;
 template <typename NodeT, typename... Types>
 OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args)
 {
-  typename NodeT::Param op_params{Activation::NONE};
-  auto op = std::make_unique<NodeT>(std::forward<Types>(args)..., op_params);
+  auto op = std::make_unique<NodeT>(std::forward<Types>(args)...);
   auto op_idx = graph->addOperation(std::move(op));
   // For now, all operations in the scheduler test graphs have the same size (for simplicity)
   assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
@@ -227,17 +224,20 @@ std::shared_ptr<Graph> createStraightGraph()
   auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
   auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
   auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Add>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx});
+  BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
 
   // Create sub node
   auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
   auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Sub>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx});
+  BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx}, sub_op_params);
 
   // Create mul node
   auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
   auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Mul>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx});
+  BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params);
 
   graph->finishBuilding();
   return graph;
@@ -261,31 +261,39 @@ std::shared_ptr<Graph> createBranchedGraph()
   auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
   auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
   auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Add>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx});
+  BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
 
   // Create mul1 node
   auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
   auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Mul>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx});
+  BinaryArithmetic::Param mul1_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx},
+                           mul1_op_params);
 
   // Create mul2 node
   auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
   auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Mul>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx});
+  BinaryArithmetic::Param mul2_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx},
+                           mul2_op_params);
 
   // Create fc1 node
   auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
   auto fc1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx});
+  FullyConnected::Param fc1_op_params{Activation::NONE};
+  create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}, fc1_op_params);
 
   // Create fc2 node
   auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
   auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx});
+  FullyConnected::Param fc2_op_params{Activation::NONE};
+  create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}, fc2_op_params);
 
-  // Create add2 node
+  // Create sub node
   auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Sub>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx});
+  BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params);
 
   graph->finishBuilding();
   return graph;
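
The scheduler test adapts to the same IR consolidation: the dedicated Add, Sub and Mul node classes are gone, so every arithmetic node is a BinaryArithmetic whose Param carries the ArithmeticType. Note that the create<> helper earlier in this hunk dropped its hard-coded Param{Activation::NONE} and now simply forwards whatever the caller passes, which is what lets one helper build BinaryArithmetic and FullyConnected nodes alike. A reduced standalone sketch of that forwarding, using stand-in types rather than onert's:

    #include <cstddef>
    #include <iostream>
    #include <memory>
    #include <utility>
    #include <vector>

    struct Node { virtual ~Node() = default; };
    struct BinArith : Node {
      enum class Type { ADD, SUB, MUL } type;
      explicit BinArith(Type t) : type(t) {}
    };

    std::vector<std::unique_ptr<Node>> g_graph; // stand-in for Graph::addOperation

    // Perfect-forwarding helper: node-specific Params travel with the other
    // constructor arguments instead of being fabricated inside the helper.
    template <typename NodeT, typename... Types> std::size_t create(Types &&... args)
    {
      g_graph.push_back(std::make_unique<NodeT>(std::forward<Types>(args)...));
      return g_graph.size() - 1;
    }

    int main()
    {
      const auto idx = create<BinArith>(BinArith::Type::SUB);
      std::cout << "node " << idx << " added\n";
    }
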
index 0fcf372..806b47e 100644 (file)
@@ -20,7 +20,7 @@
 #include "ir/Graph.h"
 #include "compiler/Compiler.h"
 #include "exec/Execution.h"
-#include "ir/operation/Add.h"
+#include "ir/operation/BinaryArithmetic.h"
 
 namespace
 {
@@ -54,16 +54,20 @@ public:
         .at(operand_rhs2)
         .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
     // 2nd add operations (result2 <= result1 + rhs2)
-    operation::Add::Param param1;
+    operation::BinaryArithmetic::Param param1;
+    param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
     param1.activation = Activation::NONE;
     auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
     auto output_set1 = OperandIndexSequence{operand_result1};
-    graph->addOperation(std::make_unique<operation::Add>(input_set1, output_set1, param1));
-    operation::Add::Param param2;
+    graph->addOperation(
+        std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+    operation::BinaryArithmetic::Param param2;
+    param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
     param2.activation = Activation::NONE;
     auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
     auto output_set2 = OperandIndexSequence{operand_result2};
-    graph->addOperation(std::make_unique<operation::Add>(input_set2, output_set2, param2));
+    graph->addOperation(
+        std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
     // Identify model inputs and outputs
     graph->addInput(operand_lhs);
     graph->addInput(operand_rhs1);
index 2e295ef..09190bc 100644 (file)
@@ -21,7 +21,7 @@
 #include "ir/Graph.h"
 #include "interp/InterpExecutor.h"
 #include "exec/Execution.h"
-#include "ir/operation/Add.h"
+#include "ir/operation/BinaryArithmetic.h"
 
 namespace
 {
@@ -57,11 +57,13 @@ protected:
 
     // Add operations
 
-    operation::Add::Param param;
+    operation::BinaryArithmetic::Param param;
+    param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
     param.activation = Activation::NONE;
     auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
     auto output_set = OperandIndexSequence{operand_result};
-    _graph->addOperation(std::make_unique<operation::Add>(input_set, output_set, param));
+    _graph->addOperation(
+        std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
 
     // Identify model inputs and outputs
 
@@ -112,17 +114,21 @@ protected:
 
     // 2nd add operations (result2 <= result1 + rhs2)
 
-    operation::Add::Param param1;
+    operation::BinaryArithmetic::Param param1;
+    param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
     param1.activation = Activation::NONE;
     auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
     auto output_set1 = OperandIndexSequence{operand_result1};
-    _graph->addOperation(std::make_unique<operation::Add>(input_set1, output_set1, param1));
+    _graph->addOperation(
+        std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
 
-    operation::Add::Param param2;
+    operation::BinaryArithmetic::Param param2;
+    param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
     param2.activation = Activation::NONE;
     auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
     auto output_set2 = OperandIndexSequence{operand_result2};
-    _graph->addOperation(std::make_unique<operation::Add>(input_set2, output_set2, param2));
+    _graph->addOperation(
+        std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
 
     // Identify model inputs and outputs
 
@@ -170,11 +176,13 @@ protected:
 
     // Add operations
 
-    operation::Add::Param param;
+    operation::BinaryArithmetic::Param param;
+    param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
     param.activation = Activation::NONE;
     auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
     auto output_set = OperandIndexSequence{operand_result};
-    _graph->addOperation(std::make_unique<operation::Add>(input_set, output_set, param));
+    _graph->addOperation(
+        std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
 
     // Identify model inputs and outputs
 
index 63a948d..aab33fa 100644 (file)
@@ -47,8 +47,9 @@ TEST(ShapeInference, Pool2DNodeSame)
   Stride stride{3, 7};
   Padding padding{PaddingType::SAME};
 
-  operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
-  auto infered_out_shape = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+  operation::Pool2D::Param avg_pool_param{
+      operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+  auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -56,8 +57,9 @@ TEST(ShapeInference, Pool2DNodeSame)
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
 
-  operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
-  infered_out_shape = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+  operation::Pool2D::Param max_pool_param{
+      operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+  infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -72,8 +74,9 @@ TEST(ShapeInference, Pool2DNodeValid)
   Stride stride{3, 7};
   Padding padding{PaddingType::VALID};
 
-  operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
-  auto infered_out_shape = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+  operation::Pool2D::Param avg_pool_param{
+      operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+  auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -81,8 +84,9 @@ TEST(ShapeInference, Pool2DNodeValid)
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
 
-  operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
-  infered_out_shape = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+  operation::Pool2D::Param max_pool_param{
+      operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+  infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -98,8 +102,9 @@ TEST(ShapeInference, Pool2DNodeExplicit)
   Stride stride{3, 7};
   Padding padding{4, 3, 2, 1};
 
-  operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
-  auto infered_out_shape = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+  operation::Pool2D::Param avg_pool_param{
+      operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+  auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -107,8 +112,9 @@ TEST(ShapeInference, Pool2DNodeExplicit)
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
 
-  operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
-  infered_out_shape = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+  operation::Pool2D::Param max_pool_param{
+      operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+  infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -122,7 +128,8 @@ TEST(ShapeInference, Conv2D)
   Shape in_shape{10, 6, 12, 20};
   Shape ker_shape{30, 3, 6, 20};
 
-  operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE};
+  operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE,
+                                 Dilation{1, 1}};
   auto infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -131,7 +138,8 @@ TEST(ShapeInference, Conv2D)
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
 
-  param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE};
+  param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE,
+                                   Dilation{1, 1}};
   infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -140,7 +148,8 @@ TEST(ShapeInference, Conv2D)
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
   ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
 
-  param = operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE};
+  param =
+      operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
   infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
 
   ASSERT_EQ(infered_out_shape.rank(), 4);
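These hunks track two runtime API changes: pooling shape inference is unified behind operation::Pool2D::Param carrying an explicit PoolType, and operation::Conv2D::Param gains a Dilation field. The sketch below shows the per-axis output-extent arithmetic that the asserted shapes imply once dilation participates; the type names and the inferDim helper are illustrative stand-ins, not the actual onert declarations.

#include <cassert>
#include <cstdint>

enum class PaddingType { SAME, VALID };

// One spatial axis: dilation widens the effective kernel before the usual
// SAME/VALID formulas apply. With dilation 1 this reduces to the classic rules.
int32_t inferDim(int32_t in, int32_t kernel, int32_t stride, int32_t dilation, PaddingType pad)
{
  const int32_t effective_kernel = (kernel - 1) * dilation + 1;
  if (pad == PaddingType::SAME)
    return (in + stride - 1) / stride;              // ceil(in / stride)
  return (in - effective_kernel + stride) / stride; // VALID
}

int main()
{
  // Matches the Conv2D asserts above (input HxW 6x12, kernel 3x6, stride {3, 7}):
  assert(inferDim(6, 3, 3, 1, PaddingType::VALID) == 2);  // H with VALID padding
  assert(inferDim(12, 6, 7, 1, PaddingType::VALID) == 1); // W with VALID padding -> W == 1
  assert(inferDim(12, 6, 7, 1, PaddingType::SAME) == 2);  // W with SAME padding -> W == 2
  return 0;
}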
index 3a6b40d..984dbfa 100644 (file)
@@ -38,6 +38,14 @@ GeneratedTests.conv_quant8_channels_weights_as_inputs
 GeneratedTests.conv_quant8_large_weights_as_inputs
 GeneratedTests.conv_quant8_overflow_weights_as_inputs
 GeneratedTests.conv_quant8_weights_as_inputs
+GeneratedTests.conv2d_dilation_nnfw
+GeneratedTests.conv2d_dilation_nnfw_quant8
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
+GeneratedTests.conv2d_dilation_nnfw_2
+GeneratedTests.conv2d_dilation_nnfw_quant8_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
 GeneratedTests.cos_ex_1D_float_nnfw
 GeneratedTests.cos_ex_4D_float_nnfw
 GeneratedTests.cos_ex_dynamic_nnfw
@@ -77,6 +85,8 @@ GeneratedTests.log_softmax_nnfw
 GeneratedTests.log_softmax_nnfw_2
 GeneratedTests.log_softmax_nnfw_3
 GeneratedTests.log_softmax_nnfw_4
+GeneratedTests.log_softmax_nnfw_5
+GeneratedTests.log_softmax_nnfw_quant8
 GeneratedTests.logical_not
 GeneratedTests.logical_not_1D_nnfw
 GeneratedTests.logical_not_4D_nnfw
index f4bd48b..a7bedf1 100644 (file)
@@ -37,6 +37,14 @@ GeneratedTests.conv_quant8_channels_weights_as_inputs
 GeneratedTests.conv_quant8_large_weights_as_inputs
 GeneratedTests.conv_quant8_overflow_weights_as_inputs
 GeneratedTests.conv_quant8_weights_as_inputs
+GeneratedTests.conv2d_dilation_nnfw
+GeneratedTests.conv2d_dilation_nnfw_quant8
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
+GeneratedTests.conv2d_dilation_nnfw_2
+GeneratedTests.conv2d_dilation_nnfw_quant8_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
 GeneratedTests.cos_ex_1D_float_nnfw
 GeneratedTests.cos_ex_4D_float_nnfw
 GeneratedTests.cos_ex_dynamic_nnfw
@@ -80,6 +88,8 @@ GeneratedTests.log_softmax_nnfw
 GeneratedTests.log_softmax_nnfw_2
 GeneratedTests.log_softmax_nnfw_3
 GeneratedTests.log_softmax_nnfw_4
+GeneratedTests.log_softmax_nnfw_5
+GeneratedTests.log_softmax_nnfw_quant8
 GeneratedTests.logical_not
 GeneratedTests.logical_not_1D_nnfw
 GeneratedTests.logical_not_4D_nnfw
index 3a6b40d..984dbfa 100644 (file)
@@ -38,6 +38,14 @@ GeneratedTests.conv_quant8_channels_weights_as_inputs
 GeneratedTests.conv_quant8_large_weights_as_inputs
 GeneratedTests.conv_quant8_overflow_weights_as_inputs
 GeneratedTests.conv_quant8_weights_as_inputs
+GeneratedTests.conv2d_dilation_nnfw
+GeneratedTests.conv2d_dilation_nnfw_quant8
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
+GeneratedTests.conv2d_dilation_nnfw_2
+GeneratedTests.conv2d_dilation_nnfw_quant8_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
 GeneratedTests.cos_ex_1D_float_nnfw
 GeneratedTests.cos_ex_4D_float_nnfw
 GeneratedTests.cos_ex_dynamic_nnfw
@@ -77,6 +85,8 @@ GeneratedTests.log_softmax_nnfw
 GeneratedTests.log_softmax_nnfw_2
 GeneratedTests.log_softmax_nnfw_3
 GeneratedTests.log_softmax_nnfw_4
+GeneratedTests.log_softmax_nnfw_5
+GeneratedTests.log_softmax_nnfw_quant8
 GeneratedTests.logical_not
 GeneratedTests.logical_not_1D_nnfw
 GeneratedTests.logical_not_4D_nnfw
index fcd8b3e..036c869 100644 (file)
@@ -37,6 +37,14 @@ GeneratedTests.conv_quant8_channels_weights_as_inputs
 GeneratedTests.conv_quant8_large_weights_as_inputs
 GeneratedTests.conv_quant8_overflow_weights_as_inputs
 GeneratedTests.conv_quant8_weights_as_inputs
+GeneratedTests.conv2d_dilation_nnfw
+GeneratedTests.conv2d_dilation_nnfw_quant8
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
+GeneratedTests.conv2d_dilation_nnfw_2
+GeneratedTests.conv2d_dilation_nnfw_quant8_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
 GeneratedTests.cos_ex_1D_float_nnfw
 GeneratedTests.cos_ex_4D_float_nnfw
 GeneratedTests.cos_ex_dynamic_nnfw
@@ -79,6 +87,8 @@ GeneratedTests.log_softmax_nnfw
 GeneratedTests.log_softmax_nnfw_2
 GeneratedTests.log_softmax_nnfw_3
 GeneratedTests.log_softmax_nnfw_4
+GeneratedTests.log_softmax_nnfw_5
+GeneratedTests.log_softmax_nnfw_quant8
 GeneratedTests.logical_not
 GeneratedTests.logical_not_1D_nnfw
 GeneratedTests.logical_not_4D_nnfw
index a0ae9d3..9e51e75 100644 (file)
@@ -85,6 +85,14 @@ GeneratedTests.conv_quant8_large_weights_as_inputs
 GeneratedTests.conv_quant8_overflow
 GeneratedTests.conv_quant8_overflow_weights_as_inputs
 GeneratedTests.conv_quant8_weights_as_inputs
+GeneratedTests.conv2d_dilation_nnfw
+GeneratedTests.conv2d_dilation_nnfw_quant8
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8
+GeneratedTests.conv2d_dilation_nnfw_2
+GeneratedTests.conv2d_dilation_nnfw_quant8_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_2
+GeneratedTests.conv2d_dilation_nnfw_weight_as_input_quant8_2
 GeneratedTests.cos_ex_1D_float_nnfw
 GeneratedTests.cos_ex_4D_float_nnfw
 GeneratedTests.cos_ex_dynamic_nnfw
@@ -216,6 +224,8 @@ GeneratedTests.log_softmax_nnfw
 GeneratedTests.log_softmax_nnfw_2
 GeneratedTests.log_softmax_nnfw_3
 GeneratedTests.log_softmax_nnfw_4
+GeneratedTests.log_softmax_nnfw_5
+GeneratedTests.log_softmax_nnfw_quant8
 GeneratedTests.logical_and_1D_nnfw
 GeneratedTests.logical_and_2D_nnfw
 GeneratedTests.logical_and_3D_nnfw
diff --git a/tests/nnapi/specs/V1_2/conv2d_dilation_nnfw.mod.py b/tests/nnapi/specs/V1_2/conv2d_dilation_nnfw.mod.py
new file mode 100644 (file)
index 0000000..95ef901
--- /dev/null
@@ -0,0 +1,69 @@
+#
+# Copyright (C) 2018 The Android Open Source Project
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+layout = BoolScalar("layout", False) # NHWC
+
+# TEST 1: dilation set to 1 (default)
+i1 = Input("op1", "TENSOR_FLOAT32", "{1, 3, 3, 1}")
+f1 = Parameter("op2", "TENSOR_FLOAT32", "{1, 2, 2, 1}", [.25, .25, .25, .25])
+b1 = Parameter("op3", "TENSOR_FLOAT32", "{1}", [0])
+o1 = Output("op4", "TENSOR_FLOAT32", "{1, 2, 2, 1}")
+Model().Operation("CONV_2D", i1, f1, b1, 0, 0, 0, 0, 1, 1, 0, layout, 1, 1).To(o1)
+
+# Additional data type
+quant8 = DataTypeConverter().Identify({
+    i1: ("TENSOR_QUANT8_ASYMM", 0.5, 0),
+    f1: ("TENSOR_QUANT8_ASYMM", 0.125, 0),
+    b1: ("TENSOR_INT32", 0.0625, 0),
+    o1: ("TENSOR_QUANT8_ASYMM", 0.125, 0)
+})
+
+# Instantiate an example
+example = Example({
+    i1: [1.0, 1.0, 1.0, 1.0, 0.5, 1.0, 1.0, 1.0, 1.0],
+    o1: [.875, .875, .875, .875]
+}).AddInput(f1, b1).AddVariations("relaxed", quant8, "float16")
+
+
+# TEST 2: dilation set to 3
+i2 = Input("op1", "TENSOR_FLOAT32", "{1, 9, 9, 1}")
+f2 = Parameter("op2", "TENSOR_FLOAT32", "{1, 3, 3, 1}", [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
+b2 = Parameter("op3", "TENSOR_FLOAT32", "{1}", [0])
+o2 = Output("op4", "TENSOR_FLOAT32", "{1, 3, 3, 1}")
+Model().Operation("CONV_2D", i2, f2, b2, 0, 0, 0, 0, 1, 1, 0, layout, 3, 3).To(o2)
+
+# Additional data type
+quant8 = DataTypeConverter().Identify({
+    i2: ("TENSOR_QUANT8_ASYMM", 0.5, 0),
+    f2: ("TENSOR_QUANT8_ASYMM", 0.125, 0),
+    b2: ("TENSOR_INT32", 0.0625, 0),
+    o2: ("TENSOR_QUANT8_ASYMM", 0.125, 0)
+})
+
+# Instantiate an example
+example = Example({
+    i2: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+         0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0,
+         0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0,
+         0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0,
+         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+    o2: [5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0]
+}).AddInput(f2, b2).AddVariations("relaxed", quant8, "float16")
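A quick way to sanity-check TEST 2: with dilation 3, the 3x3 kernel has an effective extent of (3-1)*3+1 = 7, so a VALID 9x9 convolution at stride 1 produces (9-7)+1 = 3 per axis, the declared 3x3 output. And because only each window's center tap (kernel weight 5) ever lands on the 3x3 block of ones, every output element is 5.0. A minimal standalone check of that reasoning (plain C++, not runtime code):

#include <cassert>
#include <vector>

// Naive single-channel dilated convolution, VALID padding, stride 1.
std::vector<float> dilatedConv(const std::vector<float> &in, int in_h, int in_w,
                               const std::vector<float> &ker, int k, int dilation)
{
  const int eff = (k - 1) * dilation + 1; // effective kernel extent: 7
  const int out_h = in_h - eff + 1, out_w = in_w - eff + 1;
  std::vector<float> out(out_h * out_w, 0.0f);
  for (int oy = 0; oy < out_h; ++oy)
    for (int ox = 0; ox < out_w; ++ox)
      for (int ky = 0; ky < k; ++ky)
        for (int kx = 0; kx < k; ++kx)
          out[oy * out_w + ox] +=
              in[(oy + ky * dilation) * in_w + (ox + kx * dilation)] * ker[ky * k + kx];
  return out;
}

int main()
{
  std::vector<float> in(81, 0.0f);
  for (int y = 3; y <= 5; ++y)
    for (int x = 3; x <= 5; ++x)
      in[y * 9 + x] = 1.0f; // the 3x3 block of ones from TEST 2
  std::vector<float> ker{1, 2, 3, 4, 5, 6, 7, 8, 9};
  auto out = dilatedConv(in, 9, 9, ker, 3, 3);
  assert(out.size() == 9);
  for (float v : out)
    assert(v == 5.0f); // matches o2 above
  return 0;
}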
index 347e1b0..580fd9e 100644 (file)
@@ -71,3 +71,26 @@ test(
     output_data=[-4.14297, -10.14297, -2.14297, -.142971,
                  -7.00104, -12.00104, -.00104087, -9.00104],
 )
+
+def quant8_test(input0, output0, input_data, beta, axis, output_data):
+  model = Model().Operation("LOG_SOFTMAX", input0, beta, axis).To(output0)
+  quant8 = DataTypeConverter().Identify({
+    input0: ["TENSOR_QUANT8_ASYMM", 10 / 255.0],
+    output0: ["TENSOR_QUANT8_ASYMM", 16 / 256.0, 255],
+  })
+
+  Example({
+      input0: input_data,
+      output0: output_data,
+  }, model=model).AddVariations(quant8)
+
+quant8_test(
+    input0=Input("input0", "TENSOR_FLOAT32", "{1, 1, 2, 4}"),
+    output0=Output("output0", "TENSOR_FLOAT32", "{1, 1, 2, 4}"),
+    input_data=[0, 6, 2, 4,
+                3, 2, 10, 1],
+    beta=1.0,
+    axis=3,
+    output_data=[-6.145078, -.145078, -4.145078, -2.145078,
+                 -7.001370, -8.001370, -.001370, -9.001370],
+)
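The expected values follow directly from log_softmax(x)_i = beta*x_i - log(sum_j exp(beta*x_j)) along the given axis. A minimal reference check of the first row (beta = 1), independent of the runtime:

#include <cassert>
#include <cmath>
#include <vector>

int main()
{
  std::vector<double> row{0, 6, 2, 4}; // first row of input_data above
  double sum = 0.0;
  for (double x : row)
    sum += std::exp(x);
  const double log_sum = std::log(sum); // ~6.145078
  const std::vector<double> expected{-6.145078, -0.145078, -4.145078, -2.145078};
  for (size_t i = 0; i < row.size(); ++i)
    assert(std::abs((row[i] - log_sum) - expected[i]) < 1e-5);
  return 0;
}

The quant8 variation then only rescales these values: output scale 16/256 with zero point 255 makes the representable range roughly [-15.94, 0], which covers log-softmax's non-positive outputs.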
index 6e0696d..aa3a942 100644 (file)
@@ -14,6 +14,11 @@ file(GLOB_RECURSE RUNTIME_NNFW_API_TEST_SRC "src/*.cc" "src/*.cpp")
 
 add_executable(${RUNTIME_NNFW_API_TEST} ${RUNTIME_NNFW_API_TEST_SRC})
 
+nnfw_find_package(ARMCompute QUIET)
+if(ARMCompute_FOUND)
+  target_compile_definitions(${RUNTIME_NNFW_API_TEST} PRIVATE TEST_ACL_BACKEND)
+endif(ARMCompute_FOUND)
+
 set(RUNTIME_NNFW_API_TEST_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/include
                                   ${CMAKE_CURRENT_SOURCE_DIR}/src)
 target_include_directories(${RUNTIME_NNFW_API_TEST} PRIVATE ${RUNTIME_NNFW_API_TEST_INCLUDE})
diff --git a/tests/nnfw_api/src/CircleGen.cc b/tests/nnfw_api/src/CircleGen.cc
new file mode 100644 (file)
index 0000000..19cb95f
--- /dev/null
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleGen.h"
+
+CircleGen::CircleGen() : _subgraph_contexts(1) // Create primary subgraph
+{
+  // 0th buffer is always the empty buffer for non-const tensors
+  addBuffer(nullptr, 0);
+}
+
+uint32_t CircleGen::addBuffer(const uint8_t *buf, size_t size)
+{
+  uint32_t ind = _buffers.size();
+  _buffers.emplace_back(buildBuffer(buf, size));
+  return ind;
+}
+
+uint32_t CircleGen::addTensor(const TensorParams &params)
+{
+  int ind = curSubgCtx().tensors.size();
+  curSubgCtx().tensors.emplace_back(buildTensor(params));
+  return ind;
+}
+
+void CircleGen::setInputsAndOutputs(const std::vector<int> &inputs, const std::vector<int> &outputs)
+{
+  curSubgCtx().inputs = inputs;
+  curSubgCtx().outputs = outputs;
+}
+
+uint32_t CircleGen::nextSubgraph()
+{
+  uint32_t ind = _subgraph_contexts.size();
+  _subgraph_contexts.push_back({});
+  return ind;
+}
+
+CircleBuffer CircleGen::finish()
+{
+  std::vector<flatbuffers::Offset<circle::SubGraph>> subgraphs;
+  for (auto &ctx : _subgraph_contexts)
+    subgraphs.push_back(buildSubGraph(ctx));
+  auto model =
+      circle::CreateModelDirect(_fbb, 3, &_opcodes, &subgraphs, "CircleGen generated", &_buffers);
+  _fbb.Finish(model);
+  return CircleBuffer{std::move(_fbb)};
+}
+
+// ===== Add Operator methods begin =====
+
+uint32_t CircleGen::addOperatorAdd(const OperatorParams &params,
+                                   circle::ActivationFunctionType actfn)
+{
+  auto options = circle::CreateAddOptions(_fbb, actfn).Union();
+  return addOperatorWithOptions(params, circle::BuiltinOperator_ADD,
+                                circle::BuiltinOptions_AddOptions, options);
+}
+
+uint32_t CircleGen::addOperatorAveragePool2D(const OperatorParams &params, circle::Padding padding,
+                                             int stride_w, int stride_h, int filter_w, int filter_h,
+                                             circle::ActivationFunctionType actfn)
+{
+  auto options =
+      circle::CreatePool2DOptions(_fbb, padding, stride_w, stride_h, filter_w, filter_h, actfn)
+          .Union();
+  return addOperatorWithOptions(params, circle::BuiltinOperator_AVERAGE_POOL_2D,
+                                circle::BuiltinOptions_Pool2DOptions, options);
+}
+
+uint32_t CircleGen::addOperatorConcatenation(const OperatorParams &params, int axis,
+                                             circle::ActivationFunctionType actfn)
+{
+  auto options = circle::CreateConcatenationOptions(_fbb, axis, actfn).Union();
+  return addOperatorWithOptions(params, circle::BuiltinOperator_CONCATENATION,
+                                circle::BuiltinOptions_ConcatenationOptions, options);
+}
+
+uint32_t CircleGen::addOperatorCos(const OperatorParams &params)
+{
+  auto options = circle::CreateCosOptions(_fbb).Union();
+  return addOperatorWithOptions(params, circle::BuiltinOperator_COS,
+                                circle::BuiltinOptions_CosOptions, options);
+}
+
+uint32_t CircleGen::addOperatorL2Normalization(const OperatorParams &params)
+{
+  auto options = circle::CreateL2NormOptions(_fbb).Union();
+  return addOperatorWithOptions(params, circle::BuiltinOperator_L2_NORMALIZATION,
+                                circle::BuiltinOptions_L2NormOptions, options);
+}
+
+uint32_t CircleGen::addOperatorLess(const OperatorParams &params)
+{
+  auto options = circle::CreateLessOptions(_fbb).Union();
+  return addOperatorWithOptions(params, circle::BuiltinOperator_LESS,
+                                circle::BuiltinOptions_LessOptions, options);
+}
+
+uint32_t CircleGen::addOperatorLeakyRelu(const OperatorParams &params, float alpha)
+{
+  auto options = circle::CreateLeakyReluOptions(_fbb, alpha).Union();
+  return addOperatorWithOptions(params, circle::BuiltinOperator_LEAKY_RELU,
+                                circle::BuiltinOptions_LeakyReluOptions, options);
+}
+
+uint32_t CircleGen::addOperatorNeg(const OperatorParams &params)
+{
+  auto options = circle::CreateNegOptions(_fbb).Union();
+  return addOperatorWithOptions(params, circle::BuiltinOperator_NEG,
+                                circle::BuiltinOptions_NegOptions, options);
+}
+
+uint32_t CircleGen::addOperatorPad(const OperatorParams &params)
+{
+  auto options = circle::CreatePadOptions(_fbb).Union();
+  return addOperatorWithOptions(params, circle::BuiltinOperator_PAD,
+                                circle::BuiltinOptions_PadOptions, options);
+}
+
+uint32_t CircleGen::addOperatorPadV2(const OperatorParams &params)
+{
+  auto options = circle::CreatePadV2Options(_fbb).Union();
+  return addOperatorWithOptions(params, circle::BuiltinOperator_PADV2,
+                                circle::BuiltinOptions_PadV2Options, options);
+}
+
+uint32_t CircleGen::addOperatorRank(const OperatorParams &params)
+{
+  auto options = circle::CreateRankOptions(_fbb).Union();
+  return addOperatorWithOptions(params, circle::BuiltinOperator_RANK,
+                                circle::BuiltinOptions_RankOptions, options);
+}
+
+uint32_t CircleGen::addOperatorResizeNearestNeighbor(const OperatorParams &params)
+{
+  auto options = circle::CreateResizeNearestNeighborOptions(_fbb).Union();
+  return addOperatorWithOptions(params, circle::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
+                                circle::BuiltinOptions_ResizeNearestNeighborOptions, options);
+}
+
+uint32_t CircleGen::addOperatorWhile(const OperatorParams &params, uint32_t cond_subg,
+                                     uint32_t body_subg)
+{
+  auto options = circle::CreateWhileOptions(_fbb, cond_subg, body_subg).Union();
+  return addOperatorWithOptions(params, circle::BuiltinOperator_WHILE,
+                                circle::BuiltinOptions_WhileOptions, options);
+}
+
+// NOTE Please add addOperator functions ABOVE this line
+//
+// %  How to add a new addOperatorXXX function
+// 0. Copy code from one of the existing addOperatorXXX function
+// 1. Change the function signature (need BuiltinOperator params)
+// 2. Change enum BuiltinOperator
+// 3. Change enum BuiltinOptions
+// 4. Change CreateXXXOptions accordingly
+
+// ===== Add Operator methods end =====
+
+uint32_t CircleGen::addOperatorWithOptions(const OperatorParams &params,
+                                           circle::BuiltinOperator opcode,
+                                           circle::BuiltinOptions options_type,
+                                           flatbuffers::Offset<void> options)
+{
+  uint32_t opcode_ind = addOperatorCode(opcode);
+  auto op = circle::CreateOperatorDirect(_fbb, opcode_ind, &params.inputs, &params.outputs,
+                                         options_type, options);
+
+  uint32_t ind = curSubgCtx().operators.size();
+  curSubgCtx().operators.emplace_back(op);
+  return ind;
+}
+
+uint32_t CircleGen::addOperatorCode(circle::BuiltinOperator opcode)
+{
+  // TODO If the same OperatorCode is registered already, just return it
+  uint32_t ind = _opcodes.size();
+  _opcodes.emplace_back(circle::CreateOperatorCode(_fbb, opcode));
+  return ind;
+}
+
+flatbuffers::Offset<circle::Buffer> CircleGen::buildBuffer(const uint8_t *buf, size_t size)
+{
+  if (buf == nullptr && size == 0)
+    return circle::CreateBuffer(_fbb);
+  auto buffer = _fbb.CreateVector(buf, size);
+  return circle::CreateBuffer(_fbb, buffer);
+}
+
+flatbuffers::Offset<circle::Tensor> CircleGen::buildTensor(const TensorParams &params)
+{
+  auto shape = _fbb.CreateVector(params.shape);
+  auto name = _fbb.CreateString(params.name);
+  return circle::CreateTensor(_fbb, shape, params.tensor_type, params.buffer, name,
+                              0 /* QuantParam */, false /* is_variable */, 0 /* sparsity */,
+                              0 /* shape_signature */);
+}
+
+flatbuffers::Offset<circle::SubGraph> CircleGen::buildSubGraph(const SubgraphContext &ctx)
+{
+  return circle::CreateSubGraphDirect(_fbb, &ctx.tensors, &ctx.inputs, &ctx.outputs, &ctx.operators,
+                                      nullptr);
+}
index 899c800..09ca5a5 100644 (file)
@@ -36,8 +36,8 @@ public:
     _fbb.Finished(); // The build must have been finished, so check that here
   }
 
-  uint8_t *buffer() { return _fbb.GetBufferPointer(); }
-  size_t size() { return _fbb.GetSize(); }
+  uint8_t *buffer() const { return _fbb.GetBufferPointer(); }
+  size_t size() const { return _fbb.GetSize(); }
 
 private:
   flatbuffers::FlatBufferBuilder _fbb;
@@ -67,12 +67,16 @@ public:
     int version = 1;
   };
 
-public:
-  CircleGen()
+  struct SubgraphContext
   {
-    // 0th buffer is always the empty buffer for non-const tensors
-    addBuffer(nullptr, 0);
-  }
+    std::vector<int> inputs;
+    std::vector<int> outputs;
+    std::vector<flatbuffers::Offset<circle::Tensor>> tensors;
+    std::vector<flatbuffers::Offset<circle::Operator>> operators;
+  };
+
+public:
+  CircleGen();
 
   template <typename T> uint32_t addBuffer(const std::vector<T> &buf_vec)
   {
@@ -80,122 +84,50 @@ public:
     auto size = buf_vec.size() * sizeof(T);
     return addBuffer(buf, size);
   }
-
-  uint32_t addBuffer(const uint8_t *buf, size_t size)
-  {
-    uint32_t ind = _buffers.size();
-    _buffers.emplace_back(buildBuffer(buf, size));
-    return ind;
-  }
-
-  uint32_t addTensor(const TensorParams &params)
-  {
-    int ind = _tensors.size();
-    _tensors.emplace_back(buildTensor(params));
-    return ind;
-  }
-
-  uint32_t setInputsAndOutputs(const std::vector<int> &inputs, const std::vector<int> &outputs)
-  {
-    _inputs = inputs;
-    _outputs = outputs;
-  }
-
-  CircleBuffer finish()
-  {
-    // TODO Support multiple subgraphs, for now only single subgraph model is supported.
-    std::vector<flatbuffers::Offset<circle::SubGraph>> subgraphs{buildSubGraph()};
-    auto model =
-        circle::CreateModelDirect(_fbb, 3, &_opcodes, &subgraphs, "CircleGen generated", &_buffers);
-    _fbb.Finish(model);
-    return CircleBuffer{std::move(_fbb)};
-  }
+  uint32_t addBuffer(const uint8_t *buf, size_t size);
+  uint32_t addTensor(const TensorParams &params);
+  void setInputsAndOutputs(const std::vector<int> &inputs, const std::vector<int> &outputs);
+  uint32_t nextSubgraph();
+  CircleBuffer finish();
 
   // ===== Add Operator methods begin =====
 
-  uint32_t addOperatorAdd(const OperatorParams &params, circle::ActivationFunctionType actfn)
-  {
-    auto options = circle::CreateAddOptions(_fbb, actfn).Union();
-    return addOperatorWithOptions(params, circle::BuiltinOperator_ADD,
-                                  circle::BuiltinOptions_AddOptions, options);
-  }
-
+  uint32_t addOperatorAdd(const OperatorParams &params, circle::ActivationFunctionType actfn);
   uint32_t addOperatorAveragePool2D(const OperatorParams &params, circle::Padding padding,
                                     int stride_w, int stride_h, int filter_w, int filter_h,
-                                    circle::ActivationFunctionType actfn)
-  {
-    auto options =
-        circle::CreatePool2DOptions(_fbb, padding, stride_w, stride_h, filter_w, filter_h, actfn)
-            .Union();
-    return addOperatorWithOptions(params, circle::BuiltinOperator_AVERAGE_POOL_2D,
-                                  circle::BuiltinOptions_Pool2DOptions, options);
-  }
+                                    circle::ActivationFunctionType actfn);
+  uint32_t addOperatorConcatenation(const OperatorParams &params, int axis,
+                                    circle::ActivationFunctionType actfn);
+  uint32_t addOperatorCos(const OperatorParams &params);
+  uint32_t addOperatorL2Normalization(const OperatorParams &params);
+  uint32_t addOperatorLeakyRelu(const OperatorParams &params, float alpha);
+  uint32_t addOperatorLess(const OperatorParams &params);
+  uint32_t addOperatorNeg(const OperatorParams &params);
+  uint32_t addOperatorPad(const OperatorParams &params);
+  uint32_t addOperatorPadV2(const OperatorParams &params);
+  uint32_t addOperatorRank(const OperatorParams &params);
+  uint32_t addOperatorResizeNearestNeighbor(const OperatorParams &params);
+  uint32_t addOperatorWhile(const OperatorParams &params, uint32_t cond_subg, uint32_t body_subg);
 
   // NOTE Please add addOperator functions ABOVE this line
-  //
-  // %  How to add a new addOperatorXXX fuction
-  // 0. Copy code from one of the existing addOperatorXXX function
-  // 1. Change the function signature (need BuiltinOperator params)
-  // 2. Change enum BuiltinOperator
-  // 3. Change enum BuiltinOptions
-  // 4. Change CreateXXXOptions accordingly
-
   // ===== Add Operator methods end =====
 
 private:
   uint32_t addOperatorWithOptions(const OperatorParams &params, circle::BuiltinOperator opcode,
                                   circle::BuiltinOptions options_type,
-                                  flatbuffers::Offset<void> options)
-  {
-    uint32_t opcode_ind = addOperatorCode(opcode);
-    auto op = circle::CreateOperatorDirect(_fbb, opcode_ind, &params.inputs, &params.outputs,
-                                           options_type, options);
+                                  flatbuffers::Offset<void> options);
+  uint32_t addOperatorCode(circle::BuiltinOperator opcode);
+  flatbuffers::Offset<circle::Buffer> buildBuffer(const uint8_t *buf, size_t size);
+  flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams &params);
+  flatbuffers::Offset<circle::SubGraph> buildSubGraph(const SubgraphContext &ctx);
 
-    uint32_t ind = _operators.size();
-    _operators.emplace_back(op);
-    return ind;
-  }
-
-  uint32_t addOperatorCode(circle::BuiltinOperator opcode)
-  {
-    // TODO If the same OperatorCode is registered already, just return it
-    uint32_t ind = _opcodes.size();
-    _opcodes.emplace_back(circle::CreateOperatorCode(_fbb, opcode));
-    return ind;
-  }
-
-  flatbuffers::Offset<circle::Buffer> buildBuffer(const uint8_t *buf, size_t size)
-  {
-    if (buf == nullptr && size == 0)
-      return circle::CreateBuffer(_fbb);
-    auto buffer = _fbb.CreateVector(buf, size);
-    return circle::CreateBuffer(_fbb, buffer);
-  }
-
-  flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams &params)
-  {
-    auto shape = _fbb.CreateVector(params.shape);
-    auto name = _fbb.CreateString(params.name);
-    return circle::CreateTensor(_fbb, shape, params.tensor_type, params.buffer, name,
-                                0 /* QuantParam */, false /* is_variable */, 0 /* sparsity */,
-                                0 /* shape_signature */);
-  }
-
-  flatbuffers::Offset<circle::SubGraph> buildSubGraph()
-  {
-    return circle::CreateSubGraphDirect(_fbb, &_tensors, &_inputs, &_outputs, &_operators, nullptr);
-  }
+  SubgraphContext &curSubgCtx() { return _subgraph_contexts.back(); }
 
 private:
   flatbuffers::FlatBufferBuilder _fbb{1024};
   std::vector<flatbuffers::Offset<circle::Buffer>> _buffers;
   std::vector<flatbuffers::Offset<circle::OperatorCode>> _opcodes;
-
-  // per-subgraph
-  std::vector<int> _inputs;
-  std::vector<int> _outputs;
-  std::vector<flatbuffers::Offset<circle::Tensor>> _tensors;
-  std::vector<flatbuffers::Offset<circle::Operator>> _operators;
+  std::vector<SubgraphContext> _subgraph_contexts;
 };
 
 #endif // __NNFW_API_TEST_CIRCLE_GEN_H__
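With nextSubgraph() and addOperatorWhile() in place, tests can generate multi-subgraph control-flow models entirely in memory. A hedged sketch of how a WHILE model might be assembled with this API (only the CircleGen calls shown above are used; shapes, constants, and the Less/Add choice are illustrative):

#include "CircleGen.h"
#include <vector>

CircleBuffer buildWhileModel()
{
  CircleGen cgen;
  auto f32 = circle::TensorType::TensorType_FLOAT32;
  auto boolean = circle::TensorType::TensorType_BOOL;

  // Subgraph 0 (primary): subgraph indices are allocated in creation order,
  // so the condition subgraph will be 1 and the body subgraph 2.
  int x_in = cgen.addTensor({{1}, f32});
  int x_out = cgen.addTensor({{1}, f32});
  cgen.addOperatorWhile({{x_in}, {x_out}}, /*cond_subg=*/1, /*body_subg=*/2);
  cgen.setInputsAndOutputs({x_in}, {x_out});

  // Subgraph 1 (condition): x < 100
  cgen.nextSubgraph();
  std::vector<float> limit_data{100};
  uint32_t limit_buf = cgen.addBuffer(limit_data);
  int x = cgen.addTensor({{1}, f32});
  int limit = cgen.addTensor({{1}, f32, limit_buf});
  int cond = cgen.addTensor({{1}, boolean});
  cgen.addOperatorLess({{x, limit}, {cond}});
  cgen.setInputsAndOutputs({x}, {cond});

  // Subgraph 2 (body): x = x + 1
  cgen.nextSubgraph();
  std::vector<float> step_data{1};
  uint32_t step_buf = cgen.addBuffer(step_data);
  int y_in = cgen.addTensor({{1}, f32});
  int step = cgen.addTensor({{1}, f32, step_buf});
  int y_out = cgen.addTensor({{1}, f32});
  cgen.addOperatorAdd({{y_in, step}, {y_out}}, circle::ActivationFunctionType_NONE);
  cgen.setInputsAndOutputs({y_in}, {y_out});

  return cgen.finish(); // serializes all three subgraphs into one model
}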
diff --git a/tests/nnfw_api/src/GenModelTest.h b/tests/nnfw_api/src/GenModelTest.h
new file mode 100644 (file)
index 0000000..530ccdd
--- /dev/null
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <nnfw_internal.h>
+
+#include <fstream>
+#include <string>
+
+#include "CircleGen.h"
+#include "fixtures.h"
+
+struct TestCaseData
+{
+  /**
+   * @brief A vector of input buffers
+   *
+   * @todo support other types as well as float
+   */
+  std::vector<std::vector<float>> inputs;
+  /**
+   * @brief A vector of output buffers
+   *
+   * @todo support other types as well as float
+   */
+  std::vector<std::vector<float>> outputs;
+};
+
+class GenModelTestContext
+{
+public:
+  GenModelTestContext(CircleBuffer &&cbuf) : _cbuf{std::move(cbuf)}, _backends{"cpu"} {}
+
+  /**
+   * @brief  Return circle buffer
+   *
+   * @return CircleBuffer& the circle buffer
+   */
+  const CircleBuffer &cbuf() const { return _cbuf; }
+
+  /**
+   * @brief Return test cases
+   *
+   * @return std::vector<TestCaseData>& the test cases
+   */
+  const std::vector<TestCaseData> &test_cases() const { return _test_cases; }
+
+  /**
+   * @brief Return backends
+   *
+   * @return const std::vector<std::string>& the backends to be tested
+   */
+  const std::vector<std::string> &backends() const { return _backends; }
+
+  /**
+   * @brief Return whether this test expects compilation to fail
+   *
+   * @return bool true if compilation is expected to fail
+   */
+  bool fail_compile() const { return _fail_compile; }
+
+  /**
+   * @brief Add a test case
+   *
+   * @param tc the test case to be added
+   */
+  void addTestCase(const TestCaseData &tc) { _test_cases.emplace_back(tc); }
+
+  /**
+   * @brief Set the backends to be tested
+   *
+   * @param backends the backend names to be tested
+   */
+  void setBackends(const std::vector<std::string> &backends)
+  {
+    _backends.clear();
+
+    for (auto backend : backends)
+    {
+#ifdef TEST_ACL_BACKEND
+      if (backend == "acl_cl" || backend == "acl_neon")
+      {
+        _backends.push_back(backend);
+      }
+#endif
+      if (backend == "cpu")
+      {
+        _backends.push_back(backend);
+      }
+    }
+  }
+
+  /**
+   * @brief Set the test to expect compilation failure
+   */
+  void setCompileFail() { _fail_compile = true; }
+
+private:
+  CircleBuffer _cbuf;
+  std::vector<TestCaseData> _test_cases;
+  std::vector<std::string> _backends;
+  bool _fail_compile{false};
+};
+
+/**
+ * @brief Generated Model test fixture for a one time inference
+ *
+ * This fixture is for one-time inference tests with a variety of generated models.
+ * It is the test maker's responsibility to create @c _context , which contains the
+ * test body: the generated circle buffer, the model input and output data, and the
+ * list of backends to be tested.
+ * The rest (calling API functions for execution) is done by @c SetUp and @c TearDown .
+ *
+ */
+class GenModelTest : public ::testing::Test
+{
+protected:
+  void SetUp() override
+  { // DO NOTHING
+  }
+
+  void TearDown() override
+  {
+    for (std::string backend : _context->backends())
+    {
+      // NOTE If a session could be prepared repeatedly after a single model load,
+      //      nnfw_create_session could move to SetUp and
+      //      nnfw_load_circle_from_buffer outside this for loop
+      NNFW_ENSURE_SUCCESS(nnfw_create_session(&_so.session));
+      auto &cbuf = _context->cbuf();
+      NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(_so.session, cbuf.buffer(), cbuf.size()));
+      NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_so.session, backend.data()));
+
+      if (_context->fail_compile())
+      {
+        ASSERT_EQ(nnfw_prepare(_so.session), NNFW_STATUS_ERROR);
+
+        NNFW_ENSURE_SUCCESS(nnfw_close_session(_so.session));
+        continue;
+      }
+      NNFW_ENSURE_SUCCESS(nnfw_prepare(_so.session));
+
+      // In/Out buffer settings
+      uint32_t num_inputs;
+      NNFW_ENSURE_SUCCESS(nnfw_input_size(_so.session, &num_inputs));
+      _so.inputs.resize(num_inputs);
+      for (uint32_t ind = 0; ind < _so.inputs.size(); ind++)
+      {
+        nnfw_tensorinfo ti;
+        NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_so.session, ind, &ti));
+        uint64_t input_elements = num_elems(&ti);
+        _so.inputs[ind].resize(input_elements);
+
+        ASSERT_EQ(nnfw_set_input(_so.session, ind, ti.dtype, _so.inputs[ind].data(),
+                                 sizeof(float) * input_elements),
+                  NNFW_STATUS_NO_ERROR);
+      }
+
+      uint32_t num_outputs;
+      NNFW_ENSURE_SUCCESS(nnfw_output_size(_so.session, &num_outputs));
+      _so.outputs.resize(num_outputs);
+      for (uint32_t ind = 0; ind < _so.outputs.size(); ind++)
+      {
+        nnfw_tensorinfo ti;
+        NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_so.session, ind, &ti));
+        uint64_t output_elements = num_elems(&ti);
+        _so.outputs[ind].resize(output_elements);
+        ASSERT_EQ(nnfw_set_output(_so.session, ind, ti.dtype, _so.outputs[ind].data(),
+                                  sizeof(float) * output_elements),
+                  NNFW_STATUS_NO_ERROR);
+      }
+
+      // Set input values, run, and check output values
+      for (auto &test_case : _context->test_cases())
+      {
+        auto &ref_inputs = test_case.inputs;
+        auto &ref_outputs = test_case.outputs;
+        ASSERT_EQ(_so.inputs.size(), ref_inputs.size());
+        for (uint32_t i = 0; i < _so.inputs.size(); i++)
+        {
+          // Fill the values
+          ASSERT_EQ(_so.inputs[i].size(), ref_inputs[i].size());
+          memcpy(_so.inputs[i].data(), ref_inputs[i].data(), _so.inputs[i].size() * sizeof(float));
+        }
+
+        NNFW_ENSURE_SUCCESS(nnfw_run(_so.session));
+
+        ASSERT_EQ(_so.outputs.size(), ref_outputs.size());
+        for (uint32_t i = 0; i < _so.outputs.size(); i++)
+        {
+          // Check output tensor values
+          auto &ref_output = ref_outputs[i];
+          auto &output = _so.outputs[i];
+          ASSERT_EQ(output.size(), ref_output.size());
+          for (uint32_t e = 0; e < ref_output.size(); e++)
+            EXPECT_NEAR(ref_output[e], output[e], 0.001); // TODO Find a better way to handle FP error
+        }
+      }
+
+      NNFW_ENSURE_SUCCESS(nnfw_close_session(_so.session));
+    }
+  }
+
+protected:
+  SessionObject _so;
+  std::unique_ptr<GenModelTestContext> _context;
+};
diff --git a/tests/nnfw_api/src/GenModelTests.cc b/tests/nnfw_api/src/GenModelTests.cc
deleted file mode 100644 (file)
index 2bd839a..0000000
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-#include <nnfw_internal.h>
-
-#include <fstream>
-
-#include "CircleGen.h"
-#include "fixtures.h"
-
-/**
- * @brief Generated Model test fixture for a one time inference
- *
- * This fixture is for one-time inference test with variety of generated models.
- * It is the user's responsiblity to create @c _cbuf , @c _ref_inputs and @c _ref_outputs in the
- * test body, which are generated circle buffer, model input data and output data respectively.
- * The rest(calling API functions for execution) is done by @c Setup and @c TearDown .
- *
- */
-class GenModelTest : public ::testing::Test
-{
-protected:
-  void SetUp() override { NNFW_ENSURE_SUCCESS(nnfw_create_session(&_so.session)); }
-
-  void TearDown() override
-  {
-    NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(_so.session, _cbuf.buffer(), _cbuf.size()));
-    NNFW_ENSURE_SUCCESS(nnfw_prepare(_so.session));
-
-    // In/Out buffer settings
-    {
-      uint32_t num_inputs;
-      NNFW_ENSURE_SUCCESS(nnfw_input_size(_so.session, &num_inputs));
-      _so.inputs.resize(num_inputs);
-      for (uint32_t ind = 0; ind < _so.inputs.size(); ind++)
-      {
-        nnfw_tensorinfo ti;
-        NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_so.session, ind, &ti));
-        uint64_t input_elements = num_elems(&ti);
-        _so.inputs[ind].resize(input_elements);
-
-        ASSERT_EQ(nnfw_set_input(_so.session, ind, ti.dtype, _so.inputs[ind].data(),
-                                 sizeof(float) * input_elements),
-                  NNFW_STATUS_NO_ERROR);
-      }
-
-      uint32_t num_outputs;
-      NNFW_ENSURE_SUCCESS(nnfw_output_size(_so.session, &num_outputs));
-      _so.outputs.resize(num_outputs);
-      for (uint32_t ind = 0; ind < _so.outputs.size(); ind++)
-      {
-        nnfw_tensorinfo ti;
-        NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(_so.session, ind, &ti));
-        uint64_t output_elements = num_elems(&ti);
-        _so.outputs[ind].resize(output_elements);
-        ASSERT_EQ(nnfw_set_output(_so.session, ind, ti.dtype, _so.outputs[ind].data(),
-                                  sizeof(float) * output_elements),
-                  NNFW_STATUS_NO_ERROR);
-      }
-    }
-
-    // Set input values, run, and check output values
-    {
-      ASSERT_EQ(_so.inputs.size(), _ref_inputs.size());
-      for (uint32_t i = 0; i < _so.inputs.size(); i++)
-      {
-        // Fill the values
-        ASSERT_EQ(_so.inputs[i].size(), _ref_inputs[i].size());
-        memcpy(_so.inputs[i].data(), _ref_inputs[i].data(), _so.inputs[i].size() * sizeof(float));
-      }
-
-      NNFW_ENSURE_SUCCESS(nnfw_run(_so.session));
-
-      ASSERT_EQ(_so.outputs.size(), _ref_outputs.size());
-      for (uint32_t i = 0; i < _so.outputs.size(); i++)
-      {
-        // Check output tensor values
-        auto &ref_output = _ref_outputs[i];
-        auto &output = _so.outputs[i];
-        ASSERT_EQ(output.size(), ref_output.size());
-        for (uint32_t e = 0; e < ref_output.size(); e++)
-          ASSERT_FLOAT_EQ(ref_output[e], output[e]);
-      }
-    }
-
-    NNFW_ENSURE_SUCCESS(nnfw_close_session(_so.session));
-  }
-
-protected:
-  SessionObject _so;
-  CircleBuffer _cbuf;
-  std::vector<std::vector<float>> _ref_inputs;
-  std::vector<std::vector<float>> _ref_outputs;
-};
-
-TEST_F(GenModelTest, OneOp_Add_VarToConst)
-{
-  CircleGen cgen;
-  std::vector<float> rhs_data{5, 4, 7, 4};
-  uint32_t rhs_buf = cgen.addBuffer(rhs_data);
-  int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
-  int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
-  int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
-  cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
-  cgen.setInputsAndOutputs({lhs}, {out});
-  _cbuf = cgen.finish();
-
-  _ref_inputs = {{1, 3, 2, 4}};
-  _ref_outputs = {{6, 7, 9, 8}};
-}
-
-TEST_F(GenModelTest, OneOp_Add_VarToVar)
-{
-  CircleGen cgen;
-  int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
-  int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
-  int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
-  cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
-  cgen.setInputsAndOutputs({lhs, rhs}, {out});
-  _cbuf = cgen.finish();
-
-  _ref_inputs = {{1, 3, 2, 4}, {5, 4, 7, 4}};
-  _ref_outputs = {{6, 7, 9, 8}};
-}
-
-TEST_F(GenModelTest, OneOp_AvgPool2D)
-{
-  CircleGen cgen;
-  int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
-  int out = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
-  cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
-                                circle::ActivationFunctionType_NONE);
-  cgen.setInputsAndOutputs({in}, {out});
-  _cbuf = cgen.finish();
-
-  _ref_inputs = {{1, 3, 2, 4}};
-  _ref_outputs = {{2.5}};
-}
index c1f4369..e2d70d2 100644 (file)
@@ -20,6 +20,7 @@
 #include "common.h"
 #include "fixtures.h"
 #include "NNPackages.h"
+#include "CircleGen.h"
 
 void set_input_output(nnfw_session *session, const std::vector<float> &input,
                       std::vector<float> &actual_output)
@@ -58,6 +59,7 @@ void set_input_output(nnfw_session *session, const std::vector<float> &input0,
  *
  * @note Run this test with "cpu" backend
  */
+// TODO Rewrite this with CircleGen
 class TestDynamicTensorReshapeModelLoaded
     : public ValidationTestModelLoaded<NNPackages::DYNAMIC_TENSOR_RESHAPE>
 {
@@ -209,30 +211,6 @@ TEST_F(TestDynamicTensorReshapeModelLoaded, neg_reshape_multiple_executions)
 //    Trying to set unknown dim to other value before calling nnfw_prepare()
 //
 
-class TestInputUnknownDimInputConcatModelLoaded
-    : public ValidationTestModelLoaded<NNPackages::UNKNOWN_DIM_INPUT_CONCAT>
-{
-protected:
-  void prepare_apply_set_input_output(const std::vector<float> &input0,
-                                      const std::vector<float> &input1,
-                                      std::vector<float> *actual_output, nnfw_tensorinfo input0_ti)
-  {
-    NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
-    NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &input0_ti));
-
-    ASSERT_EQ(nnfw_set_input(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, input0.data(),
-                             sizeof(float) * input0.size()),
-              NNFW_STATUS_NO_ERROR);
-    ASSERT_EQ(nnfw_set_input(_session, 1, NNFW_TYPE_TENSOR_FLOAT32, input1.data(),
-                             sizeof(float) * input1.size()),
-              NNFW_STATUS_NO_ERROR);
-
-    ASSERT_EQ(nnfw_set_output(_session, 0, NNFW_TYPE_TENSOR_FLOAT32, actual_output->data(),
-                              sizeof(float) * actual_output->size()),
-              NNFW_STATUS_NO_ERROR);
-  }
-};
-
 /**
  * @brief Testing the following model:
  *
@@ -248,9 +226,28 @@ protected:
  *
  * @note Run this test with "cpu" backend
  */
-TEST_F(TestInputUnknownDimInputConcatModelLoaded, concat_input0_to_2x3)
+auto build_model_buf_Concatenation_unknown_dims()
 {
-  NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+  // Model is not important
+  CircleGen cgen;
+  auto f32 = circle::TensorType::TensorType_FLOAT32;
+  int in1 = cgen.addTensor({{1, 1}, f32}); // consider this [None, None]
+  int in2 = cgen.addTensor({{2, 3}, f32});
+  int out = cgen.addTensor({{}, f32}); // scalar, meaning output shape is unspecified
+  cgen.addOperatorConcatenation({{in1, in2}, {out}}, 0, circle::ActivationFunctionType_NONE);
+  cgen.setInputsAndOutputs({in1, in2}, {out});
+  auto cbuf = cgen.finish();
+  return cbuf;
+}
+
+TEST(TestDynamicTensor, concat_unknown_dim_input0_to_2x3)
+{
+  nnfw_session *session = nullptr;
+  NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+  const auto model_buf = build_model_buf_Concatenation_unknown_dims();
+  NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size()));
+
+  NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
 
   const std::vector<float> input0 = {1, 2, 3};          // of shape [1, 3]
   const std::vector<float> input1 = {4, 5, 6, 7, 8, 9}; // of shape [2, 3]
@@ -260,13 +257,13 @@ TEST_F(TestInputUnknownDimInputConcatModelLoaded, concat_input0_to_2x3)
 
   // input reshaping to [1, 3]
   nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 2, {1, 3}};
-  NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &ti));
-  NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
+  NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &ti));
+  NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
 
-  set_input_output(_session, input0, input1, actual_output);
+  set_input_output(session, input0, input1, actual_output);
 
   // Do inference
-  NNFW_STATUS res = nnfw_run(_session);
+  NNFW_STATUS res = nnfw_run(session);
   NNFW_ENSURE_SUCCESS(res);
 
   // output value check
@@ -289,9 +286,14 @@ TEST_F(TestInputUnknownDimInputConcatModelLoaded, concat_input0_to_2x3)
  *
  * @note Run this test with "cpu" backend and "linear" executor
  */
-TEST_F(TestInputUnknownDimInputConcatModelLoaded, neg_concat_input0_to_wrong_shape)
+TEST(TestDynamicTensor, neg_concat_input0_to_wrong_shape)
 {
-  NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+  nnfw_session *session = nullptr;
+  NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+  const auto model_buf = build_model_buf_Concatenation_unknown_dims();
+  NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size()));
+
+  NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
 
   const std::vector<float> input0 = {1, 2, 3};          // of shape [3, 1], wrong shape
   const std::vector<float> input1 = {4, 5, 6, 7, 8, 9}; // of shape [2, 3]
@@ -300,9 +302,9 @@ TEST_F(TestInputUnknownDimInputConcatModelLoaded, neg_concat_input0_to_wrong_sha
 
   // input reshaping to [3, 1]
   nnfw_tensorinfo ti = {NNFW_TYPE_TENSOR_FLOAT32, 2, {3, 1}};
-  NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &ti));
+  NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &ti));
 
-  ASSERT_EQ(nnfw_prepare(_session), NNFW_STATUS_ERROR);
+  ASSERT_EQ(nnfw_prepare(session), NNFW_STATUS_ERROR);
 }
 
 //
@@ -325,12 +327,30 @@ TEST_F(TestInputUnknownDimInputConcatModelLoaded, neg_concat_input0_to_wrong_sha
  *
  * @note Run this test with "cpu" backend
  */
-using TestDynamicTensorApplyTensorInfoBinaryOp =
-    ValidationTestModelLoaded<NNPackages::ADD_UNSPECIFIED_RANK_INPUTS>;
+auto build_model_buf_Add_unspecified_rank()
+{
+  // Model is not important
+  CircleGen cgen;
+  auto f32 = circle::TensorType::TensorType_FLOAT32;
+  int in1 = cgen.addTensor({{}, f32}); // scalar, meaning shape is unspecified
+  int in2 = cgen.addTensor({{1, 2, 3}, f32});
+  int op_out = cgen.addTensor({{}, f32}); // unspecified
+  int out = cgen.addTensor({{}, f32});    // unspecified
+  cgen.addOperatorAdd({{in1, in2}, {op_out}}, circle::ActivationFunctionType_NONE);
+  cgen.addOperatorAdd({{op_out, op_out}, {out}}, circle::ActivationFunctionType_NONE);
+  cgen.setInputsAndOutputs({in1, in2}, {out});
+  auto cbuf = cgen.finish();
+  return cbuf;
+}
 
-TEST_F(TestDynamicTensorApplyTensorInfoBinaryOp, set_input_tensorinfo_after_compilation_add)
+TEST(TestDynamicTensor, set_input_tensorinfo_after_compilation_add)
 {
-  NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+  nnfw_session *session = nullptr;
+  NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+  const auto model_buf = build_model_buf_Add_unspecified_rank();
+  NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size()));
+
+  NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
 
   // input reshaping to [2, 2, 3]
   nnfw_tensorinfo input0_ti = {NNFW_TYPE_TENSOR_FLOAT32, 3, {2, 2, 3}};
@@ -341,14 +361,14 @@ TEST_F(TestDynamicTensorApplyTensorInfoBinaryOp, set_input_tensorinfo_after_comp
   std::vector<float> expected_output = {1.1 * 2, 2.1 * 2, 3.1 * 2, 4.1 * 2,  5.1 * 2,  6.1 * 2,
                                         7.1 * 2, 8.1 * 2, 9.1 * 2, 10.1 * 2, 11.1 * 2, 12.1 * 2};
 
-  NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
+  NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
 
-  NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &input0_ti));
+  NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &input0_ti));
 
-  set_input_output(_session, input0, input1, actual_output);
+  set_input_output(session, input0, input1, actual_output);
 
   // Do inference
-  NNFW_STATUS res = nnfw_run(_session);
+  NNFW_STATUS res = nnfw_run(session);
   NNFW_ENSURE_SUCCESS(res);
 
   // output value check
@@ -370,11 +390,27 @@ TEST_F(TestDynamicTensorApplyTensorInfoBinaryOp, set_input_tensorinfo_after_comp
  *
  * @note Run this test with "cpu" backend
  */
-using TestDynamicTensorApplyTensorInfoUnaryOp = ValidationTestModelLoaded<NNPackages::NEG>;
 
-TEST_F(TestDynamicTensorApplyTensorInfoUnaryOp, set_input_tensorinfo_after_compilation_neg)
+auto build_model_buf_NEG()
 {
-  NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+  // Model is not important
+  CircleGen cgen;
+  int in = cgen.addTensor({{4, 4}, circle::TensorType::TensorType_FLOAT32});
+  int out = cgen.addTensor({{4, 4}, circle::TensorType::TensorType_FLOAT32});
+  cgen.addOperatorNeg({{in}, {out}});
+  cgen.setInputsAndOutputs({in}, {out});
+  auto cbuf = cgen.finish();
+  return cbuf;
+}
+
+TEST(TestDynamicTensor, set_input_tensorinfo_after_compilation_neg)
+{
+  nnfw_session *session = nullptr;
+  NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+  const auto model_buf = build_model_buf_NEG();
+  NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, model_buf.buffer(), model_buf.size()));
+
+  NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "cpu"));
 
   nnfw_tensorinfo input0_ti_original = {NNFW_TYPE_TENSOR_FLOAT32, 2, {4, 4}};
 
@@ -397,28 +433,28 @@ TEST_F(TestDynamicTensorApplyTensorInfoUnaryOp, set_input_tensorinfo_after_compi
     expected_output[i] = -1 * input0[i];
   }
 
-  NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
+  NNFW_ENSURE_SUCCESS(nnfw_prepare(session));
 
   // input shape check
   {
     nnfw_tensorinfo ti = {};
-    NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_session, 0, &ti));
+    NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(session, 0, &ti));
     ASSERT_TRUE(tensorInfoEqual(input0_ti_original, ti));
   }
 
-  NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(_session, 0, &input0_ti));
+  NNFW_ENSURE_SUCCESS(nnfw_set_input_tensorinfo(session, 0, &input0_ti));
 
   // input shape check
   {
     nnfw_tensorinfo ti = {};
-    NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(_session, 0, &ti));
+    NNFW_ENSURE_SUCCESS(nnfw_input_tensorinfo(session, 0, &ti));
     ASSERT_TRUE(tensorInfoEqual(input0_ti, ti));
   }
 
-  set_input_output(_session, input0, actual_output);
+  set_input_output(session, input0, actual_output);
 
   // Do inference
-  NNFW_STATUS res = nnfw_run(_session);
+  NNFW_STATUS res = nnfw_run(session);
   NNFW_ENSURE_SUCCESS(res);
 
   // output value check
@@ -469,8 +505,7 @@ TEST_F(TestWhileDynamicModelLoaded, neg_run_verify)
 
   set_input_output(_session, while_dynamic_input0, actual_output0);
 
-  // TODO Change error code NNFW_STATUS_ERROR -> NNFW_INSUFFICIENT_OUTPUT_SIZE
-  ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_ERROR);
+  ASSERT_EQ(nnfw_run(_session), NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE);
 }
 
 using TestIfDynamicModelLoaded = ValidationTestModelLoaded<NNPackages::IF_DYNAMIC>;
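Switching the expected result to NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE lets callers tell "output buffer too small for a dynamically shaped result" apart from a generic error. A hedged sketch of the recovery loop this status presumably enables (it assumes nnfw_output_tensorinfo reports the actual inferred shape after the failed run; num_elems is the helper already used by these tests):

std::vector<float> output(1); // deliberately undersized first guess
NNFW_ENSURE_SUCCESS(nnfw_set_output(session, 0, NNFW_TYPE_TENSOR_FLOAT32, output.data(),
                                    sizeof(float) * output.size()));
NNFW_STATUS status = nnfw_run(session);
if (status == NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE)
{
  nnfw_tensorinfo ti;
  NNFW_ENSURE_SUCCESS(nnfw_output_tensorinfo(session, 0, &ti)); // actual shape after the run
  output.resize(num_elems(&ti));
  NNFW_ENSURE_SUCCESS(nnfw_set_output(session, 0, ti.dtype, output.data(),
                                      sizeof(float) * output.size()));
  status = nnfw_run(session); // retry with a large-enough buffer
}
NNFW_ENSURE_SUCCESS(status);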
index ebae2ea..534973c 100644 (file)
@@ -29,8 +29,7 @@ const char *TEST_PACKAGE_NAMES[] = {
     "add", "add_no_manifest", "add_invalid_manifest",
 
     // for dynamic tensor test
-    "input_reshaping_add", "dynamic_tensor_reshape", "unknown_dim_input_concat",
-    "add_unspecified_rank_inputs", "neg", "while_dynamic", "if_dynamic",
+    "input_reshaping_add", "dynamic_tensor_reshape", "while_dynamic", "if_dynamic",
 };
 
 NNPackages &NNPackages::get()
index 7f41d6b..735fa96 100644 (file)
@@ -23,7 +23,7 @@
  * @brief A helper class to find NN Packages for testing
  *        To add a nnpackage for your test, please do the followings:
  *          0. Prerequisite: the actual file must be uploaded on the server
- *                           Add `config.sh` file to `tests/scripts/nnfw_api_gtest_models`
+ *                           Add `config.sh` file to `tests/scripts/models/nnfw_api_gtest`
  *          1. Append an enum value to @c NNPackages::TestPackages
  *          2. Append a string literal to @c TEST_PACKAGE_NAMES in the source file
  */
@@ -45,9 +45,6 @@ public:
     // for dynamic tensor test
     INPUT_RESHAPING_ADD,
     DYNAMIC_TENSOR_RESHAPE,
-    UNKNOWN_DIM_INPUT_CONCAT,
-    ADD_UNSPECIFIED_RANK_INPUTS,
-    NEG,
     WHILE_DYNAMIC,
     IF_DYNAMIC,
 
index e4dfa91..05914b8 100644 (file)
 #include "fixtures.h"
 #include "NNPackages.h"
 
+#include <nnfw_internal.h>
+
+#include "CircleGen.h"
+
 TEST_F(RegressionTest, github_1535)
 {
   auto package_path = NNPackages::get().getModelAbsolutePath(NNPackages::ADD);
@@ -35,4 +39,26 @@ TEST_F(RegressionTest, github_1535)
 
   NNFW_ENSURE_SUCCESS(nnfw_close_session(session1));
   NNFW_ENSURE_SUCCESS(nnfw_close_session(session2));
+
+  SUCCEED();
+}
+
+TEST_F(RegressionTest, neg_github_3826)
+{
+  // Model is not important
+  CircleGen cgen;
+  int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int out = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+  cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
+                                circle::ActivationFunctionType_NONE);
+  cgen.setInputsAndOutputs({in}, {out});
+  auto cbuf = cgen.finish();
+
+  nnfw_session *session = nullptr;
+  NNFW_ENSURE_SUCCESS(nnfw_create_session(&session));
+  NNFW_ENSURE_SUCCESS(nnfw_load_circle_from_buffer(session, cbuf.buffer(), cbuf.size()));
+  // Test the case where no backends are loaded for the session
+  NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(session, "unavailable_backend"));
+  ASSERT_EQ(nnfw_prepare(session), NNFW_STATUS_ERROR);
+  NNFW_ENSURE_SUCCESS(nnfw_close_session(session));
 }
index 11c6034..1d3d4fc 100644 (file)
 
 using ValidationTestAddModelLoaded = ValidationTestModelLoaded<NNPackages::ADD>;
 
-TEST_F(ValidationTestAddModelLoaded, prepare_001) { NNFW_ENSURE_SUCCESS(nnfw_prepare(_session)); }
+TEST_F(ValidationTestAddModelLoaded, prepare_001)
+{
+  NNFW_ENSURE_SUCCESS(nnfw_prepare(_session));
+
+  SUCCEED();
+}
 
 TEST_F(ValidationTestAddModelLoaded, set_available_backends_001)
 {
   NNFW_ENSURE_SUCCESS(nnfw_set_available_backends(_session, "cpu"));
+
+  SUCCEED();
 }
 
 TEST_F(ValidationTestAddModelLoaded, get_input_size)
@@ -48,6 +55,17 @@ TEST_F(ValidationTestAddModelLoaded, output_tensorinfo)
   ASSERT_EQ(tensor_info.dims[0], 1);
 }
 
+TEST_F(ValidationTestAddModelLoaded, input_output_tensorindex)
+{
+  uint32_t in_ind = 100;
+  NNFW_ENSURE_SUCCESS(nnfw_input_tensorindex(_session, "X_input", &in_ind));
+  ASSERT_EQ(in_ind, 0);
+
+  uint32_t out_ind = 100;
+  NNFW_ENSURE_SUCCESS(nnfw_output_tensorindex(_session, "ADD_TOP", &out_ind));
+  ASSERT_EQ(out_ind, 0);
+}
+
 TEST_F(ValidationTestAddModelLoaded, neg_run)
 {
   // nnfw_prepare is not called
@@ -91,3 +109,16 @@ TEST_F(ValidationTestAddModelLoaded, neg_output_tensorinfo)
   // tensor_info is null
   ASSERT_EQ(nnfw_output_tensorinfo(_session, 0, nullptr), NNFW_STATUS_UNEXPECTED_NULL);
 }
+
+TEST_F(ValidationTestAddModelLoaded, neg_input_output_tensorindex)
+{
+  uint32_t in_ind = 100;
+  ASSERT_EQ(nnfw_input_tensorindex(_session, "ADD_TOP", &in_ind), NNFW_STATUS_ERROR);
+  ASSERT_EQ(in_ind, 100);
+  ASSERT_EQ(nnfw_input_tensorindex(_session, "y_var", &in_ind), NNFW_STATUS_ERROR);
+  ASSERT_EQ(in_ind, 100);
+
+  uint32_t out_ind = 100;
+  ASSERT_EQ(nnfw_output_tensorindex(_session, "X_input", &out_ind), NNFW_STATUS_ERROR);
+  ASSERT_EQ(out_ind, 100);
+}
index 4e2a905..e09402b 100644 (file)
@@ -23,6 +23,8 @@ TEST_F(ValidationTestFourAddModelsSetInput, run_001)
 {
   NNFW_ENSURE_SUCCESS(nnfw_run(_objects[0].session));
   NNFW_ENSURE_SUCCESS(nnfw_run(_objects[1].session));
+
+  SUCCEED();
 }
 
 TEST_F(ValidationTestFourAddModelsSetInput, run_002)
@@ -33,6 +35,8 @@ TEST_F(ValidationTestFourAddModelsSetInput, run_002)
     for (auto obj : _objects)
       NNFW_ENSURE_SUCCESS(nnfw_run(obj.session));
   }
+
+  SUCCEED();
 }
 
 TEST_F(ValidationTestFourAddModelsSetInput, run_async)
@@ -41,4 +45,6 @@ TEST_F(ValidationTestFourAddModelsSetInput, run_async)
     NNFW_ENSURE_SUCCESS(nnfw_run_async(obj.session));
   for (auto obj : _objects)
     NNFW_ENSURE_SUCCESS(nnfw_await(obj.session));
+
+  SUCCEED();
 }
index dafcd36..4ef14f7 100644 (file)
@@ -29,6 +29,8 @@ TEST_F(ValidationTestSessionCreated, close_and_create_again)
 {
   NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
   NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
+
+  SUCCEED();
 }
 
 TEST_F(ValidationTestSessionCreated, neg_load_session_1)
index 5e6027f..c746492 100644 (file)
@@ -21,12 +21,16 @@ TEST_F(ValidationTestSingleSession, create_001)
 {
   NNFW_ENSURE_SUCCESS(nnfw_create_session(&_session));
   NNFW_ENSURE_SUCCESS(nnfw_close_session(_session));
+
+  SUCCEED();
 }
 
 TEST_F(ValidationTestSingleSession, query_info_u32)
 {
   uint32_t val = 0;
   NNFW_ENSURE_SUCCESS(nnfw_query_info_u32(nullptr, NNFW_INFO_ID_VERSION, &val));
+
+  SUCCEED();
 }
 
 TEST_F(ValidationTestSingleSession, neg_create_001)
index 8fe5c16..f273d65 100644 (file)
@@ -19,7 +19,7 @@
 
 #include <array>
 #include <gtest/gtest.h>
-#include <nnfw.h>
+#include <nnfw_experimental.h>
 
 #include "NNPackages.h"
 
diff --git a/tests/nnfw_api/src/one_op_tests/Add.cc b/tests/nnfw_api/src/one_op_tests/Add.cc
new file mode 100644 (file)
index 0000000..281d5de
--- /dev/null
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_Add_VarToConst)
+{
+  CircleGen cgen;
+  std::vector<float> rhs_data{5, 4, 7, 4};
+  uint32_t rhs_buf = cgen.addBuffer(rhs_data);
+  int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
+  int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+  cgen.setInputsAndOutputs({lhs}, {out});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->addTestCase({{{1, 3, 2, 4}}, {{6, 7, 9, 8}}});
+  _context->addTestCase({{{0, 1, 2, 3}}, {{5, 5, 9, 7}}});
+  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+  SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Add_VarToVar)
+{
+  CircleGen cgen;
+  int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+  cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->addTestCase({{{1, 3, 2, 4}, {5, 4, 7, 4}}, {{6, 7, 9, 8}}});
+  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+  SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_InvalidShape)
+{
+  CircleGen cgen;
+  int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+  int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+  cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+  cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+  _context->setCompileFail();
+
+  SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_InvalidShapeConst)
+{
+  CircleGen cgen;
+  std::vector<float> rhs_data{5, 4, 0, 7, 4, 0};
+  uint32_t rhs_buf = cgen.addBuffer(rhs_data);
+  int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
+  int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+  cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+  cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+  _context->setCompileFail();
+
+  SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_OneOperand)
+{
+  CircleGen cgen;
+  int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+  cgen.addOperatorAdd({{in}, {out}}, circle::ActivationFunctionType_NONE);
+  cgen.setInputsAndOutputs({in}, {out});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+  _context->setCompileFail();
+
+  SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc b/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc
new file mode 100644 (file)
index 0000000..854517e
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_AvgPool2D)
+{
+  CircleGen cgen;
+  int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int out = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+  cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
+                                circle::ActivationFunctionType_NONE);
+  cgen.setInputsAndOutputs({in}, {out});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->addTestCase({{{1, 3, 2, 4}}, {{2.5}}});
+  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+  SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_AvgPool2D)
+{
+  CircleGen cgen;
+  int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+  cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
+                                circle::ActivationFunctionType_NONE);
+  cgen.setInputsAndOutputs({in}, {out});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+  _context->setCompileFail();
+
+  SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Cos.cc b/tests/nnfw_api/src/one_op_tests/Cos.cc
new file mode 100644 (file)
index 0000000..72bfe3e
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_Cos)
+{
+  CircleGen cgen;
+  int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  cgen.addOperatorCos({{in}, {out}});
+  cgen.setInputsAndOutputs({in}, {out});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  const float pi = 3.141592653589793;
+  _context->addTestCase({{{0, pi / 2, pi, 7}}, {{1, 0, -1, 0.75390225434}}});
+  _context->setBackends({"cpu"});
+
+  SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Cos_TwoOperand)
+{
+  CircleGen cgen;
+  int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int out1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int out2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  cgen.addOperatorCos({{lhs, rhs}, {out1, out2}});
+  cgen.setInputsAndOutputs({lhs, rhs}, {out1, out2});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->setBackends({"cpu"});
+  _context->setCompileFail();
+
+  SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/L2Normalization.cc b/tests/nnfw_api/src/one_op_tests/L2Normalization.cc
new file mode 100644 (file)
index 0000000..8b4b8f5
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_L2Normalization)
+{
+  CircleGen cgen;
+  int in = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_FLOAT32});
+  int out = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_FLOAT32});
+
+  cgen.addOperatorL2Normalization({{in}, {out}});
+  cgen.setInputsAndOutputs({in}, {out});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->addTestCase({{{0, 3, 4, 0, 5, 12, 0, 8, 15, 0, 7, 24}},
+                         {{0, 0.6, 0.8, 0, 0.38461539149284363, 0.92307698726654053, 0,
+                           0.47058823704719543, 0.88235294818878174, 0, 0.28, 0.96}}});
+  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+  SUCCEED();
+}
  * limitations under the License.
  */
 
-#include "ir/operation/Quantize.h"
+#include "GenModelTest.h"
 
-#include "ir/OperationVisitor.h"
-
-namespace onert
-{
-namespace ir
-{
-namespace operation
+TEST_F(GenModelTest, OneOp_LeakyRelu)
 {
+  CircleGen cgen;
+  int in = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+  int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+  cgen.addOperatorLeakyRelu({{in}, {out}}, 0.5);
+  cgen.setInputsAndOutputs({in}, {out});
 
-void Quantize::accept(OperationVisitor &v) const { v.visit(*this); }
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->addTestCase({{{0, 1.0, 3.0, 1.0, -1.0, -2.0f}}, {{0, 1.0, 3.0, 1.0, -0.5, -1.0}}});
+  _context->setBackends({"acl_cl", "acl_neon"});
 
-Quantize::Quantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs)
-    : Operation{OperandConstraint::createExact(2u), inputs, outputs}
-{
+  SUCCEED();
 }
-
-} // namespace operation
-} // namespace ir
-} // namespace onert
diff --git a/tests/nnfw_api/src/one_op_tests/Pad.cc b/tests/nnfw_api/src/one_op_tests/Pad.cc
new file mode 100644 (file)
index 0000000..10fe6c7
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_Pad)
+{
+  CircleGen cgen;
+  int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+  uint32_t padding_buf = cgen.addBuffer(padding_data);
+  int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+  int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+  cgen.addOperatorPad({{in, padding}, {out}});
+  cgen.setInputsAndOutputs({in}, {out});
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->addTestCase({{{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}}});
+  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+  SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadRank)
+{
+  CircleGen cgen;
+  int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  std::vector<int32_t> padding_data{1, 1, 1, 1};
+  uint32_t padding_buf = cgen.addBuffer(padding_data);
+  int padding = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, padding_buf});
+  int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+  cgen.addOperatorPad({{in, padding}, {out}});
+  cgen.setInputsAndOutputs({in}, {out});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+  _context->setCompileFail();
+
+  SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadDim0)
+{
+  CircleGen cgen;
+  int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  std::vector<int32_t> padding_data{1, 1, 1, 1};
+  uint32_t padding_buf = cgen.addBuffer(padding_data);
+  int padding = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+  int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+  cgen.addOperatorPad({{in, padding}, {out}});
+  cgen.setInputsAndOutputs({in}, {out});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+  _context->setCompileFail();
+
+  SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadDim1)
+{
+  CircleGen cgen;
+  int in = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+  std::vector<int32_t> padding_data{1, 1, 1, 1};
+  uint32_t padding_buf = cgen.addBuffer(padding_data);
+  int padding = cgen.addTensor({{4, 1}, circle::TensorType::TensorType_INT32, padding_buf});
+  int out = cgen.addTensor({{2, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+
+  cgen.addOperatorPad({{in, padding}, {out}});
+  cgen.setInputsAndOutputs({in}, {out});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+  _context->setCompileFail();
+
+  SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/PadV2.cc b/tests/nnfw_api/src/one_op_tests/PadV2.cc
new file mode 100644 (file)
index 0000000..9f7ff9c
--- /dev/null
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_PadV2)
+{
+  CircleGen cgen;
+  int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+  uint32_t padding_buf = cgen.addBuffer(padding_data);
+  int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+  std::vector<float> padding_value_data{3.0};
+  uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+  int padding_value =
+      cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
+
+  int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+  cgen.addOperatorPadV2({{in, padding, padding_value}, {out}});
+  cgen.setInputsAndOutputs({in}, {out});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->addTestCase({{{1, 2, 3, 4}}, {{3, 3, 3, 3, 3, 1, 2, 3, 3, 3, 4, 3, 3, 3, 3, 3}}});
+  _context->setBackends({"cpu"});
+
+  SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_PadV2_InvalidPadRank)
+{
+  CircleGen cgen;
+  int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  std::vector<int32_t> padding_data{1, 1, 1, 1};
+  uint32_t padding_buf = cgen.addBuffer(padding_data);
+  int padding = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, padding_buf});
+  std::vector<float> padding_value_data{3.0};
+  uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+  int padding_value =
+      cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
+
+  int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+  cgen.addOperatorPadV2({{in, padding, padding_value}, {out}});
+  cgen.setInputsAndOutputs({in}, {out});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+  _context->setCompileFail();
+
+  SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_PadV2_InvalidPadDim0)
+{
+  CircleGen cgen;
+  int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  std::vector<int32_t> padding_data{1, 1, 1, 1};
+  uint32_t padding_buf = cgen.addBuffer(padding_data);
+  int padding = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+  std::vector<float> padding_value_data{3.0};
+  uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+  int padding_value =
+      cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
+
+  int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+
+  cgen.addOperatorPadV2({{in, padding, padding_value}, {out}});
+  cgen.setInputsAndOutputs({in}, {out});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+  _context->setCompileFail();
+
+  SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_PadV2_InvalidPadDim1)
+{
+  CircleGen cgen;
+  int in = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+  std::vector<int32_t> padding_data{1, 1, 1, 1};
+  uint32_t padding_buf = cgen.addBuffer(padding_data);
+  int padding = cgen.addTensor({{4, 1}, circle::TensorType::TensorType_INT32, padding_buf});
+  std::vector<float> padding_value_data{3.0};
+  uint32_t padding_value_buf = cgen.addBuffer(padding_value_data);
+  int padding_value =
+      cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, padding_value_buf});
+
+  int out = cgen.addTensor({{2, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+
+  cgen.addOperatorPadV2({{in, padding, padding_value}, {out}});
+  cgen.setInputsAndOutputs({in}, {out});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+  _context->setCompileFail();
+
+  SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Rank.cc b/tests/nnfw_api/src/one_op_tests/Rank.cc
new file mode 100644 (file)
index 0000000..ed9d672
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+// WORKAROUND Handle int32_t type input/output
+union float_int {
+  int32_t i;
+  float f;
+};
+
+TEST_F(GenModelTest, OneOp_Rank)
+{
+  CircleGen cgen;
+  int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+  int out = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+
+  // TODO Handle more types in addTestCase
+  float_int output_data;
+  output_data.i = 4;
+
+  cgen.addOperatorRank({{in}, {out}});
+  cgen.setInputsAndOutputs({in}, {out});
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->addTestCase(
+      {{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}}, {{output_data.f}}});
+  _context->setBackends({"cpu"});
+
+  SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Rank_Int32)
+{
+  CircleGen cgen;
+  int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_INT32});
+  int out = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+
+  // TODO Handle more types in addTestCase
+  float_int output_data;
+  output_data.i = 4;
+
+  cgen.addOperatorRank({{in}, {out}});
+  cgen.setInputsAndOutputs({in}, {out});
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->addTestCase(
+      {{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}}, {{output_data.f}}});
+  _context->setBackends({"cpu"});
+
+  SUCCEED();
+}
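The `float_int` union above bit-casts an int32_t into a float so the expected Rank output can pass through `addTestCase`, which only accepts float data. Since reading the inactive member of a union is undefined behavior in C++, a `memcpy`-based sketch of the same trick is shown below for reference; the helper name is illustrative and not part of the test suite:

```cpp
#include <cstdint>
#include <cstring>

// Reinterpret the bits of an int32_t as a float without union type-punning.
inline float bitcast_int32_to_float(int32_t i)
{
  static_assert(sizeof(float) == sizeof(int32_t), "size mismatch");
  float f;
  std::memcpy(&f, &i, sizeof(f)); // well-defined; compilers optimize this away
  return f;
}
```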
diff --git a/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc b/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc
new file mode 100644 (file)
index 0000000..94f45d4
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_ResizeNearestNeighbor)
+{
+  CircleGen cgen;
+  int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+  std::vector<int32_t> size_data{3, 3};
+  uint32_t size_buf = cgen.addBuffer(size_data);
+  int size = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32, size_buf});
+
+  int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+
+  cgen.addOperatorResizeNearestNeighbor({{in, size}, {out}});
+  cgen.setInputsAndOutputs({in}, {out});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->addTestCase({{{3, 4, 6, 10, 9, 10, 12, 16}},
+                         {{3, 4, 3, 4, 6, 10, 3, 4, 3, 4, 6, 10, 9, 10, 9, 10, 12, 16}}});
+  _context->setBackends({"acl_cl"});
+
+  SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/While.cc b/tests/nnfw_api/src/one_op_tests/While.cc
new file mode 100644 (file)
index 0000000..1d86e6d
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_While)
+{
+  // The model is equivalent to the following pseudocode
+  //
+  // function model(x)
+  // {
+  //   while (x < 100.0)
+  //   {
+  //     x = x + 10.0;
+  //   }
+  //   return x
+  // }
+
+  CircleGen cgen;
+  std::vector<float> incr_data{10};
+  uint32_t incr_buf = cgen.addBuffer(incr_data);
+  std::vector<float> end_data{100};
+  uint32_t end_buf = cgen.addBuffer(end_data);
+
+  // primary subgraph
+  {
+    int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+    int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+    cgen.addOperatorWhile({{x_in}, {x_out}}, 1, 2); // cond subgraph: 1, body subgraph: 2
+    cgen.setInputsAndOutputs({x_in}, {x_out});
+  }
+
+  // cond subgraph
+  {
+    cgen.nextSubgraph();
+    int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+    int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf});
+    int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
+    cgen.addOperatorLess({{x, end}, {result}});
+    cgen.setInputsAndOutputs({x}, {result});
+  }
+
+  // body subgraph
+  {
+    cgen.nextSubgraph();
+    int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+    int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
+    int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+    cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
+    cgen.setInputsAndOutputs({x_in}, {x_out});
+  }
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->addTestCase({{{0}}, {{100}}});
+  _context->addTestCase({{{2}}, {{102}}});
+  _context->addTestCase({{{22}}, {{102}}});
+  _context->setBackends({"cpu"});
+
+  SUCCEED();
+}
index 82235d9..40e0dfd 100644 (file)
@@ -13,13 +13,13 @@ install(DIRECTORY command DESTINATION test)
 file(GLOB MODEL_TEST_SCRIPT "models/run_test.sh")
 install(PROGRAMS ${MODEL_TEST_SCRIPT} DESTINATION test/models)
 
-# Install models test list file
-file(GLOB MODEL_TEST_DIR models/config)
-install(DIRECTORY ${MODEL_TEST_DIR} DESTINATION test/models)
+# Install models test config
+file(GLOB TFLITE_CONFIG_DIR models/tflite)
+install(DIRECTORY ${TFLITE_CONFIG_DIR} DESTINATION test/models)
 
 # Install nnpackage test config
-file(GLOB MODEL_TEST_DIR LIST_DIRECTORIES true nnfw_api_gtest/models/*)
-install(DIRECTORY ${MODEL_TEST_DIR} DESTINATION test/models/nnpackage)
+file(GLOB NNPACKAGE_MODEL_CONFIG_DIR models/nnfw_api_gtest)
+install(DIRECTORY ${NNPACKAGE_MODEL_CONFIG_DIR} DESTINATION test/models)
 
 # Install test list
 file(GLOB TEST_LIST_DIR list)
index feb658c..3feb7a7 100644 (file)
@@ -57,7 +57,7 @@ fi
 
 if [[ $DOWNLOAD_MODEL == "all" ]] || [[ $DOWNLOAD_MODEL == "nnpackage" ]]; then
     # Download nnpackage model
-    NNPACKAGE_CONFIG_DIR=$INSTALL_DIR/test/models/nnpackage/
+    NNPACKAGE_CONFIG_DIR=$INSTALL_DIR/test/models/nnfw_api_gtest/
     NNPACKAGE_CACHE_DIR=$INSTALL_DIR/unittest_standalone/nnfw_api_gtest_models/
     $INSTALL_DIR/test/models/run_test.sh --download=on --run=off --md5=$MD5_CHECK \
         --configdir=$NNPACKAGE_CONFIG_DIR --cachedir=$NNPACKAGE_CACHE_DIR
index 87aec86..a2a261a 100755 (executable)
@@ -30,7 +30,7 @@ function get_result_of_benchmark_test()
         exit $RET
     fi
 
-    local RESULT=`grep -E '^- Mean:' $LOG_FILE | sed -e 's/ms//g' | awk '{print $3}'`
+    local RESULT=`grep -E '^- MEAN ' $LOG_FILE | awk '{print $4}'`
     echo "$RESULT"
 }
 
index 866f734..ac67435 100644 (file)
@@ -1,27 +1,42 @@
-NET_000[0-5,7-9]
-NET_001[0,2-9]
-NET_002[0-2,4-9]
-NET_003[0-9]
-NET_004[0-9]
-UNIT_Add_*
-UNIT_AvgPool_*
-UNIT_BiasAdd_*
-UNIT_ConcatV2_*
-UNIT_Conv2D_*
-UNIT_Conv2DBackpropInput_*
-UNIT_DepthwiseConv2dNative_*
-UNIT_MaxPool_*
-UNIT_Mean_*
-UNIT_Mul_*
-UNIT_Pad_*
-UNIT_RealDiv_*
-UNIT_Relu6_*
-UNIT_Relu_*
-UNIT_Reshape_*
-UNIT_Rsqrt_*
-UNIT_Softmax_*
-UNIT_Sqrt_*
-UNIT_SquaredDifference_*
-UNIT_Squeeze_*
-UNIT_Sub_*
-UNIT_Tanh_000
+Add_000.opt
+#ArgMax_000.opt
+#ArgMax_001.opt
+#ArgMax_002.opt
+#ArgMax_003.opt
+AveragePool2D_000.opt
+AveragePool2D_U8_000.opt
+Concatenation_000.opt
+Conv2D_000.opt
+Conv2D_001.opt
+Conv2D_002.opt
+Conv2D_004.opt
+DepthwiseConv2D_000.opt
+DepthwiseConv2D_002.opt
+FullyConnected_000.opt
+FullyConnected_001.opt
+FullyConnected_003.opt
+#L2Normalize_U8_000.opt
+Logistic_000.opt
+#Logistic_U8_000.opt
+MaxPool2D_000.opt
+Mean_000.opt
+Mean_001.opt
+Mul_000.opt
+#Net_TConv_BN_000.opt
+Net_UnpackAdd_001.opt
+Pad_000.opt
+Quantization_000.opt
+Reshape_000.opt
+Reshape_001.opt
+Reshape_002.opt
+Softmax_000.opt
+SpaceToDepth_U8_000.opt
+Split_000.opt
+#Tanh_U8_000.opt
+#TransposeConv_000.opt
+#TransposeConv_001.opt
+Transpose_000.opt
+Unpack_000.opt
+Unpack_001.opt
+Unpack_002.opt
+#Unpack_003.opt
index 7e2371b..6612b4e 100644 (file)
@@ -1,27 +1,42 @@
-NET_000[0-5,7-9]
-NET_001[0-9]
-NET_002[0-2,4-9]
-NET_003[0-9]
-NET_004[0-9]
-UNIT_Add_*
-UNIT_AvgPool_*
-UNIT_BiasAdd_*
-UNIT_ConcatV2_*
-UNIT_Conv2D_*
-UNIT_Conv2DBackpropInput_*
-UNIT_DepthwiseConv2dNative_*
-UNIT_MaxPool_*
-UNIT_Mean_*
-UNIT_Mul_*
-UNIT_Pad_*
-UNIT_RealDiv_*
-UNIT_Relu6_*
-UNIT_Relu_*
-UNIT_Reshape_*
-UNIT_Rsqrt_*
-UNIT_Softmax_*
-UNIT_Sqrt_*
-UNIT_SquaredDifference_*
-UNIT_Squeeze_*
-UNIT_Sub_*
-UNIT_Tanh_000
+Add_000.opt
+#ArgMax_000.opt
+#ArgMax_001.opt
+#ArgMax_002.opt
+#ArgMax_003.opt
+AveragePool2D_000.opt
+AveragePool2D_U8_000.opt
+Concatenation_000.opt
+Conv2D_000.opt
+Conv2D_001.opt
+Conv2D_002.opt
+Conv2D_004.opt
+#DepthwiseConv2D_000.opt
+DepthwiseConv2D_002.opt
+FullyConnected_000.opt
+FullyConnected_001.opt
+FullyConnected_003.opt
+#L2Normalize_U8_000.opt
+Logistic_000.opt
+#Logistic_U8_000.opt
+MaxPool2D_000.opt
+Mean_000.opt
+Mean_001.opt
+Mul_000.opt
+#Net_TConv_BN_000.opt
+Net_UnpackAdd_001.opt
+Pad_000.opt
+Quantization_000.opt
+Reshape_000.opt
+Reshape_001.opt
+Reshape_002.opt
+#Softmax_000.opt
+SpaceToDepth_U8_000.opt
+Split_000.opt
+#Tanh_U8_000.opt
+#TransposeConv_000.opt
+#TransposeConv_001.opt
+Transpose_000.opt
+Unpack_000.opt
+Unpack_001.opt
+Unpack_002.opt
+#Unpack_003.opt
index cd765a4..6c71a8a 100644 (file)
@@ -1,17 +1,42 @@
-NET_000[0-4,6-9]
-NET_001[0-3,6-9]
-NET_002[0-2,4-7,9]
-NET_003[0-9]
-NET_004[0-9]
-UNIT_Add_*
-UNIT_AvgPool_*
-UNIT_ConcatV2_*
-UNIT_Conv2D_*
-UNIT_DepthwiseConv2dNative_*
-UNIT_MaxPool_*
-UNIT_Mul_*
-UNIT_Pad_*
-UNIT_Reshape_*
-UNIT_Softmax_*
-UNIT_Squeeze_*
-UNIT_Sub_*
+Add_000.opt
+ArgMax_000.opt
+ArgMax_001.opt
+ArgMax_002.opt
+ArgMax_003.opt
+AveragePool2D_000.opt
+AveragePool2D_U8_000.opt
+Concatenation_000.opt
+Conv2D_000.opt
+Conv2D_001.opt
+Conv2D_002.opt
+Conv2D_004.opt
+DepthwiseConv2D_000.opt
+DepthwiseConv2D_002.opt
+FullyConnected_000.opt
+FullyConnected_001.opt
+FullyConnected_003.opt
+L2Normalize_U8_000.opt
+Logistic_000.opt
+Logistic_U8_000.opt
+MaxPool2D_000.opt
+Mean_000.opt
+Mean_001.opt
+Mul_000.opt
+#Net_TConv_BN_000.opt
+Net_UnpackAdd_001.opt
+Pad_000.opt
+Quantization_000.opt
+Reshape_000.opt
+Reshape_001.opt
+Reshape_002.opt
+Softmax_000.opt
+SpaceToDepth_U8_000.opt
+Split_000.opt
+Tanh_U8_000.opt
+#TransposeConv_000.opt
+#TransposeConv_001.opt
+Transpose_000.opt
+Unpack_000.opt
+Unpack_001.opt
+Unpack_002.opt
+Unpack_003.opt
diff --git a/tests/scripts/list/nnpkg_test_list.armv7l-linux.srcn b/tests/scripts/list/nnpkg_test_list.armv7l-linux.srcn
deleted file mode 100644 (file)
index a893d59..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-NET_000[1,3,7-9]
-NET_001[6,9]
-NET_002[2,8]
-UNIT_Conv2D_*
-UNIT_Conv2DBackpropInput_*
index cb865da..cc4e034 100644 (file)
@@ -1,19 +1,42 @@
-NET_000[0-9]
-NET_001[0-4,6-9]
-NET_002[0-2,4-9]
-NET_003[0-9]
-NET_004[0-9]
-UNIT_Add_*
-UNIT_AvgPool_*
-UNIT_BiasAdd_*
-UNIT_ConcatV2_*
-UNIT_Conv2D_*
-UNIT_Conv2DBackpropInput_*
-UNIT_DepthwiseConv2dNative_*
-UNIT_MaxPool_*
-UNIT_Mul_*
-UNIT_Pad_*
-UNIT_Reshape_*
-UNIT_Softmax_*
-UNIT_Squeeze_*
-UNIT_Sub_*
+Add_000.opt
+#ArgMax_000.opt
+#ArgMax_001.opt
+#ArgMax_002.opt
+#ArgMax_003.opt
+AveragePool2D_000.opt
+#AveragePool2D_U8_000.opt
+Concatenation_000.opt
+Conv2D_000.opt
+Conv2D_001.opt
+Conv2D_002.opt
+Conv2D_004.opt
+DepthwiseConv2D_000.opt
+DepthwiseConv2D_002.opt
+FullyConnected_000.opt
+FullyConnected_001.opt
+FullyConnected_003.opt
+#L2Normalize_U8_000.opt
+Logistic_000.opt
+#Logistic_U8_000.opt
+MaxPool2D_000.opt
+#Mean_000.opt
+#Mean_001.opt
+Mul_000.opt
+#Net_TConv_BN_000.opt
+#Net_UnpackAdd_001.opt
+Pad_000.opt
+Quantization_000.opt
+Reshape_000.opt
+Reshape_001.opt
+Reshape_002.opt
+Softmax_000.opt
+#SpaceToDepth_U8_000.opt
+#Split_000.opt
+#Tanh_U8_000.opt
+#TransposeConv_000.opt
+#TransposeConv_001.opt
+#Transpose_000.opt
+#Unpack_000.opt
+#Unpack_001.opt
+#Unpack_002.opt
+#Unpack_003.opt
index 0aa363f..32a2778 100755 (executable)
@@ -18,7 +18,7 @@
 MY_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 NNFW_HOME="$(dirname $(dirname $(dirname ${MY_PATH})))"
 CACHE_ROOT_PATH=$MY_PATH/"cache"
-TEST_ROOT_PATH=$MY_PATH/"config"
+TEST_ROOT_PATH=$MY_PATH/"tflite"
 REPORT_DIR="report"
 
 RUN_DISABLED="true"
@@ -131,7 +131,7 @@ if [ ! -d "$TEST_ROOT_PATH" ]; then
 fi
 
 # Check test driver setting
-if [ ! command_exists $DRIVER_BIN ] && [ "$RUN_TEST" = "on" ]; then
+if ! command_exists $DRIVER_BIN && [ "$RUN_TEST" = "on" ]; then
     echo "Cannot find test driver" $DRIVER_BIN ": please set proper DRIVER_BIN"
     exit 1
 fi
diff --git a/tests/scripts/nnfw_api_gtest/install_nnfw_api_gtest_nnpackages.sh b/tests/scripts/nnfw_api_gtest/install_nnfw_api_gtest_nnpackages.sh
deleted file mode 100755 (executable)
index ca282c8..0000000
+++ /dev/null
@@ -1,148 +0,0 @@
-#!/usr/bin/env bash
-
-# TODO Reuse the fuction in run_test.sh. This is its duplication.
-function need_download()
-{
-    LOCAL_PATH=$1
-    REMOTE_URL=$2
-    if [ ! -e $LOCAL_PATH ]; then
-        return 0;
-    fi
-    # Ignore checking md5 in cache
-    if [ ! -z $IGNORE_MD5 ] && [ "$IGNORE_MD5" == "1" ]; then
-        return 1
-    fi
-
-    LOCAL_HASH=$(md5sum $LOCAL_PATH | awk '{ print $1 }')
-    REMOTE_HASH=$(curl -ss $REMOTE_URL | md5sum  | awk '{ print $1 }')
-    # TODO Emit an error when Content-MD5 field was not found. (Server configuration issue)
-    if [ "$LOCAL_HASH" != "$REMOTE_HASH" ]; then
-        echo "Downloaded file is outdated or incomplete."
-        return 0
-    fi
-    return 1
-}
-
-# TODO Reuse the fuction in run_test.sh. This is its duplication.
-download_tests()
-{
-    SELECTED_TESTS=$@
-
-    echo ""
-    echo "Downloading tests:"
-    echo "======================"
-    for TEST_NAME in $SELECTED_TESTS; do
-        echo $TEST_NAME
-    done
-    echo "======================"
-
-    for TEST_NAME in $SELECTED_TESTS; do
-        # Test configure initialization
-        MODELFILE_SERVER_PATH=""
-        MODELFILE_NAME=""
-        source $TEST_ROOT_PATH/$TEST_NAME/config.sh
-
-        TEST_CACHE_PATH=$CACHE_ROOT_PATH/$TEST_NAME
-        MODELFILE=$TEST_CACHE_PATH/$MODELFILE_NAME
-        MODELFILE_URL="$MODELFILE_SERVER/$MODELFILE_NAME"
-        if [ -n  "$FIXED_MODELFILE_SERVER" ]; then
-            MODELFILE_URL="$FIXED_MODELFILE_SERVER/$MODELFILE_NAME"
-        fi
-
-        # Download model file
-        if [ ! -e $TEST_CACHE_PATH ]; then
-            mkdir -p $TEST_CACHE_PATH
-        fi
-
-        # Download unless we have it in cache (Also check md5sum)
-        if need_download "$MODELFILE" "$MODELFILE_URL"; then
-            echo ""
-            echo "Download test file for $TEST_NAME"
-            echo "======================"
-
-            rm -f $MODELFILE # Remove invalid file if exists
-            pushd $TEST_CACHE_PATH
-            wget -nv $MODELFILE_URL
-            if [ "${MODELFILE_NAME##*.}" == "zip" ]; then
-                unzip -o $MODELFILE_NAME
-                rm *.zip
-            fi
-            popd
-        fi
-
-    done
-}
-
-realpath()
-{
-  readlink -e -- "$@"
-}
-
-usage()
-{
-    echo "Usage: $0 --modelfile-server=MODELFILE_SERVER --install-path=INSTALL_DIR"
-    echo "  MODELFILE_SERVER : Base URL of the model file server"
-    echo "  INSTALL_DIR      : Path to be installed"
-    exit 1
-}
-
-while [[ $# -gt 0 ]]
-do
-    key="$(echo $1 | awk '{print tolower($0)}')"
-    case "$key" in
-        -?|-h|--help)
-            usage
-            exit 1
-            ;;
-        --modelfile-server)
-            MODELFILE_SERVER="$2"
-            shift
-            ;;
-        --modelfile-server=*)
-            MODELFILE_SERVER="${1#*=}"
-            ;;
-        --install-dir)
-            INSTALL_DIR="$2"
-            shift
-            ;;
-        --install-dir=*)
-            INSTALL_DIR="${1#*=}"
-            ;;
-        *)
-            echo "Invalid option '$1'"
-            usage
-            exit 1
-            ;;
-    esac
-    shift
-done
-
-if [ -z "$MODELFILE_SERVER" ]; then
-    echo "Please specify a value for --modelfile-server or MODELFILE_SERVER(env)."
-    usage
-    exit 1
-fi
-
-if [ -z "$INSTALL_DIR" ]; then
-    echo "Please specify a value for --install-dir or INSTALL_DIR(env)."
-    usage
-    exit 1
-fi
-
-set -e
-
-THIS_SCRIPT_DIR=$(realpath $(dirname ${BASH_SOURCE}))
-source ${THIS_SCRIPT_DIR}/../common.sh
-
-CACHE_ROOT_PATH=$INSTALL_DIR
-FIXED_MODELFILE_SERVER="${MODELFILE_SERVER:-}"
-TEST_ROOT_PATH=${THIS_SCRIPT_DIR}/models
-
-# All models in the directory are the target models
-pushd ${TEST_ROOT_PATH}
-MODELS=$(ls -d */)
-popd
-
-download_tests $MODELS
-
-set +e
index 5c24572..b88cae8 100755 (executable)
@@ -37,7 +37,7 @@ function run_without_sched()
 
     print_with_dots "$EXECUTOR $BACKEND without scheduler"
 
-    RESULT=$(get_result_of_benchmark_test $RUN_TEST_SH $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
+    RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
 
     printf -v RESULT_INT '%d' $RESULT 2>/dev/null
     PERCENTAGE=$((100-RESULT_SCH_INT*100/RESULT_INT))
@@ -105,12 +105,12 @@ function run_benchmark_test()
         export GRAPH_DOT_DUMP=1
         print_with_dots "Parallel with scheduler"
 
-        RESULT=$(get_result_of_benchmark_test $RUN_TEST_SH $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
+        RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
         echo "$RESULT ms"
 
         printf -v RESULT_SCH_INT '%d' $RESULT 2>/dev/null
 
-        mv "after_lower.dot" $REPORT_MODEL_DIR/"after_lower_parallel.dot"
+        mv "after_lower_subg-0.dot" $REPORT_MODEL_DIR/"after_lower_subg-0_parallel.dot"
 
 ##################################################################################
         # Run Linear executor with scheduler
@@ -120,7 +120,7 @@ function run_benchmark_test()
         export GRAPH_DOT_DUMP=1
         print_with_dots "Linear with scheduler"
 
-        RESULT=$(get_result_of_benchmark_test $RUN_TEST_SH $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
+        RESULT=$(get_result_of_benchmark_test $BENCHMARK_DRIVER_BIN $MODEL $LOG_FILE)
 
         printf -v RESULT_INT '%d' $RESULT 2>/dev/null
         PERCENTAGE=$((100-RESULT_SCH_INT*100/RESULT_INT))
@@ -130,7 +130,7 @@ function run_benchmark_test()
         #   for operations with input&output sizes the same as the model
         mv "exec_time.json" $REPORT_MODEL_DIR
         # Save the dot graph
-        mv "after_lower.dot" $REPORT_MODEL_DIR/"after_lower_linear.dot"
+        mv "after_lower_subg-0.dot" $REPORT_MODEL_DIR/"after_lower_subg-0_linear.dot"
         unset GRAPH_DOT_DUMP
 
 ##################################################################################
index cb4a7db..90021bf 100644 (file)
@@ -55,8 +55,8 @@ std::unordered_map<uint32_t, Json::Value> argArrayToMap(const Json::Value &jsonv
   return ret;
 }
 
-// param shape_str is a form of, e.g., "[1, [2, 3], 3, []]"
-void handleShapeParam(nnpkg_run::TensorShapeMap &shape_map, const std::string &shape_str)
+// param shape_str is of a form such as "[1, [2, 3], 3, []]" or "h5"
+void handleShapeJsonParam(nnpkg_run::TensorShapeMap &shape_map, const std::string &shape_str)
 {
   Json::Value root;
   Json::Reader reader;
@@ -152,9 +152,16 @@ void Args::Initialize(void)
   };
 
   auto process_shape_prepare = [&](const std::string &shape_str) {
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+    if (shape_str == "H5" || shape_str == "h5")
+    {
+      _when_to_use_h5_shape = WhenToUseH5Shape::PREPARE;
+      return;
+    }
+#endif
     try
     {
-      handleShapeParam(_shape_prepare, shape_str);
+      handleShapeJsonParam(_shape_prepare, shape_str);
     }
     catch (const std::exception &e)
     {
@@ -164,9 +171,16 @@ void Args::Initialize(void)
   };
 
   auto process_shape_run = [&](const std::string &shape_str) {
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+    if (shape_str == "H5" || shape_str == "h5")
+    {
+      _when_to_use_h5_shape = WhenToUseH5Shape::RUN;
+      return;
+    }
+#endif
     try
     {
-      handleShapeParam(_shape_run, shape_str);
+      handleShapeJsonParam(_shape_run, shape_str);
     }
     catch (const std::exception &e)
     {
@@ -202,11 +216,13 @@ void Args::Initialize(void)
          "e.g. nnpackage_run-UNIT_Add_000-acl_cl.csv.\n"
          "{nnpkg} name may be changed to realpath if you use symbolic-link.")
     ("shape_prepare", po::value<std::string>()->default_value("[]")->notifier(process_shape_prepare),
-         "set shape of specified tensor before compilation\n"
-         "e.g. '[0, [1, 2], 2, []]' to set 0th tensor to [1, 2] and 2nd tensor to [].\n")
+         "set shape of specified tensor before compilation (before calling nnfw_prepare()).\n"
+         "'h5': read shape(s) from H5 input file. '--load' should also be provided.\n"
+         "'[0, [1, 2], 2, []]': set 0th tensor to [1, 2] and 2nd tensor to [].")
     ("shape_run", po::value<std::string>()->default_value("[]")->notifier(process_shape_run),
-         "set shape of specified tensor right before running\n"
-         "e.g. '[1, [1, 2]]` to set 1st tensor to [1, 2].\n")
+         "set shape of specified tensor before running (before calling nnfw_run()).\n"
+         "'h5': read shape(s) from H5 input file. '--load' should also be provided.\n"
+         "'[0, [1, 2], 2, []]': set 0th tensor to [1, 2] and 2nd tensor to [].")
     ("verbose_level,v", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _verbose_level = v; }),
          "Verbose level\n"
          "0: prints the only result. Messages btw run don't print\n"
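For illustration, the two shape options combine with `--load` (which the help text says must accompany "h5") roughly as in the sketch below; the binary name and nnpackage path are placeholders, while the flag names come from the option definitions above:

```
$ nnpackage_run --load input.h5 --shape_prepare h5 path/to/nnpkg
$ nnpackage_run --shape_run '[0, [1, 2], 2, []]' path/to/nnpkg
```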
index 4bc3e6c..d2b33fc 100644 (file)
 #include <vector>
 #include <boost/program_options.hpp>
 
+#include "types.h"
+
 namespace po = boost::program_options;
 
 namespace nnpkg_run
 {
 
-using TensorShapeMap = std::unordered_map<uint32_t, std::vector<int>>;
+using TensorShapeMap = std::unordered_map<uint32_t, TensorShape>;
+
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+enum class WhenToUseH5Shape
+{
+  DO_NOT_USE, // don't use shapes in h5 file
+  PREPARE,    // read shapes in h5 file and set them as inputs' shape before calling nnfw_prepare()
+  RUN,        // read shapes in h5 file and set them as inputs' shape before calling nnfw_run()
+};
+#endif
 
 class Args
 {
@@ -39,6 +50,7 @@ public:
 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
   const std::string &getDumpFilename(void) const { return _dump_filename; }
   const std::string &getLoadFilename(void) const { return _load_filename; }
+  WhenToUseH5Shape getWhenToUseH5Shape(void) const { return _when_to_use_h5_shape; }
 #endif
   const int getNumRuns(void) const { return _num_runs; }
   const int getWarmupRuns(void) const { return _warmup_runs; }
@@ -48,8 +60,8 @@ public:
   const bool getMemoryPoll(void) const { return _mem_poll; }
   const bool getWriteReport(void) const { return _write_report; }
   const bool printVersion(void) const { return _print_version; }
-  const TensorShapeMap &getShapeMapForPrepare() { return _shape_prepare; }
-  const TensorShapeMap &getShapeMapForRun() { return _shape_run; }
+  TensorShapeMap &getShapeMapForPrepare() { return _shape_prepare; }
+  TensorShapeMap &getShapeMapForRun() { return _shape_run; }
   const int getVerboseLevel(void) const { return _verbose_level; }
 
 private:
@@ -64,6 +76,7 @@ private:
 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
   std::string _dump_filename;
   std::string _load_filename;
+  WhenToUseH5Shape _when_to_use_h5_shape = WhenToUseH5Shape::DO_NOT_USE;
 #endif
   TensorShapeMap _shape_prepare;
   TensorShapeMap _shape_run;
index 09ace47..3929c8d 100644 (file)
 #include <stdexcept>
 #include <H5Cpp.h>
 
+namespace
+{
+nnpkg_run::TensorShape getShape(H5::DataSet &data_set)
+{
+  std::vector<hsize_t> h5_shape; // hsize_t is unsigned long long
+  H5::DataSpace data_space = data_set.getSpace();
+  int rank = data_space.getSimpleExtentNdims();
+  h5_shape.resize(rank);
+
+  // read shape info from H5 file
+  data_space.getSimpleExtentDims(h5_shape.data(), NULL);
+
+  nnpkg_run::TensorShape shape;
+  for (auto dim : h5_shape)
+    shape.emplace_back(static_cast<int>(dim));
+
+  return shape;
+}
+} // namespace
+
 namespace nnpkg_run
 {
 static const char *h5_value_grpname = "value";
 
+std::vector<TensorShape> H5Formatter::readTensorShapes(const std::string &filename)
+{
+  uint32_t num_inputs;
+  NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
+  std::vector<TensorShape> tensor_shapes;
+
+  try
+  {
+    H5::Exception::dontPrint();
+
+    H5::H5File file(filename, H5F_ACC_RDONLY);
+    H5::Group value_group = file.openGroup(h5_value_grpname);
+
+    // Constraints: if there are n data set names, they should be unique and
+    //              one of [ "0", "1", .. , "n-1" ]
+    for (uint32_t i = 0; i < num_inputs; ++i)
+    {
+      H5::DataSet data_set = value_group.openDataSet(std::to_string(i));
+      H5::DataType type = data_set.getDataType();
+      auto shape = getShape(data_set);
+
+      tensor_shapes.emplace_back(shape);
+    }
+
+    return tensor_shapes;
+  }
+  catch (const H5::Exception &e)
+  {
+    H5::Exception::printErrorStack();
+    std::exit(-1);
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << e.what() << std::endl;
+    std::exit(-1);
+  }
+}
+
 void H5Formatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs)
 {
   uint32_t num_inputs;
@@ -41,6 +99,9 @@ void H5Formatter::loadInputs(const std::string &filename, std::vector<Allocation
     {
       nnfw_tensorinfo ti;
       NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
+
+      // TODO Assert that the nnfw tensor shape matches the shape stored in the H5 file
+
       // allocate memory for data
       auto bufsz = bufsize_for(&ti);
       inputs[i].alloc(bufsz);
@@ -156,7 +217,7 @@ void H5Formatter::dumpOutputs(const std::string &filename, std::vector<Allocatio
         case NNFW_TYPE_TENSOR_BOOL:
         {
           H5::DataSet data_set =
-              value_group.createDataSet(std::to_string(i), H5::PredType::STD_I8LE, data_space);
+              value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8LE, data_space);
           data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
           break;
         }
index c8b64bf..203ba0e 100644 (file)
@@ -20,6 +20,7 @@
 #include <string>
 #include <vector>
 
+#include "types.h"
 #include "allocation.h"
 
 struct nnfw_session;
@@ -30,6 +31,7 @@ class H5Formatter
 {
 public:
   H5Formatter(nnfw_session *sess) : session_(sess) {}
+  std::vector<TensorShape> readTensorShapes(const std::string &filename);
   void loadInputs(const std::string &filename, std::vector<Allocation> &inputs);
   void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs);
 
index 88d3307..a78e144 100644 (file)
 #include <unordered_map>
 #include <vector>
 
-static const char *default_backend_cand = "acl_cl";
+static const char *default_backend_cand = "cpu";
 
-NNFW_STATUS resolve_op_backend(nnfw_session *session)
+void overwriteShapeMap(nnpkg_run::TensorShapeMap &shape_map,
+                       std::vector<nnpkg_run::TensorShape> shapes)
 {
-  static std::unordered_map<std::string, std::string> operation_map = {
-      {"TRANSPOSE_CONV", "OP_BACKEND_TransposeConv"},      {"CONV_2D", "OP_BACKEND_Conv2D"},
-      {"DEPTHWISE_CONV_2D", "OP_BACKEND_DepthwiseConv2D"}, {"MEAN", "OP_BACKEND_Mean"},
-      {"AVERAGE_POOL_2D", "OP_BACKEND_AvgPool2D"},         {"MAX_POOL_2D", "OP_BACKEND_MaxPool2D"},
-      {"INSTANCE_NORM", "OP_BACKEND_InstanceNorm"},        {"ADD", "OP_BACKEND_Add"}};
-
-  for (auto i : operation_map)
-  {
-    char *default_backend = std::getenv(i.second.c_str());
-    if (default_backend)
-    {
-      NNFW_STATUS return_result = nnfw_set_op_backend(session, i.first.c_str(), default_backend);
-      if (return_result == NNFW_STATUS_ERROR)
-        return return_result;
-    }
-  }
-
-  return NNFW_STATUS_NO_ERROR;
+  for (uint32_t i = 0; i < shapes.size(); i++)
+    shape_map[i] = shapes[i];
 }
 
 int main(const int argc, char **argv)
@@ -98,7 +83,6 @@ int main(const int argc, char **argv)
     char *available_backends = std::getenv("BACKENDS");
     if (available_backends)
       NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, available_backends));
-    NNPR_ENSURE_STATUS(resolve_op_backend(session));
 
     uint32_t num_inputs;
     NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs));
@@ -157,7 +141,14 @@ int main(const int argc, char **argv)
     verifyInputTypes();
     verifyOutputTypes();
 
-    // set input shape before compilation
+// set input shape before compilation
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+    if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::PREPARE)
+    {
+      auto shapes = H5Formatter(session).readTensorShapes(args.getLoadFilename());
+      overwriteShapeMap(args.getShapeMapForPrepare(), shapes);
+    }
+#endif
     setTensorInfo(args.getShapeMapForPrepare());
 
     // prepare execution
@@ -167,7 +158,14 @@ int main(const int argc, char **argv)
       NNPR_ENSURE_STATUS(nnfw_prepare(session));
     });
 
-    // set input shape after compilation and before execution
+// set input shape after compilation and before execution
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+    if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::RUN)
+    {
+      auto shapes = H5Formatter(session).readTensorShapes(args.getLoadFilename());
+      overwriteShapeMap(args.getShapeMapForRun(), shapes);
+    }
+#endif
     setTensorInfo(args.getShapeMapForRun());
 
     // prepare input
similarity index 63%
rename from compiler/circle-quantizer/src/CircleExpContract.cpp
rename to tests/tools/nnpackage_run/src/types.h
index b56b7ee..93a7ab2 100644 (file)
  * limitations under the License.
  */
 
-#include "CircleExpContract.h"
+#ifndef __NNPACKAGE_RUN_TYPES_H__
+#define __NNPACKAGE_RUN_TYPES_H__
+
+#include <vector>
 
-#include <oops/InternalExn.h>
-
-#include <fstream>
-#include <iostream>
-
-bool CircleExpContract::store(const char *ptr, const size_t size) const
+namespace nnpkg_run
 {
-  if (!ptr)
-    INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
 
-  std::ofstream fs(_filepath.c_str(), std::ofstream::binary);
-  fs.write(ptr, size);
+using TensorShape = std::vector<int>;
+
+} // namespace nnpkg_run
 
-  return fs.good();
-}
+#endif // __NNPACKAGE_RUN_TYPES_H__
index 00b8b0e..e72966d 100644 (file)
@@ -220,13 +220,16 @@ int main(const int argc, char **argv)
         // Generate unsigned 8-bit integer input
         auto tensor_view = nnfw::tflite::TensorView<uint8_t>::make(*interpreter, o);
 
-        uint8_t value = 0;
+        auto fp = static_cast<uint8_t (nnfw::misc::RandomGenerator::*)(
+            const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>(
+            &nnfw::misc::RandomGenerator::generate<uint8_t>);
+        const nnfw::misc::tensor::Object<uint8_t> data(tensor_view.shape(),
+                                                       std::bind(fp, randgen, _1, _2));
 
         nnfw::misc::tensor::iterate(tensor_view.shape())
             << [&](const nnfw::misc::tensor::Index &ind) {
-                 // TODO Generate random values
+                 const auto value = data.at(ind);
                  tensor_view.at(ind) = value;
-                 value = (value + 1) & 0xFF;
                };
       }
       else if (tensor->type == kTfLiteBool)
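The `static_cast` above exists only to pick the `uint8_t` overload of `RandomGenerator::generate` so that `std::bind` can take its address. Assuming the same interface the surrounding code already uses, a lambda expresses this more directly; a minimal sketch:

```cpp
// Hypothetical equivalent of the static_cast + std::bind above; assumes
// randgen is the nnfw::misc::RandomGenerator used in the surrounding code.
const nnfw::misc::tensor::Object<uint8_t> data(
    tensor_view.shape(),
    [&](const nnfw::misc::tensor::Shape &shape, const nnfw::misc::tensor::Index &index) {
      return randgen.generate<uint8_t>(shape, index);
    });
```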
index 716f4f8..9d4676e 100644 (file)
@@ -13,9 +13,11 @@ Convert modelfile (either tflite or circle) to nnpackage.
 Options:
     -h   show this help
     -o   set nnpackage output directory (default=.)
+    -p   set nnpackage output name (default=[modelfile name])
 
 Examples:
-    model2nnpkg.sh add.tflite        => create nnpackage in ./
-    model2nnpkg.sh -o out add.tflite => create nnpackage in out/
+    model2nnpkg.sh add.tflite                  => create nnpackage 'add' in ./
+    model2nnpkg.sh -o out add.tflite           => create nnpackage 'add' in out/
+    model2nnpkg.sh -o out -p addpkg add.tflite => create nnpackage 'addpkg' in out/
 
 ```
index 87cd787..26f6c70 100755 (executable)
@@ -4,6 +4,7 @@ set -eu
 
 progname=$(basename "${BASH_SOURCE[0]}")
 outdir="."
+name=""
 
 usage() {
   echo "Usage: $progname [options] modelfile"
@@ -12,10 +13,12 @@ usage() {
   echo "Options:"
   echo "    -h   show this help"
   echo "    -o   set nnpackage output directory (default=$outdir)"
+  echo "    -p   set nnpackage output name (default=[modelfile name])"
   echo ""
   echo "Examples:"
-  echo "    $progname add.tflite        => create nnpackage in $outdir/"
-  echo "    $progname -o out add.tflite => create nnpackage in out/"
+  echo "    $progname add.tflite                  => create nnpackage 'add' in $outdir/"
+  echo "    $progname -o out add.tflite           => create nnpackage 'add' in out/"
+  echo "    $progname -o out -p addpkg add.tflite => create nnpackage 'addpkg' in out/"
   exit 1
 }
 
@@ -24,10 +27,11 @@ if [ $# -eq 0 ]; then
   exit 1
 fi
 
-while getopts "ho:" OPTION; do
+while getopts "ho:p:" OPTION; do
 case "${OPTION}" in
     h) usage;;
     o) outdir=$OPTARG;;
+    p) name=$OPTARG;;
     ?) exit 1;;
 esac
 done
@@ -53,7 +57,9 @@ if [ ! -e $1 ]; then
   exit 1
 fi
 
-name=${modelfile%.*}
+if [ -z "$name" ]; then
+  name=${modelfile%.*}
+fi
 extension=${modelfile##*.}
 
 echo "Generating nnpackage "$name" in "$outdir""
@@ -63,7 +69,7 @@ cat > "$outdir"/"$name"/metadata/MANIFEST <<-EOF
   "major-version" : "1",
   "minor-version" : "0",
   "patch-version" : "0",
-  "models"      : [ "$name.$extension" ],
+  "models"      : [ "$modelfile" ],
   "model-types" : [ "$extension" ]
 }
 EOF
index 6ad2ef9..409c058 100755 (executable)
@@ -7,7 +7,7 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 nnfw_root="$( cd "${script_dir%*/*/*/*}" && pwd )"
 outdir="."
 flatc=${flatc:-"$nnfw_root/build/externals/FLATBUFFERS/build/flatc"}
-tflite_schema=${tflite_schema:-"$nnfw_root/externals/TENSORFLOW-1.12/tensorflow/contrib/lite/schema/schema.fbs"}
+tflite_schema=${tflite_schema:-"$nnfw_root/externals/TENSORFLOW-1.13.1/tensorflow/lite/schema/schema.fbs"}
 circle_schema=${circle_schema:-"$nnfw_root/nnpackage/schema/circle_schema.fbs"}
 
 if ! [ -x "$flatc" ]; then
@@ -73,7 +73,7 @@ name=${tflite_base%.*}
 # convert
 
 mkdir -p "${outdir}"
-${flatc} -o ${outdir} --defaults-json --strict-json -t ${tflite_schema} -- $1
+${flatc} -o ${outdir} --strict-json -t ${tflite_schema} -- $1
 ${script_dir}/tflitejson2circlejson.py "${outdir}/${name}.json" > "${outdir}/${name}.circle"
 ${flatc} -o ${outdir} -b ${circle_schema} "${outdir}/${name}.circle"
 rm -f ${outdir}/${name}.json
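
The conversion itself is a three-step flatc round-trip: dump the tflite binary to JSON against the (now TF 1.13.1) schema, rewrite the JSON as circle, then rebuild a binary against the circle schema. Dropping `--defaults-json` means fields sitting at their schema default are omitted from the intermediate JSON. A sketch with the variables expanded, assuming `add.tflite` as input and illustrative schema paths:

```
flatc -o out --strict-json -t externals/TENSORFLOW-1.13.1/tensorflow/lite/schema/schema.fbs -- add.tflite
./tflitejson2circlejson.py out/add.json > out/add.circle
flatc -o out -b nnpackage/schema/circle_schema.fbs out/add.circle
```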
index a6955d8..272463f 100755 (executable)
@@ -34,8 +34,6 @@ if __name__ == '__main__':
     with open(json_path, "r") as f:
         try:
             json_dict = json.load(f, object_pairs_hook=OrderedDict)
-            for subgraph in json_dict["subgraphs"]:
-                subgraph["data_format"] = "CHANNELS_LAST"
             json_dict["version"] = 0
             print(json.dumps(json_dict, indent=2))
         except KeyError:
diff --git a/tools/release_tool/README.md b/tools/release_tool/README.md
new file mode 100644 (file)
index 0000000..8918604
--- /dev/null
@@ -0,0 +1,68 @@
+# Contents
+
+- git_release.sh
+- onert_version.sh
+
+# git_release.sh
+
+This tool helps you automate GitHub releases.
+
+## Usage
+```
+$ ./git_release.sh --tag TAG --release_note RELEASE_NOTE \
+--token TOKEN [--release_name RELEASE_NAME] [--commitish COMMITISH] [--draft] \
+[--host_name HOST_NAME] [--repo_owner REPO_OWNER] [--repo_name REPO_NAME] [--asset ASSET]...
+```
+
+## Options
+```
+--tag              The name of the tag
+--release_name     The name of the release
+--release_note     Path of text file describing the contents of the release
+--commitish        The commitish value that determines where the Git tag is created from
+--draft            Create a draft release
+--token            User token for authentication
+--host_name        Host name for endpoint URL [Enterprise-specific endpoint only]
+--repo_owner       Owner of the repository
+--repo_name        The name of the repository
+--asset            Path of release asset
+```
+
+## Examples
+```
+$ ./git_release.sh --tag 1.9.0 --commitish release/1.9.0 --token 0de25f1ca5d1d758fe877b18c06 \
+  --repo_owner mhs4670go --repo_name test_repo --release_note local/repo/release_note \
+  --asset ONE-compiler.tar.gz --asset ONE-runtime.tar.gz
+
+$ ./git_release.sh --tag v1.1 --commitish c024e85d0ce6cb1ed2fbc66f1a9c1c2814da7575 \
+  --token 0de25f1ca5d1d758fe877b18c06 --repo_owner Samsung --repo_name ONE \
+  --release_name "Release Automation" --release_note /home/mhs4670go/ONE/release_doc \
+  --host_name github.sec.company.net --draft
+```
+
+## Reference
+https://developer.github.com/v3/repos/releases/#create-a-release
+
+
+# onert_version.sh
+
+onert_version.sh shows or updates the onert version information recorded in the sources.
+
+## Usage
+```
+$ ./onert_version.sh -h
+Usage: onert_version.sh [-s version]
+Update or show onert version information
+```
+
+## Options
+```
+-h   show this help
+-s   set onert version
+```
+
+## Examples
+```
+$ ./onert_version.sh           => show current onert version
+$ ./onert_version.sh -s 1.6.0  => set onert version info in all sources
+```
diff --git a/tools/release_tool/git_release.sh b/tools/release_tool/git_release.sh
new file mode 100755 (executable)
index 0000000..adba7df
--- /dev/null
@@ -0,0 +1,206 @@
+#!/bin/bash
+# This script is to automate the process of monthly release with github API
+
+# Test if getopt is enhanced version
+getopt --test > /dev/null
+if [ $? -ne 4 ]; then
+  echo "[ERROR] Your system doesn't have enhanced getopt"
+  echo 2
+fi
+
+function Usage()
+{
+  echo "Usage: ./$(basename ${BASH_SOURCE[0]}) --tag TAG --release_note RELEASE_NOTE \
+--token TOKEN [--release_name RELEASE_NAME] [--commitish COMMITISH] [--draft] \
+[--host_name HOST_NAME] [--repo_owner REPO_OWNER] [--repo_name REPO_NAME] [--asset ASSET]..."
+  echo ""
+  echo "[OPTIONS]"
+  echo "--tag              The name of the tag"
+  echo "--release_name     The name of the release"
+  echo "--release_note     Path of text file describing the contents of the release"
+  echo "--commitish        The commitish value that determines where the Git tag is created from"
+  echo "--draft            Create a draft release"
+  echo "--token            User token for authentication"
+  echo "--host_name        Host name for endpoint URL [Enterprise-specific endpoint only]"
+  echo "--repo_owner       Owner of the repository"
+  echo "--repo_name        The name of the repository"
+  echo "--asset            Path of release asset"
+  echo ""
+  echo "[EXAMPLE]"
+  echo "$ ./git_release.sh --tag 1.9.0 --commitish release/1.9.0 --token 0de25f1ca5d1d758fe877b18c06 \\"
+  echo "  --repo_owner mhs4670go --repo_name test_repo --release_note local/repo/release_note \\"
+  echo "  --asset ONE-compiler.tar.gz --asset ONE-runtime.tar.gz"
+  echo ""
+  echo "$ ./git_release.sh --tag v1.1 --commitish c024e85d0ce6cb1ed2fbc66f1a9c1c2814da7575 \\"
+  echo "  --token 0de25f1ca5d1d758fe877b18c06 --repo_owner Samsung --repo_name ONE \\"
+  echo "  --release_name \"Release Automation\" --release_note /home/mhs4670go/ONE/release_doc \\"
+  echo "  --host_name github.sec.company.net --draft"
+  echo ""
+  echo "[REFERENCE]"
+  echo "https://developer.github.com/v3/repos/releases/#create-a-release"
+
+}
+
+SHORT_OPTS=h
+LONG_OPTS="\
+help,\
+tag:,\
+release_name:,\
+release_note:,\
+commitish:,\
+draft,\
+token:,\
+host_name:,\
+repo_owner:,\
+repo_name:,\
+asset:"
+
+OPTS=$(getopt --options "$SHORT_OPTS" --longoptions "$LONG_OPTS" --name "$0" -- "$@")
+
+if [ $? != 0 ] ; then echo "[ERROR] Failed to parse options" ; exit 2 ; fi
+
+eval set -- "$OPTS"
+
+unset TAG_NAME
+unset RELEASE_NAME
+unset RELEASE_NOTE
+unset TARGET_COMMITISH
+unset USER_TOKEN
+unset HOST_NAME
+unset REPO_OWNER
+unset REPO_NAME
+IS_DRAFT=false
+ASSET_PATHS=()
+
+while true ; do
+  case "$1" in
+    -h|--help )
+      Usage
+      exit 0
+      ;;
+    --tag ) # REQUIRED
+      TAG_NAME="$2"
+      shift 2
+      ;;
+    --release_name )
+      RELEASE_NAME="$2"
+      shift 2
+      ;;
+    --release_note ) # REQUIRED
+      RELEASE_NOTE="$2"
+      shift 2
+      ;;
+    --commitish )
+      TARGET_COMMITISH="$2"
+      shift 2
+      ;;
+    --draft )
+      IS_DRAFT=true
+      shift
+      ;;
+    --token ) # REQUIRED
+      USER_TOKEN="$2"
+      shift 2
+      ;;
+    --host_name )
+      HOST_NAME="$2/api/v3"
+      shift 2
+      ;;
+    --repo_owner )
+      REPO_OWNER="$2"
+      shift 2
+      ;;
+    --repo_name )
+      REPO_NAME="$2"
+      shift 2
+      ;;
+    --asset )
+      ASSET_PATHS+=("$2")
+      shift 2
+      ;;
+    -- )
+      shift
+      break
+      ;;
+    *)
+      echo "[ERROR] getopt internal error"
+      exit 2
+      ;;
+  esac
+done
+
+# Check if required options are specified
+if [ -z "${TAG_NAME}" ]; then
+  echo "[ERROR] You must specify '--tag' option"
+  Usage
+  exit 2
+fi
+if [ -z "${RELEASE_NOTE}" ]; then
+  echo "[ERROR] You must specify '--release_note' option"
+  Usage
+  exit 2
+fi
+if [ -z "${USER_TOKEN}" ]; then
+  echo "[ERROR] You must specify '--token' option"
+  Usage
+  exit 2
+fi
+
+# Print variables and set default value
+DEFAULT_RELEASE_NAME="ONE Release ${TAG_NAME}"
+DEFAULT_HOST_NAME="api.github.com"
+DEFAULT_REPO_OWNER="Samsung"
+DEFAULT_REPO_NAME="ONE"
+echo "======================[RELEASE INFO]======================"
+echo "TAG_NAME         : ${TAG_NAME}"
+echo "RELEASE_NAME     : ${RELEASE_NAME:=${DEFAULT_RELEASE_NAME}}"
+echo "RELEASE_NOTE     : ${RELEASE_NOTE}"
+echo "TARGET_COMMITISH : ${TARGET_COMMITISH:=${TAG_NAME}}"
+echo "IS_DRAFT         : ${IS_DRAFT}"
+echo "USER_TOKEN       : ${USER_TOKEN}"
+echo "HOST_NAME        : ${HOST_NAME:=${DEFAULT_HOST_NAME}}"
+echo "REPO_OWNER       : ${REPO_OWNER:=${DEFAULT_REPO_OWNER}}"
+echo "REPO_NAME        : ${REPO_NAME:=${DEFAULT_REPO_NAME}}"
+echo "ASSETS           : ${ASSET_PATHS[@]}"
+echo "==========================================================="
+
+function generate_release_data()
+{
+  cat <<EOF
+{
+  "tag_name": "${TAG_NAME}",
+  "target_commitish": "${TARGET_COMMITISH}",
+  "name": "${RELEASE_NAME}",
+  "body": "$(cat $1 | sed 's/$/\\n/' | tr -d '\n')",
+  "draft": ${IS_DRAFT},
+  "prerelease": false
+}
+EOF
+}
+
+# Check if the release already exists
+RELEASE_URL=$(curl -s --request GET --header "Authorization: token ${USER_TOKEN}" \
+https://${HOST_NAME}/repos/${REPO_OWNER}/${REPO_NAME}/releases/tags/${TAG_NAME} | \
+jq -r '.url')
+
+if [ "$RELEASE_URL" != "null" ]; then
+  echo "[ERROR] The tag name you specified already exists."
+  exit 2
+fi
+
+# Create a release (assigning upload_url using jq)
+UPLOAD_URL=$(curl -s --request POST --header "Authorization: token ${USER_TOKEN}" \
+--header "Accept: application/json" \
+--data "$(eval generate_release_data '${RELEASE_NOTE}')" \
+"https://${HOST_NAME}/repos/${REPO_OWNER}/${REPO_NAME}/releases" | \
+jq -r '.upload_url')
+
+UPLOAD_URL=$(echo ${UPLOAD_URL} | cut -d "{" -f 1)?name=
+
+# Upload the assets
+for ASSET_PATH in "${ASSET_PATHS[@]}"; do
+  curl -s --request POST --header "Authorization: token ${USER_TOKEN}" \
+  --header "Content-Type: $(file -b --mime-type ${ASSET_PATH})" \
+  --data-binary @${ASSET_PATH} \
+  ${UPLOAD_URL}${ASSET_PATH} > /dev/null
+done
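
For reference, `generate_release_data` builds the JSON body for the create-release call. For the first example in `Usage()`, the payload would look roughly like this (release note contents abbreviated; the sed/tr pipeline folds the note into `\n`-separated text):

```
{
  "tag_name": "1.9.0",
  "target_commitish": "release/1.9.0",
  "name": "ONE Release 1.9.0",
  "body": "release note line 1\nrelease note line 2\n",
  "draft": false,
  "prerelease": false
}
```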
diff --git a/tools/release_tool/onert_version.sh b/tools/release_tool/onert_version.sh
new file mode 100755 (executable)
index 0000000..eafe96e
--- /dev/null
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+set -eu
+
+progname=$(basename "${BASH_SOURCE[0]}")
+script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+nnfw_root="$( cd "${script_dir%*/*/*}" && pwd )"
+
+usage() {
+  echo "Usage: $progname version"
+  echo "Update or show onert version information"
+  echo ""
+  echo "Options:"
+  echo "    -h   show this help"
+  echo "    -s   set onert  version"
+  echo ""
+  echo "Examples:"
+  echo "    $progname           => show current onert version"
+  echo "    $progname -s 1.6.0  => set onert version info in all sources"
+  exit 1
+}
+
+show_version() {
+  version_line=$(grep "Version:" "${nnfw_root}/packaging/nnfw.spec")
+  echo ${version_line#"Version:"}
+
+  exit 0
+}
+
+set_version() {
+  version=$1
+  perl -pi -e "s/^release = .*/release = \'$version\'/" ${nnfw_root}/docs/conf.py
+  perl -pi -e "s/^Version: .*/Version: $version/" ${nnfw_root}/packaging/nnfw.spec
+
+  IFS=. read M m p <<< "$version"
+  hex=$(printf '0x%08x' $(( (($M << 24)) | (($m << 8)) | $p )))
+  perl -pi -e "s/^#define NNFW_VERSION.*/#define NNFW_VERSION $hex/" ${nnfw_root}/runtime/onert/api/include/nnfw_version.h
+
+  perl -pi -e "s/versionName .*$/versionName \"$version\"/" ${nnfw_root}/runtime/contrib/android/api/build.gradle
+}
+
+if [ $# -eq 0 ]; then
+  show_version
+fi
+
+while getopts "hs:" OPTION; do
+case "${OPTION}" in
+    h) usage;;
+    s) set_version "$OPTARG";;
+    ?) exit 1;;
+esac
+done
+
+shift $((OPTIND-1))
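
`set_version` packs major/minor/patch into the single 32-bit `NNFW_VERSION` constant as `(M << 24) | (m << 8) | p`. A quick check of the encoding for 1.6.0:

```
$ IFS=. read M m p <<< "1.6.0"
$ printf '0x%08x\n' $(( (M << 24) | (m << 8) | p ))
0x01000600
```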
index 4ef2374..cd66bf5 100755 (executable)
 import os
 import sys
 import numpy
-
-sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tflite'))
-flatbuffersPath = '../../externals/flatbuffers'
-sys.path.append(
-    os.path.join(os.path.dirname(os.path.abspath(__file__)), flatbuffersPath + '/python'))
-
 import flatbuffers
 import tflite.Model
 import tflite.SubGraph
diff --git a/tools/tflitefile_tool/requirements.txt b/tools/tflitefile_tool/requirements.txt
new file mode 100644 (file)
index 0000000..9b4366a
--- /dev/null
@@ -0,0 +1,2 @@
+flatbuffers>=1.12
+numpy
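
With the `sys.path` manipulation removed from the tools (see the surrounding hunks), `flatbuffers` must now come from the Python environment rather than the bundled externals; installing the new requirements file is enough:

```
$ pip install -r tools/tflitefile_tool/requirements.txt
```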
index 333ca32..863edea 100755 (executable)
 import os
 import sys
 import numpy
-
-sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tflite'))
-sys.path.append(
-    os.path.join(
-        os.path.dirname(os.path.abspath(__file__)), '../../externals/flatbuffers/python'))
-
 import flatbuffers
 import tflite.Model
 import tflite.SubGraph
@@ -278,6 +272,10 @@ def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_t
                                                      conv2d_options.StrideW())
         tflite.Conv2DOptions.Conv2DOptionsAddStrideH(new_builder,
                                                      conv2d_options.StrideH())
+        tflite.Conv2DOptions.Conv2DOptionsAddDilationWFactor(
+            new_builder, conv2d_options.DilationWFactor())
+        tflite.Conv2DOptions.Conv2DOptionsAddDilationHFactor(
+            new_builder, conv2d_options.DilationHFactor())
         tflite.Conv2DOptions.Conv2DOptionsAddFusedActivationFunction(
             new_builder, conv2d_options.FusedActivationFunction())
         return tflite.Conv2DOptions.Conv2DOptionsEnd(new_builder)
@@ -725,7 +723,17 @@ def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_t
     # GreaterOptions: not supported
     # GreaterEqualOptions: not supported
     # LessEqualOptions: not supported
-    # SelectOptions: not supported
+
+    # SelectOptions
+    import tflite.SelectOptions
+    if builtin_option_type == tflite.BuiltinOptions.BuiltinOptions().SelectOptions:
+
+        select_option = tflite.SelectOptions.SelectOptions()
+        select_option.Init(selected_builtin_option.Bytes, selected_builtin_option.Pos)
+
+        tflite.SelectOptions.SelectOptionsStart(new_builder)
+        return tflite.SelectOptions.SelectOptionsEnd(new_builder)
+
     # SliceOptions: not supported
 
     # TransposeConvOptions
@@ -867,7 +875,18 @@ def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_t
     # FloorModOptions: not supported
     # RangeOptions: not supported
     # ResizeNearestNeighborOptions: not supported
-    # LeakyReluOptions: not supported
+
+    # LeakyReluOptions
+    import tflite.LeakyReluOptions
+    if builtin_option_type == tflite.BuiltinOptions.BuiltinOptions().LeakyReluOptions:
+
+        leaky_relu_option = tflite.LeakyReluOptions.LeakyReluOptions()
+        leaky_relu_option.Init(selected_builtin_option.Bytes, selected_builtin_option.Pos)
+
+        tflite.LeakyReluOptions.LeakyReluOptionsStart(new_builder)
+        tflite.LeakyReluOptions.LeakyReluOptionsAddAlpha(new_builder,
+                                                         leaky_relu_option.Alpha())
+        return tflite.LeakyReluOptions.LeakyReluOptionsEnd(new_builder)
 
     # SquaredDifferenceOptions
     import tflite.SquaredDifferenceOptions
@@ -915,7 +934,8 @@ def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_t
         return tflite.WhileOptions.WhileOptionsEnd(new_builder)
 
     # Cannot handle builtin option type yet
-    print("Cannot handle this option yet")
+    print("Cannot handle BuiltinOptions {} yet. See BuiltinOptions.py for op name".format(
+        builtin_option_type))
     exit(1)
 
 
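The improved error message prints the numeric `BuiltinOptions` code and points at `BuiltinOptions.py` for the matching op name. A quick way to do the lookup, assuming the generated file sits under `tools/tflitefile_tool/tflite/` and using 44 purely as a placeholder for the printed code:

```
$ grep -w "44" tools/tflitefile_tool/tflite/BuiltinOptions.py
```
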
diff --git a/tools/update_version/update-version b/tools/update_version/update-version
deleted file mode 100644 (file)
index 1b77c10..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/bin/bash
-
-set -eu
-
-progname=$(basename "${BASH_SOURCE[0]}")
-script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-nnfw_root="$( cd "${script_dir%*/*/*}" && pwd )"
-
-usage() {
-  echo "Usage: $progname version"
-  echo "Update all version information"
-  echo ""
-  echo "Options:"
-  echo "    -h   show this help"
-  echo ""
-  echo "Examples:"
-  echo "    $progname 1.6.0"
-  exit 1
-}
-
-if [ $# -eq 0 ]; then
-  echo "For help, type $progname -h"
-  exit 1
-fi
-
-while getopts "ho:" OPTION; do
-case "${OPTION}" in
-    h) usage;;
-    ?) exit 1;;
-esac
-done
-
-shift $((OPTIND-1))
-
-if [ $# -ne 1 ]; then
-  echo "error: wrong argument (no argument or too many arguments)."
-  echo "For help, type $progname -h"
-  exit 1
-fi
-
-version=$1
-
-perl -pi -e "s/^release = .*/release = \'$version\'/" ${nnfw_root}/docs/conf.py
-
-perl -pi -e "s/^Version: .*/Version: $version/" ${nnfw_root}/packaging/nnfw.spec
-
-IFS=. read M m p <<< "$version"
-hex=$(printf '0x%08x' $(( (($M << 24)) | (($m << 8)) | $p )))
-perl -pi -e "s/^#define NNFW_VERSION.*/#define NNFW_VERSION $hex/" ${nnfw_root}/runtime/onert/api/include/nnfw_version.h
-
-perl -pi -e "s/versionName .*$/versionName \"$version\"/" ${nnfw_root}/runtime/contrib/android/api/build.gradle